source: trunk/server/common/patches/rubygem-activesupport-2.3.x-CVE-2009-3009.patch @ 1329

Last change on this file since 1329 was 1329, checked in by mitchb, 15 years ago
Scriptsify activesupport and actionpack ruby gems, patch for CVE-2009-3009
File size: 11.3 KB
RevLine 
[1329]1diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb
2index 65a96af..b6354ee 100644
3--- a/activesupport/lib/active_support/multibyte.rb
4+++ b/activesupport/lib/active_support/multibyte.rb
5@@ -1,9 +1,5 @@
6 # encoding: utf-8
7 
8-require 'active_support/multibyte/chars'
9-require 'active_support/multibyte/exceptions'
10-require 'active_support/multibyte/unicode_database'
11-
12 module ActiveSupport #:nodoc:
13   module Multibyte
14     # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
15@@ -27,7 +23,35 @@ module ActiveSupport #:nodoc:
16     #
17     # Example:
18     #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
19-    mattr_accessor :proxy_class
20-    self.proxy_class = ActiveSupport::Multibyte::Chars
21+    def self.proxy_class=(klass)
22+      @proxy_class = klass
23+    end
24+
25+    # Returns the current proxy class
26+    def self.proxy_class
27+      @proxy_class ||= ActiveSupport::Multibyte::Chars
28+    end
29+
30+    # Regular expressions that describe valid byte sequences for a character
31+    VALID_CHARACTER = {
32+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
33+      'UTF-8' => /\A(?:
34+                  [\x00-\x7f]                                         |
35+                  [\xc2-\xdf] [\x80-\xbf]                             |
36+                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
37+                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
38+                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
39+                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
40+                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
41+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
42+      'Shift_JIS' => /\A(?:
43+                  [\x00-\x7e \xa1-\xdf]                                     |
44+                  [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn
45+    }
46   end
47 end
48+
49+require 'active_support/multibyte/chars'
50+require 'active_support/multibyte/exceptions'
51+require 'active_support/multibyte/unicode_database'
52+require 'active_support/multibyte/utils'
53diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
54index 3d392d2..16bc130 100644
55--- a/activesupport/lib/active_support/multibyte/chars.rb
56+++ b/activesupport/lib/active_support/multibyte/chars.rb
57@@ -73,16 +73,7 @@ module ActiveSupport #:nodoc:
58       UNICODE_TRAILERS_PAT = /(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+\Z/
59       UNICODE_LEADERS_PAT = /\A(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+/
60 
61-      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
62-      UTF8_PAT = /\A(?:
63-                     [\x00-\x7f]                                     |
64-                     [\xc2-\xdf] [\x80-\xbf]                         |
65-                     \xe0        [\xa0-\xbf] [\x80-\xbf]             |
66-                     [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
67-                     \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
68-                     [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
69-                     \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
70-                    )*\z/xn
71+      UTF8_PAT = ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8']
72 
73       attr_reader :wrapped_string
74       alias to_s wrapped_string
75@@ -307,23 +298,23 @@ module ActiveSupport #:nodoc:
76       def rstrip
77         chars(@wrapped_string.gsub(UNICODE_TRAILERS_PAT, ''))
78       end
79-     
80+
81       # Strips entire range of Unicode whitespace from the left of the string.
82       def lstrip
83         chars(@wrapped_string.gsub(UNICODE_LEADERS_PAT, ''))
84       end
85-     
86+
87       # Strips entire range of Unicode whitespace from the right and left of the string.
88       def strip
89         rstrip.lstrip
90       end
91-     
92+
93       # Returns the number of codepoints in the string
94       def size
95         self.class.u_unpack(@wrapped_string).size
96       end
97       alias_method :length, :size
98-     
99+
100       # Reverses all characters in the string.
101       #
102       # Example:
103@@ -331,7 +322,7 @@ module ActiveSupport #:nodoc:
104       def reverse
105         chars(self.class.u_unpack(@wrapped_string).reverse.pack('U*'))
106       end
107-     
108+
109       # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
110       # character.
111       #
112@@ -646,7 +637,7 @@ module ActiveSupport #:nodoc:
113           string.split(//u).map do |c|
114             c.force_encoding(Encoding::ASCII) if c.respond_to?(:force_encoding)
115 
116-            if !UTF8_PAT.match(c)
117+            if !ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'].match(c)
118               n = c.unpack('C')[0]
119               n < 128 ? n.chr :
120               n < 160 ? [UCD.cp1252[n] || n].pack('U') :
121diff --git a/activesupport/lib/active_support/multibyte/utils.rb b/activesupport/lib/active_support/multibyte/utils.rb
122new file mode 100644
123index 0000000..acef84d
124--- /dev/null
125+++ b/activesupport/lib/active_support/multibyte/utils.rb
126@@ -0,0 +1,61 @@
127+# encoding: utf-8
128+
129+module ActiveSupport #:nodoc:
130+  module Multibyte #:nodoc:
131+    if Kernel.const_defined?(:Encoding)
132+      # Returns a regular expression that matches valid characters in the current encoding
133+      def self.valid_character
134+        VALID_CHARACTER[Encoding.default_internal.to_s]
135+      end
136+    else
137+      def self.valid_character
138+        case $KCODE
139+        when 'UTF8'
140+          VALID_CHARACTER['UTF-8']
141+        when 'SJIS'
142+          VALID_CHARACTER['Shift_JIS']
143+        end
144+      end
145+    end
146+
147+    if 'string'.respond_to?(:valid_encoding?)
148+      # Verifies the encoding of a string
149+      def self.verify(string)
150+        string.valid_encoding?
151+      end
152+    else
153+      def self.verify(string) # fallback for Strings without valid_encoding?
154+        if expression = valid_character # nil when no pattern is known for the current encoding
155+          for c in string.split(//)
156+            return false unless expression.match(c) # reuse the pattern looked up once above
157+          end
158+        end
159+        true
160+      end
161+    end
162+
163+    # Verifies the encoding of the string and raises an exception when it's not valid
164+    def self.verify!(string)
165+      raise EncodingError.new("Found characters with invalid encoding") unless verify(string)
166+    end
167+
168+    if 'string'.respond_to?(:force_encoding)
169+      # Removes all invalid characters from the string.
170+      #
171+      # Note: this method is a no-op in Ruby 1.9
172+      def self.clean(string)
173+        string
174+      end
175+    else
176+      def self.clean(string) # fallback for Strings without force_encoding
177+        if expression = valid_character # nil when no pattern is known for the current encoding
178+          string.split(//).select do |c|
179+            expression.match(c) # keep only characters matching the validity pattern
180+          end.join
181+        else
182+          string
183+        end
184+      end
185+    end
186+  end
187+end
188\ No newline at end of file
189diff --git a/activesupport/test/multibyte_utils_test.rb b/activesupport/test/multibyte_utils_test.rb
190new file mode 100644
191index 0000000..d8ac5ff
192--- /dev/null
193+++ b/activesupport/test/multibyte_utils_test.rb
194@@ -0,0 +1,141 @@
195+# encoding: utf-8
196+
197+require 'abstract_unit'
198+require 'multibyte_test_helpers'
199+
200+class MultibyteUtilsTest < ActiveSupport::TestCase
201+  include MultibyteTestHelpers
202+
203+  test "valid_character returns an expression for the current encoding" do
204+    with_encoding('None') do
205+      assert_nil ActiveSupport::Multibyte.valid_character
206+    end
207+    with_encoding('UTF8') do
208+      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'], ActiveSupport::Multibyte.valid_character
209+    end
210+    with_encoding('SJIS') do
211+      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['Shift_JIS'], ActiveSupport::Multibyte.valid_character
212+    end
213+  end
214+
215+  test "verify verifies ASCII strings are properly encoded" do
216+    with_encoding('None') do
217+      examples.each do |example|
218+        assert ActiveSupport::Multibyte.verify(example)
219+      end
220+    end
221+  end
222+
223+  test "verify verifies UTF-8 strings are properly encoded" do
224+    with_encoding('UTF8') do
225+      assert ActiveSupport::Multibyte.verify(example('valid UTF-8'))
226+      assert !ActiveSupport::Multibyte.verify(example('invalid UTF-8'))
227+    end
228+  end
229+
230+  test "verify verifies Shift-JIS strings are properly encoded" do
231+    with_encoding('SJIS') do
232+      assert ActiveSupport::Multibyte.verify(example('valid Shift-JIS'))
233+      assert !ActiveSupport::Multibyte.verify(example('invalid Shift-JIS'))
234+    end
235+  end
236+
237+  test "verify! raises an exception when it finds an invalid character" do
238+    with_encoding('UTF8') do
239+      assert_raises(ActiveSupport::Multibyte::EncodingError) do
240+        ActiveSupport::Multibyte.verify!(example('invalid UTF-8'))
241+      end
242+    end
243+  end
244+
245+  test "verify! doesn't raise an exception when the encoding is valid" do
246+    with_encoding('UTF8') do
247+      assert_nothing_raised do
248+        ActiveSupport::Multibyte.verify!(example('valid UTF-8'))
249+      end
250+    end
251+  end
252+
253+  if RUBY_VERSION < '1.9'
254+    test "clean leaves ASCII strings intact" do
255+      with_encoding('None') do
256+        [
257+          'word', "\270\236\010\210\245"
258+        ].each do |string|
259+          assert_equal string, ActiveSupport::Multibyte.clean(string)
260+        end
261+      end
262+    end
263+
264+    test "clean cleans invalid characters from UTF-8 encoded strings" do
265+      with_encoding('UTF8') do
266+        cleaned_utf8 = [8].pack('C*')
267+        assert_equal example('valid UTF-8'), ActiveSupport::Multibyte.clean(example('valid UTF-8'))
268+        assert_equal cleaned_utf8, ActiveSupport::Multibyte.clean(example('invalid UTF-8'))
269+      end
270+    end
271+
272+    test "clean cleans invalid characters from Shift-JIS encoded strings" do
273+      with_encoding('SJIS') do
274+        cleaned_sjis = [184, 0, 136, 165].pack('C*')
275+        assert_equal example('valid Shift-JIS'), ActiveSupport::Multibyte.clean(example('valid Shift-JIS'))
276+        assert_equal cleaned_sjis, ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
277+      end
278+    end
279+  else
280+    test "clean is a no-op" do
281+      with_encoding('UTF8') do
282+        assert_equal example('invalid Shift-JIS'), ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
283+      end
284+    end
285+  end
286+
287+  private
288+
289+  STRINGS = {
290+    'valid ASCII'       => [65, 83, 67, 73, 73].pack('C*'),
291+    'invalid ASCII'     => [128].pack('C*'),
292+    'valid UTF-8'       => [227, 129, 147, 227, 129, 171, 227, 129, 161, 227, 130, 143].pack('C*'),
293+    'invalid UTF-8'     => [184, 158, 8, 136, 165].pack('C*'),
294+    'valid Shift-JIS'   => [131, 122, 129, 91, 131, 128].pack('C*'),
295+    'invalid Shift-JIS' => [184, 158, 8, 0, 255, 136, 165].pack('C*')
296+  }
297+
298+  if Kernel.const_defined?(:Encoding)
299+    def example(key)
300+      STRINGS[key].force_encoding(Encoding.default_internal)
301+    end
302+
303+    def examples
304+      STRINGS.values.map { |s| s.force_encoding(Encoding.default_internal) }
305+    end
306+  else
307+    def example(key)
308+      STRINGS[key]
309+    end
310+
311+    def examples
312+      STRINGS.values
313+    end
314+  end
315+
316+  if 'string'.respond_to?(:encoding)
317+    def with_encoding(enc) # runs the block with Encoding.default_internal switched per +enc+
318+      before = Encoding.default_internal
319+
320+      case enc
321+      when 'UTF8'
322+        Encoding.default_internal = Encoding::UTF_8
323+      when 'SJIS'
324+        Encoding.default_internal = Encoding::Shift_JIS
325+      else
326+        Encoding.default_internal = Encoding::BINARY
327+      end
328+      yield
329+    ensure # restore the previous setting even when the block raises (e.g. a failed assertion)
330+      Encoding.default_internal = before
331+    end
332+  else
333+    alias with_encoding with_kcode
334+  end
335+end
336\ No newline at end of file
337
Note: See TracBrowser for help on using the repository browser.