class MultibyteCharsExtrasTest
The default Multibyte Chars proxy has more features than the normal string implementation. Tests for the implementation of these features should run on all Ruby versions and shouldn't be tested through the proxy methods.
Public Instance Methods
test_capitalize_should_be_unicode_aware()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 513
# Verifies that +capitalize+ upcases the first character and downcases
# everything after it for Cyrillic input, and leaves "" untouched.
def test_capitalize_should_be_unicode_aware
  expectations = {
    "аБвг аБвг" => "Абвг абвг",
    "аБвг АБВГ" => "Абвг абвг",
    "АБВГ АБВГ" => "Абвг абвг",
    "" => ""
  }

  expectations.each_pair do |source, capitalized|
    assert_equal capitalized, chars(source).capitalize
  end
end
test_class_is_not_forwarded()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 718
# Verifies that calling +class+ on the object returned by +mb_chars+ reports
# ActiveSupport::Multibyte::Chars itself.
def test_class_is_not_forwarded
  # Expected value goes first so a failure message labels the two values correctly.
  assert_equal ActiveSupport::Multibyte::Chars, BYTE_STRING.dup.mb_chars.class
end
test_composition_exclusion_is_set_up_properly()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 568
# Verifies that normalizing the sequence U+0915 U+093C to form C returns it unchanged.
def test_composition_exclusion_is_set_up_properly
  # Normalization of DEVANAGARI LETTER QA breaks when composition exclusion isn't used correctly
  devanagari_qa = [0x915, 0x93c].pack("U*")
  normalized = chars(devanagari_qa).normalize(:c)

  assert_equal devanagari_qa, normalized
end
test_downcase_should_be_unicode_aware()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 503
# Verifies that +downcase+ lowercases Cyrillic and ASCII letters (keeping an
# embedded null byte) and returns hiragana text unchanged.
def test_downcase_should_be_unicode_aware
  mixed_case = chars("аБвгд\0F")
  assert_equal "абвгд\0f", mixed_case.downcase

  hiragana = chars("こにちわ")
  assert_equal "こにちわ", hiragana.downcase
end
test_limit_should_keep_under_the_specified_byte_limit()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 561
# Verifies that, for every limit from 1 up to the byte size of UNICODE_STRING,
# the result of +limit+ never occupies more bytes than the limit.
def test_limit_should_keep_under_the_specified_byte_limit
  example = chars(UNICODE_STRING)

  # The limit is expressed in bytes, so both the range of limits and the
  # measured size use bytesize; String#length counts characters and would let
  # an over-long multibyte result pass unnoticed.
  (1..UNICODE_STRING.bytesize).each do |limit|
    limited = example.limit(limit).to_s
    assert limited.bytesize <= limit, "limit(#{limit}) returned #{limited.bytesize} bytes"
  end
end
test_limit_should_not_break_on_blank_strings()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 531
# Verifies that +limit+ on an empty string returns an equal value for limits 0 and 1.
def test_limit_should_not_break_on_blank_strings
  blank = chars("")

  [0, 1].each do |byte_limit|
    assert_equal blank, blank.limit(byte_limit)
  end
end
test_limit_should_work_on_a_multibyte_string()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 537
# Verifies the result of +limit+ on UNICODE_STRING for several byte limits,
# including limits that fall in the middle of a character.
def test_limit_should_work_on_a_multibyte_string
  example = chars(UNICODE_STRING)
  assert_equal UNICODE_STRING, example.limit(UNICODE_STRING.bytesize)

  # byte limit => expected prefix
  expected_by_limit = {
    0 => "",
    1 => "",
    3 => "こ",
    6 => "こに",
    8 => "こに",
    9 => "こにち",
    50 => "こにちわ"
  }
  expected_by_limit.each_pair do |byte_limit, prefix|
    assert_equal prefix, example.limit(byte_limit)
  end
end
test_limit_should_work_on_an_ascii_string()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 551
# Verifies the result of +limit+ on ASCII_STRING for several limits.
def test_limit_should_work_on_an_ascii_string
  ascii = chars(ASCII_STRING)
  assert_equal ASCII_STRING, ascii.limit(ASCII_STRING.length)

  # limit => expected prefix
  expected_by_limit = {
    0 => "",
    1 => "o",
    2 => "oh",
    4 => "ohay",
    50 => "ohayo"
  }
  expected_by_limit.each_pair do |byte_limit, prefix|
    assert_equal prefix, ascii.limit(byte_limit)
  end
end
test_normalization_C_pri_29()
click to toggle source
Test for Public Review Issue #29: a bad explanation of composition might lead to a bad implementation. See www.unicode.org/review/pr-29.html
# File activesupport/test/multibyte_chars_test.rb, line 576
# Verifies that two specific codepoint sequences come back unchanged from
# normalization to form C.
def test_normalization_C_pri_29
  codepoint_sequences = [
    [0x0B47, 0x0300, 0x0B3E],
    [0x1100, 0x0300, 0x1161]
  ]

  codepoint_sequences.each do |codepoints|
    sequence = codepoints.pack("U*")
    assert_equal_codepoints sequence, chars(sequence).normalize(:c)
  end
end
test_normalization_shouldnt_strip_null_bytes()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 585
# Verifies that a string with an embedded null byte is returned unchanged by
# every normalization form and by +decompose+ / +compose+.
def test_normalization_shouldnt_strip_null_bytes
  null_byte_str = "Test\0test"

  [:kc, :c, :d, :kd].each do |form|
    assert_equal null_byte_str, chars(null_byte_str).normalize(form)
  end

  assert_equal null_byte_str, chars(null_byte_str).decompose
  assert_equal null_byte_str, chars(null_byte_str).compose
end
test_should_compute_grapheme_length()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 611
# Verifies +grapheme_length+ against a table of inputs. An input is either a
# literal string or a list of class names that is turned into a string via
# string_from_classes.
def test_should_compute_grapheme_length
  cases = [
    ["", 0],
    ["abc", 3],
    ["こにちわ", 4],
    [[0x0924, 0x094D, 0x0930].pack("U*"), 2],
    # GB3
    [%w(cr lf), 1],
    # GB4
    [%w(cr n), 2],
    [%w(lf n), 2],
    [%w(control n), 2],
    [%w(cr extend), 2],
    [%w(lf extend), 2],
    [%w(control extend), 2],
    # GB 5
    [%w(n cr), 2],
    [%w(n lf), 2],
    [%w(n control), 2],
    [%w(extend cr), 2],
    [%w(extend lf), 2],
    [%w(extend control), 2],
    # GB 6
    [%w(l l), 1],
    [%w(l v), 1],
    [%w(l lv), 1],
    [%w(l lvt), 1],
    # GB7
    [%w(lv v), 1],
    [%w(lv t), 1],
    [%w(v v), 1],
    [%w(v t), 1],
    # GB8
    [%w(lvt t), 1],
    [%w(t t), 1],
    # GB8a
    [%w(r r), 1],
    # GB9
    [%w(n extend), 1],
    # GB9a
    [%w(n spacingmark), 1],
    # GB10
    [%w(n n), 2],
    # Other
    [%w(n cr lf n), 3],
    [%w(n l v t), 2],
    [%w(cr extend n), 3],
  ]

  cases.each do |input, expected_length|
    # Arrays hold class names; anything else is already the string under test.
    subject = input.is_a?(Array) ? string_from_classes(input) : input
    assert_equal expected_length, chars(subject).grapheme_length, input.inspect
  end
end
test_simple_normalization()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 596
# Verifies +normalize+ on the empty string and on a short four-codepoint
# string for each of the forms :kc, :c, :d and :kd.
def test_simple_normalization
  # The codepoints are written in decimal; the names are those of the
  # corresponding Unicode characters.
  comp_str = [
    44,  # U+002C COMMA
    307, # U+0133 LATIN SMALL LIGATURE IJ
    328, # U+0148 LATIN SMALL LETTER N WITH CARON
    323  # U+0143 LATIN CAPITAL LETTER N WITH ACUTE
  ].pack("U*")

  assert_equal_codepoints "", chars("").normalize
  # Expected for :kc -- IJ becomes "i" (105) + "j" (106), the N letters stay composed.
  assert_equal_codepoints [44, 105, 106, 328, 323].pack("U*"), chars(comp_str).normalize(:kc).to_s
  # Expected for :c -- identical to the input.
  assert_equal_codepoints [44, 307, 328, 323].pack("U*"), chars(comp_str).normalize(:c).to_s
  # Expected for :d -- IJ is kept, the N letters split into base letter + combining mark (780, 769).
  assert_equal_codepoints [44, 307, 110, 780, 78, 769].pack("U*"), chars(comp_str).normalize(:d).to_s
  # Expected for :kd -- IJ is split and the N letters are decomposed.
  assert_equal_codepoints [44, 105, 106, 110, 780, 78, 769].pack("U*"), chars(comp_str).normalize(:kd).to_s
end
test_swapcase_should_be_unicode_aware()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 508
# Verifies that +swapcase+ flips the case of Cyrillic, ASCII and accented Latin
# letters (keeping an embedded null byte) and returns hiragana text unchanged.
def test_swapcase_should_be_unicode_aware
  mixed_case = chars("АAÉü\0F")
  assert_equal "аaéÜ\0f", mixed_case.swapcase

  hiragana = chars("こにちわ")
  assert_equal "こにちわ", hiragana.swapcase
end
test_tidy_bytes_should_forcibly_tidy_bytes_if_specified()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 711
# Verifies that +tidy_bytes+ only yields the UTF-8 reading of an ambiguous
# byte sequence when called with +true+.
def test_tidy_bytes_should_forcibly_tidy_bytes_if_specified
  # valid as both CP-1252 and UTF-8, but with different interpretations.
  byte_string = "\xF0\xA5\xA4\xA4"

  default_result = chars(byte_string).tidy_bytes
  assert_not_equal "𥤤", default_result

  # Forcible conversion to UTF-8
  forced_result = chars(byte_string).tidy_bytes(true)
  assert_equal "𥤤", forced_result
end
test_tidy_bytes_should_tidy_bytes()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 668
# Verifies +tidy_bytes+ on single bytes (alone, repeated, and surrounded by
# ASCII or multibyte characters), on a mixed byte string, and on a truncated
# multibyte sequence.
def test_tidy_bytes_should_tidy_bytes
  single_byte_cases = {
    "\x21" => "!", # Valid ASCII byte, low
    "\x41" => "A", # Valid ASCII byte, mid
    "\x7E" => "~", # Valid ASCII byte, high
    "\x80" => "€", # Continuation byte, low (CP-1252)
    "\x94" => "”", # Continuation byte, mid (CP-1252)
    "\x9F" => "Ÿ", # Continuation byte, high (CP-1252)
    "\xC0" => "À", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC1" => "Á", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC2" => "Â", # Start of 2-byte sequence, low
    "\xC8" => "È", # Start of 2-byte sequence, mid
    "\xDF" => "ß", # Start of 2-byte sequence, high
    "\xE0" => "à", # Start of 3-byte sequence, low
    "\xE8" => "è", # Start of 3-byte sequence, mid
    "\xEF" => "ï", # Start of 3-byte sequence, high
    "\xF0" => "ð", # Start of 4-byte sequence
    "\xF1" => "ñ", # Unused byte
    "\xFF" => "ÿ", # Restricted byte
    "\x00" => "\x00" # null char
  }

  single_byte_cases.each_pair do |bad, good|
    # The byte on its own, then doubled and tripled.
    assert_equal good, chars(bad).tidy_bytes.to_s
    assert_equal "#{good}#{good}", chars("#{bad}#{bad}").tidy_bytes
    assert_equal "#{good}#{good}#{good}", chars("#{bad}#{bad}#{bad}").tidy_bytes
    # The byte followed by, surrounded by, and preceded by valid characters.
    assert_equal "#{good}a", chars("#{bad}a").tidy_bytes
    assert_equal "#{good}á", chars("#{bad}á").tidy_bytes
    assert_equal "a#{good}a", chars("a#{bad}a").tidy_bytes
    assert_equal "á#{good}á", chars("á#{bad}á").tidy_bytes
    assert_equal "a#{good}", chars("a#{bad}").tidy_bytes
    assert_equal "á#{good}", chars("á#{bad}").tidy_bytes
  end

  byte_string = "\270\236\010\210\245"
  tidy_string = [0xb8, 0x17e, 0x8, 0x2c6, 0xa5].pack("U*")
  assert_equal_codepoints tidy_string, chars(byte_string).tidy_bytes
  assert_nothing_raised do
    chars(byte_string).tidy_bytes.to_s.unpack("U*")
  end

  # UTF-8 leading byte followed by too few continuation bytes
  assert_equal_codepoints "\xc3\xb0\xc2\xa5\xc2\xa4\x21", chars("\xf0\xa5\xa4\x21").tidy_bytes
end
test_titleize_should_be_unicode_aware()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 522
# Verifies that +titleize+ capitalizes each word of accented Latin and Cyrillic input.
def test_titleize_should_be_unicode_aware
  spanish = chars("ÉL QUE SE ENTERÓ")
  assert_equal "Él Que Se Enteró", spanish.titleize

  cyrillic = chars("аБвг аБвг")
  assert_equal "Абвг Абвг", cyrillic.titleize
end
test_titleize_should_not_affect_characters_that_do_not_case_fold()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 527
# Verifies that +titleize+ returns the string "日本語" unchanged.
def test_titleize_should_not_affect_characters_that_do_not_case_fold
  titleized = chars("日本語").titleize

  assert_equal "日本語", titleized
end
test_upcase_should_be_unicode_aware()
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 498
# Verifies that +upcase+ uppercases Cyrillic and ASCII letters (keeping an
# embedded null byte) and returns hiragana text unchanged.
def test_upcase_should_be_unicode_aware
  mixed_case = chars("аБвгд\0f")
  assert_equal "АБВГД\0F", mixed_case.upcase

  hiragana = chars("こにちわ")
  assert_equal "こにちわ", hiragana.upcase
end
Private Instance Methods
string_from_classes(classes)
click to toggle source
# File activesupport/test/multibyte_chars_test.rb, line 724
# Builds a string holding one representative character per entry of +classes+.
#
# classes - a list of class names (e.g. "cr", "lf", "extend"); each name is
#           converted with +to_sym+ and looked up in the table below.
#
# Returns the looked-up codepoints packed with Array#pack("U*").
# Raises KeyError if a name is not present in the table.
def string_from_classes(classes)
  # Characters from the character classes as described in UAX #29
  character_from_class = {
    l: 0x1100,
    v: 0x1160,
    t: 0x11A8,
    lv: 0xAC00,
    lvt: 0xAC01,
    cr: 0x000D,
    lf: 0x000A,
    extend: 0x094D,
    n: 0x64,
    spacingmark: 0x0903,
    r: 0x1F1E6,
    control: 0x0001
  }

  # fetch (rather than []) so a misspelled class name fails with a KeyError
  # naming the key, instead of a nil reaching pack and raising an opaque TypeError.
  classes.map { |name| character_from_class.fetch(name.to_sym) }.pack("U*")
end