MultibyteCharsExtrasTest

The default Multibyte Chars proxy has more features than the normal string implementation. Tests for the implementation of these features should run on all Ruby versions and shouldn't be tested through the proxy methods.

Methods

T

test_capitalize_should_be_unicode_aware,
test_composition_exclusion_is_set_up_properly,
test_downcase_should_be_unicode_aware,
test_limit_should_keep_under_the_specified_byte_limit,
test_limit_should_not_break_on_blank_strings,
test_limit_should_work_on_a_multibyte_string,
test_limit_should_work_on_an_ascii_string,
test_normalization_C_pri_29,
test_normalization_shouldnt_strip_null_bytes,
test_should_compute_grapheme_length,
test_simple_normalization,
test_tidy_bytes_should_forcibly_tidy_bytes_if_specified,
test_tidy_bytes_should_tidy_bytes,
test_titleize_should_be_unicode_aware,
test_titleize_should_not_affect_characters_that_do_not_case_fold,
test_upcase_should_be_unicode_aware

Included Modules

MultibyteTestHelpers

Instance Public methods

test_capitalize_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 491
# Checks +capitalize+ against mixed-case Cyrillic input and the empty string:
# only the very first character is expected in upper case, everything after
# it in lower case.
def test_capitalize_should_be_unicode_aware
  expectations = {
    'аБвг аБвг' => 'Абвг абвг',
    'аБвг АБВГ' => 'Абвг абвг',
    'АБВГ АБВГ' => 'Абвг абвг',
    '' => ''
  }

  expectations.each_pair do |original, capitalized|
    assert_equal capitalized, chars(original).capitalize
  end
end

test_composition_exclusion_is_set_up_properly() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 546
# Normalizing U+0915 U+093C to form C is expected to return the sequence
# unchanged.
def test_composition_exclusion_is_set_up_properly
  # Normalization of DEVANAGARI LETTER QA breaks when composition exclusion
  # isn't used correctly, so the two code points must not be recomposed.
  ka_with_nukta = [0x915, 0x93c].pack('U*')
  normalized = chars(ka_with_nukta).normalize(:c)

  assert_equal ka_with_nukta, normalized
end

test_downcase_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 486
# Checks +downcase+ on Cyrillic text containing a null byte and an ASCII
# letter, and on Japanese text that is expected to come back unchanged.
def test_downcase_should_be_unicode_aware
  [
    ["абвгд\0f", "аБвгд\0f"],
    ['こにちわ', 'こにちわ']
  ].each do |lowercased, original|
    assert_equal lowercased, chars(original).downcase
  end
end

test_limit_should_keep_under_the_specified_byte_limit() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 539
# Verifies that +limit+ never returns more bytes than requested, for every
# limit from 1 up to the full byte size of UNICODE_STRING.
def test_limit_should_keep_under_the_specified_byte_limit
  # String#length counts characters (not bytes) on Ruby 1.9+, so bytes are
  # measured explicitly; +size+ is the fallback on Rubies without
  # String#bytesize, mirroring test_limit_should_work_on_a_multibyte_string.
  byte_count = lambda do |string|
    string.respond_to?(:bytesize) ? string.bytesize : string.size
  end

  example = chars(UNICODE_STRING)
  (1..byte_count.call(UNICODE_STRING)).each do |limit|
    limited = example.limit(limit).to_s
    assert byte_count.call(limited) <= limit,
           "limit(#{limit}) returned #{byte_count.call(limited)} bytes"
  end
end

test_limit_should_not_break_on_blank_strings() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 509
# Limiting an empty string to 0 or 1 is expected to give back a value equal
# to the original empty chars object.
def test_limit_should_not_break_on_blank_strings
  blank = chars('')

  [0, 1].each do |byte_limit|
    assert_equal blank, blank.limit(byte_limit)
  end
end

test_limit_should_work_on_a_multibyte_string() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 515
# Checks +limit+ on UNICODE_STRING: the full byte size returns the whole
# string, and a limit that falls inside a character is expected to yield only
# the whole characters that fit.
def test_limit_should_work_on_a_multibyte_string
  example = chars(UNICODE_STRING)

  # Rubies without String#bytesize fall back to String#size.
  if UNICODE_STRING.respond_to?(:bytesize)
    total_bytes = UNICODE_STRING.bytesize
  else
    total_bytes = UNICODE_STRING.size
  end
  assert_equal UNICODE_STRING, example.limit(total_bytes)

  # [limit, expected result], checked in this order.
  [
    [0,  ''],
    [1,  ''],
    [3,  'こ'],
    [6,  'こに'],
    [8,  'こに'],
    [9,  'こにち'],
    [50, 'こにちわ']
  ].each do |byte_limit, expected|
    assert_equal expected, example.limit(byte_limit)
  end
end

test_limit_should_work_on_an_ascii_string() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 529
# Checks +limit+ on ASCII_STRING: limiting to the string's own length returns
# it whole, smaller limits return the matching prefix, larger ones the whole
# string.
def test_limit_should_work_on_an_ascii_string
  ascii = chars(ASCII_STRING)
  assert_equal ASCII_STRING, ascii.limit(ASCII_STRING.length)

  # [limit, expected result], checked in this order.
  [
    [0,  ''],
    [1,  'o'],
    [2,  'oh'],
    [4,  'ohay'],
    [50, 'ohayo']
  ].each do |byte_limit, expected|
    assert_equal expected, ascii.limit(byte_limit)
  end
end

test_normalization_C_pri_29() Link

Test for Unicode Public Review Issue #29: a misleading explanation of composition could lead to an incorrect implementation. See www.unicode.org/review/pr-29.html

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 554
# Regression test for Unicode Public Review Issue #29: each of these
# three-code-point sequences is expected to come out of form C normalization
# with its code points unchanged.
def test_normalization_C_pri_29
  sequences = [
    [0x0B47, 0x0300, 0x0B3E],
    [0x1100, 0x0300, 0x1161]
  ]

  sequences.each do |codepoints|
    text = codepoints.pack('U*')
    assert_equal_codepoints text, chars(text).normalize(:c)
  end
end

test_normalization_shouldnt_strip_null_bytes() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 563
# A string containing a null byte is expected to pass unchanged through all
# four normalization forms as well as +decompose+ and +compose+.
def test_normalization_shouldnt_strip_null_bytes
  with_null = "Test\0test"

  [:kc, :c, :d, :kd].each do |form|
    assert_equal with_null, chars(with_null).normalize(form)
  end

  assert_equal with_null, chars(with_null).decompose
  assert_equal with_null, chars(with_null).compose
end

test_should_compute_grapheme_length() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 589
# Checks +g_length+ against a table of [sample, expected length] pairs.
#
# A sample is either a literal string or an array of names that is turned into
# a string by +string_from_classes+ (presumably grapheme-break class names;
# the helper is defined outside this method).
def test_should_compute_grapheme_length
  cases = [
    ['', 0],
    ['abc', 3],
    ['こにちわ', 4],
    [[0x0924, 0x094D, 0x0930].pack('U*'), 2],
    [%w(cr lf), 1],
    [%w(l l), 1],
    [%w(l v), 1],
    [%w(l lv), 1],
    [%w(l lvt), 1],
    [%w(lv v), 1],
    [%w(lv t), 1],
    [%w(v v), 1],
    [%w(v t), 1],
    [%w(lvt t), 1],
    [%w(t t), 1],
    [%w(n extend), 1],
    [%w(n n), 2],
    [%w(n cr lf n), 3],
    [%w(n l v t), 2]
  ]

  cases.each do |sample, grapheme_count|
    text = sample.is_a?(Array) ? string_from_classes(sample) : sample
    assert_equal grapheme_count, chars(text).g_length
  end
end

test_simple_normalization() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 574
# Checks the four normalization forms (plus the default form on an empty
# string) against a short sample string.
#
# The code points below are DECIMAL literals; the names given are those of
# the decimal values, which is what the expected arrays are written for.
def test_simple_normalization
  comp_str = [
    44,  # U+002C COMMA
    307, # U+0133 LATIN SMALL LIGATURE IJ
    328, # U+0148 LATIN SMALL LETTER N WITH CARON
    323  # U+0143 LATIN CAPITAL LETTER N WITH ACUTE
  ].pack("U*")

  assert_equal_codepoints '', chars('').normalize
  # Compatibility forms (:kc, :kd) expect the ligature as i (105) + j (106);
  # decomposed forms (:d, :kd) expect the accented letters as base letter
  # plus combining mark (110 + 780 and 78 + 769).
  assert_equal_codepoints [44,105,106,328,323].pack("U*"), chars(comp_str).normalize(:kc).to_s
  assert_equal_codepoints [44,307,328,323].pack("U*"), chars(comp_str).normalize(:c).to_s
  assert_equal_codepoints [44,307,110,780,78,769].pack("U*"), chars(comp_str).normalize(:d).to_s
  assert_equal_codepoints [44,105,106,110,780,78,769].pack("U*"), chars(comp_str).normalize(:kd).to_s
end

test_tidy_bytes_should_forcibly_tidy_bytes_if_specified() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 664
# Uses a byte sequence that is valid in both encodings to check the +force+
# argument of +tidy_bytes+.
def test_tidy_bytes_should_forcibly_tidy_bytes_if_specified
  ambiguous_bytes = "\xF0\xA5\xA4\xA4" # valid as both CP-1252 and UTF-8, but with different interpretations.
  cp1252_reading = "ð¥¤¤"

  # By default the result must not be the CP-1252 reading.
  assert_not_equal cp1252_reading, chars(ambiguous_bytes).tidy_bytes
  # Forcible conversion to UTF-8
  assert_equal cp1252_reading, chars(ambiguous_bytes).tidy_bytes(true)
end

test_tidy_bytes_should_tidy_bytes() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 620
# Checks +tidy_bytes+ on lone bytes, alone and surrounded by ASCII and
# multibyte neighbours, and then on two longer byte sequences.
def test_tidy_bytes_should_tidy_bytes
  # Lone input byte => text expected after tidying.
  cp1252_by_byte = {
    "\x21" => "!",   # Valid ASCII byte, low
    "\x41" => "A",   # Valid ASCII byte, mid
    "\x7E" => "~",   # Valid ASCII byte, high
    "\x80" => "€",   # Continuation byte, low (CP-1252)
    "\x94" => "”",   # Continuation byte, mid (CP-1252)
    "\x9F" => "Ÿ",   # Continuation byte, high (CP-1252)
    "\xC0" => "À",   # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC1" => "Á",   # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC2" => "Â",   # Start of 2-byte sequence, low
    "\xC8" => "È",   # Start of 2-byte sequence, mid
    "\xDF" => "ß",   # Start of 2-byte sequence, high
    "\xE0" => "à",   # Start of 3-byte sequence, low
    "\xE8" => "è",   # Start of 3-byte sequence, mid
    "\xEF" => "ï",   # Start of 3-byte sequence, high
    "\xF0" => "ð",   # Start of 4-byte sequence
    "\xF1" => "ñ",   # Start of 4-byte sequence
    "\xFF" => "ÿ",   # Restricted byte
    "\x00" => "\x00" # null char
  }

  cp1252_by_byte.each do |raw, tidied|
    # Only the bare-byte case converts the result with to_s before comparing.
    assert_equal tidied, chars(raw).tidy_bytes.to_s

    # [expected, input]: the byte repeated, then next to "a" and "á".
    [
      ["#{tidied}#{tidied}", "#{raw}#{raw}"],
      ["#{tidied}#{tidied}#{tidied}", "#{raw}#{raw}#{raw}"],
      ["#{tidied}a", "#{raw}a"],
      ["#{tidied}á", "#{raw}á"],
      ["a#{tidied}a", "a#{raw}a"],
      ["á#{tidied}á", "á#{raw}á"],
      ["a#{tidied}", "a#{raw}"],
      ["á#{tidied}", "á#{raw}"]
    ].each do |expected, input|
      assert_equal expected, chars(input).tidy_bytes
    end
  end

  mixed_bytes = "\270\236\010\210\245"
  expected_text = [0xb8, 0x17e, 0x8, 0x2c6, 0xa5].pack('U*')
  assert_equal_codepoints expected_text, chars(mixed_bytes).tidy_bytes
  # The tidied result must unpack as UTF-8 without raising.
  assert_nothing_raised { chars(mixed_bytes).tidy_bytes.to_s.unpack('U*') }

  # UTF-8 leading byte followed by too few continuation bytes
  assert_equal_codepoints "\xc3\xb0\xc2\xa5\xc2\xa4\x21", chars("\xf0\xa5\xa4\x21").tidy_bytes
end

test_titleize_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 500
# Checks +titleize+ on accented Latin and on Cyrillic input: every word is
# expected with an upper-case first letter and lower-case remainder.
def test_titleize_should_be_unicode_aware
  [
    ["Él Que Se Enteró", "ÉL QUE SE ENTERÓ"],
    ["Абвг Абвг", "аБвг аБвг"]
  ].each do |titleized, original|
    assert_equal titleized, chars(original).titleize
  end
end

test_titleize_should_not_affect_characters_that_do_not_case_fold() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 505
# Japanese text is expected to come back from +titleize+ unchanged.
def test_titleize_should_not_affect_characters_that_do_not_case_fold
  expected = "日本語"
  titleized = chars("日本語").titleize

  assert_equal expected, titleized
end

test_upcase_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 481
# Checks +upcase+ on Cyrillic text containing a null byte and an ASCII letter,
# and on Japanese text that is expected to come back unchanged.
def test_upcase_should_be_unicode_aware
  [
    ["АБВГД\0F", "аБвгд\0f"],
    ['こにちわ', 'こにちわ']
  ].each do |uppercased, original|
    assert_equal uppercased, chars(original).upcase
  end
end

Class MultibyteCharsExtrasTest < Test::Unit::TestCase