MultibyteCharsExtrasTest

The default Multibyte Chars proxy has more features than the normal string implementation. Tests for the implementation of these features should run on all Ruby versions and shouldn't be tested through the proxy methods.

Methods

T

test_capitalize_should_be_unicode_aware,
test_class_is_not_forwarded,
test_composition_exclusion_is_set_up_properly,
test_downcase_should_be_unicode_aware,
test_limit_should_keep_under_the_specified_byte_limit,
test_limit_should_not_break_on_blank_strings,
test_limit_should_work_on_a_multibyte_string,
test_limit_should_work_on_an_ascii_string,
test_normalization_C_pri_29,
test_normalization_shouldnt_strip_null_bytes,
test_should_compute_grapheme_length,
test_simple_normalization,
test_swapcase_should_be_unicode_aware,
test_tidy_bytes_should_forcibly_tidy_bytes_if_specified,
test_tidy_bytes_should_tidy_bytes,
test_titleize_should_be_unicode_aware,
test_titleize_should_not_affect_characters_that_do_not_case_fold,
test_upcase_should_be_unicode_aware

Included Modules

MultibyteTestHelpers

Instance Public methods

test_capitalize_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 499
# Checks `capitalize` on Cyrillic input in several case mixes, plus the empty
# string. Each pair below is [input, expected result].
def test_capitalize_should_be_unicode_aware
  expectations = [
    ['аБвг аБвг', 'Абвг абвг'],
    ['аБвг АБВГ', 'Абвг абвг'],
    ['АБВГ АБВГ', 'Абвг абвг'],
    ['', '']
  ]

  expectations.each do |original, capitalized|
    assert_equal capitalized, chars(original).capitalize
  end
end

test_class_is_not_forwarded() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 679
# Checks that calling `class` on the object returned by `mb_chars` yields
# ActiveSupport::Multibyte::Chars.
def test_class_is_not_forwarded
  # Expected value goes first so that a failure message labels the expected
  # and actual sides correctly.
  assert_equal ActiveSupport::Multibyte::Chars, BYTE_STRING.dup.mb_chars.class
end

test_composition_exclusion_is_set_up_properly() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 554
# Normalizing DEVANAGARI LETTER QA to form C must give back the same
# two-codepoint sequence; this breaks when composition exclusion isn't used
# correctly.
def test_composition_exclusion_is_set_up_properly
  devanagari_qa = [0x915, 0x93c].pack('U*')
  normalized = chars(devanagari_qa).normalize(:c)

  assert_equal devanagari_qa, normalized
end

test_downcase_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 489
# Checks `downcase` on mixed-case Cyrillic/ASCII text containing a null byte,
# and on Japanese text that is expected to come back unchanged.
# Each pair below is [input, expected result].
def test_downcase_should_be_unicode_aware
  [
    ["аБвгд\0F", "абвгд\0f"],
    ['こにちわ', 'こにちわ']
  ].each do |original, downcased|
    assert_equal downcased, chars(original).downcase
  end
end

test_limit_should_keep_under_the_specified_byte_limit() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 547
# Checks, for every limit from 1 up to the byte size of UNICODE_STRING, that
# the string returned by `limit` occupies no more bytes than the limit.
def test_limit_should_keep_under_the_specified_byte_limit
  example = chars(UNICODE_STRING)
  # Measure in bytes, not characters: String#length counts characters, so
  # comparing it against a byte limit would let a result that exceeds the
  # byte limit slip through for multibyte input.
  (1..UNICODE_STRING.bytesize).each do |limit|
    assert example.limit(limit).to_s.bytesize <= limit
  end
end

test_limit_should_not_break_on_blank_strings() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 517
# Limiting an empty string to 0 or 1 should give back something equal to the
# empty wrapped string itself.
def test_limit_should_not_break_on_blank_strings
  blank = chars('')

  [0, 1].each do |byte_limit|
    assert_equal blank, blank.limit(byte_limit)
  end
end

test_limit_should_work_on_a_multibyte_string() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 523
# Checks `limit` against UNICODE_STRING for a range of limits.
# A limit equal to the full byte size must return the whole string; the table
# lists [limit, expected result] for the remaining cases.
def test_limit_should_work_on_a_multibyte_string
  example = chars(UNICODE_STRING)
  full_size = UNICODE_STRING.bytesize

  assert_equal UNICODE_STRING, example.limit(full_size)

  [
    [0, ''],
    [1, ''],
    [3, 'こ'],
    [6, 'こに'],
    [8, 'こに'],
    [9, 'こにち'],
    [50, 'こにちわ']
  ].each do |byte_limit, expected|
    assert_equal expected, example.limit(byte_limit)
  end
end

test_limit_should_work_on_an_ascii_string() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 537
# Checks `limit` against ASCII_STRING for a range of limits.
# A limit equal to the string's length must return the whole string; the table
# lists [limit, expected result] for the remaining cases.
def test_limit_should_work_on_an_ascii_string
  ascii = chars(ASCII_STRING)

  assert_equal ASCII_STRING, ascii.limit(ASCII_STRING.length)

  [
    [0, ''],
    [1, 'o'],
    [2, 'oh'],
    [4, 'ohay'],
    [50, 'ohayo']
  ].each do |byte_limit, expected|
    assert_equal expected, ascii.limit(byte_limit)
  end
end

test_normalization_C_pri_29() Link

Test for Public Review Issue #29: a bad explanation of composition might lead to a bad implementation. See www.unicode.org/review/pr-29.html

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 562
# Each codepoint sequence below is expected to come out of normalize(:c) with
# exactly the same codepoints it went in with.
def test_normalization_C_pri_29
  sequences = [
    [0x0B47, 0x0300, 0x0B3E],
    [0x1100, 0x0300, 0x1161]
  ]

  sequences.each do |codepoints|
    text = codepoints.pack('U*')
    assert_equal_codepoints text, chars(text).normalize(:c)
  end
end

test_normalization_shouldnt_strip_null_bytes() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 571
# A string with an embedded null byte must come back equal to itself from
# every normalization form, and from `decompose` and `compose`.
def test_normalization_shouldnt_strip_null_bytes
  with_null = "Test\0test"

  [:kc, :c, :d, :kd].each do |form|
    assert_equal with_null, chars(with_null).normalize(form)
  end

  assert_equal with_null, chars(with_null).decompose
  assert_equal with_null, chars(with_null).compose
end

test_should_compute_grapheme_length() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 597
# Checks `grapheme_length` for a table of [input, expected length] pairs.
# An input given as an Array is first converted to a string with
# `string_from_classes` (helper defined outside this view; the entries are
# presumably character-class names); a String input is used as-is.
def test_should_compute_grapheme_length
  cases = [
    ['', 0],
    ['abc', 3],
    ['こにちわ', 4],
    [[0x0924, 0x094D, 0x0930].pack('U*'), 2],
    [%w(cr lf), 1],
    [%w(l l), 1],
    [%w(l v), 1],
    [%w(l lv), 1],
    [%w(l lvt), 1],
    [%w(lv v), 1],
    [%w(lv t), 1],
    [%w(v v), 1],
    [%w(v t), 1],
    [%w(lvt t), 1],
    [%w(t t), 1],
    [%w(n extend), 1],
    [%w(n n), 2],
    [%w(n cr lf n), 3],
    [%w(n l v t), 2]
  ]

  cases.each do |source, grapheme_count|
    text = source.kind_of?(Array) ? string_from_classes(source) : source
    assert_equal grapheme_count, chars(text).grapheme_length
  end
end

test_simple_normalization() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 582
# Checks the codepoints produced by each normalization form for a short
# string, and that normalizing the empty string yields the empty string.
# The codepoints are written in decimal; the table lists
# [form, expected codepoints].
def test_simple_normalization
  source = [
    44,  # U+002C COMMA
    307, # U+0133 LATIN SMALL LIGATURE IJ
    328, # U+0148 LATIN SMALL LETTER N WITH CARON
    323  # U+0143 LATIN CAPITAL LETTER N WITH ACUTE
  ].pack("U*")

  assert_equal_codepoints '', chars('').normalize

  [
    [:kc, [44, 105, 106, 328, 323]],
    [:c,  [44, 307, 328, 323]],
    [:d,  [44, 307, 110, 780, 78, 769]],
    [:kd, [44, 105, 106, 110, 780, 78, 769]]
  ].each do |form, codepoints|
    assert_equal_codepoints codepoints.pack("U*"), chars(source).normalize(form).to_s
  end
end

test_swapcase_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 494
# Checks `swapcase` on mixed Cyrillic/Latin text containing a null byte, and
# on Japanese text that is expected to come back unchanged.
# Each pair below is [input, expected result].
def test_swapcase_should_be_unicode_aware
  [
    ["АAÉü\0F", "аaéÜ\0f"],
    ['こにちわ', 'こにちわ']
  ].each do |original, swapped|
    assert_equal swapped, chars(original).swapcase
  end
end

test_tidy_bytes_should_forcibly_tidy_bytes_if_specified() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 672
# The four bytes below are valid as both CP-1252 and UTF-8, but with different
# interpretations. Plain `tidy_bytes` must not produce the four-character
# string "ð¥¤¤"; `tidy_bytes(true)` (forcible conversion to UTF-8) must.
def test_tidy_bytes_should_forcibly_tidy_bytes_if_specified
  ambiguous_bytes = "\xF0\xA5\xA4\xA4"

  default_result = chars(ambiguous_bytes).tidy_bytes
  assert_not_equal "ð¥¤¤", default_result

  forced_result = chars(ambiguous_bytes).tidy_bytes(true)
  assert_equal "ð¥¤¤", forced_result
end

test_tidy_bytes_should_tidy_bytes() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 628
# Checks `tidy_bytes` on single bytes (alone, repeated, and embedded next to
# ASCII and multibyte neighbours), on a short mixed byte string, and on a
# UTF-8 leading byte followed by too few continuation bytes.
def test_tidy_bytes_should_tidy_bytes

  # Maps each single input byte to the character `tidy_bytes` is expected to
  # produce for it.
  single_byte_cases = {
    "\x21" => "!",   # Valid ASCII byte, low
    "\x41" => "A",   # Valid ASCII byte, mid
    "\x7E" => "~",   # Valid ASCII byte, high
    "\x80" => "€",   # Continuation byte, low (cp1252)
    "\x94" => "”",   # Continuation byte, mid (cp1252)
    "\x9F" => "Ÿ",   # Continuation byte, high (cp1252)
    "\xC0" => "À",   # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC1" => "Á",   # Overlong encoding, start of 2-byte sequence, but codepoint < 128
    "\xC2" => "Â",   # Start of 2-byte sequence, low
    "\xC8" => "È",   # Start of 2-byte sequence, mid
    "\xDF" => "ß",   # Start of 2-byte sequence, high
    "\xE0" => "à",   # Start of 3-byte sequence, low
    "\xE8" => "è",   # Start of 3-byte sequence, mid
    "\xEF" => "ï",   # Start of 3-byte sequence, high
    "\xF0" => "ð",   # Start of 4-byte sequence
    "\xF1" => "ñ",   # Unused byte
    "\xFF" => "ÿ",   # Restricted byte
    "\x00" => "\x00" # null char
  }

  # Each byte is tried alone, doubled, tripled, and as a prefix, infix and
  # suffix next to an ASCII "a" and a multibyte "á".
  single_byte_cases.each do |bad, good|
    assert_equal good, chars(bad).tidy_bytes.to_s
    assert_equal "#{good}#{good}", chars("#{bad}#{bad}").tidy_bytes
    assert_equal "#{good}#{good}#{good}", chars("#{bad}#{bad}#{bad}").tidy_bytes
    assert_equal "#{good}a", chars("#{bad}a").tidy_bytes
    assert_equal "#{good}á", chars("#{bad}á").tidy_bytes
    assert_equal "a#{good}a", chars("a#{bad}a").tidy_bytes
    assert_equal "á#{good}á", chars("á#{bad}á").tidy_bytes
    assert_equal "a#{good}", chars("a#{bad}").tidy_bytes
    assert_equal "á#{good}", chars("á#{bad}").tidy_bytes
  end

  # Octal escapes for the bytes 0xB8 0x9E 0x08 0x88 0xA5; the expected result
  # is given as the codepoints each byte should turn into.
  byte_string = "\270\236\010\210\245"
  tidy_string = [0xb8, 0x17e, 0x8, 0x2c6, 0xa5].pack('U*')
  assert_equal_codepoints tidy_string, chars(byte_string).tidy_bytes
  # Unpacking the tidied result as UTF-8 codepoints must not raise.
  assert_nothing_raised { chars(byte_string).tidy_bytes.to_s.unpack('U*') }

  # UTF-8 leading byte followed by too few continuation bytes
  # (the expected string is the UTF-8 encoding of U+00F0, U+00A5, U+00A4 and "!")
  assert_equal_codepoints "\xc3\xb0\xc2\xa5\xc2\xa4\x21", chars("\xf0\xa5\xa4\x21").tidy_bytes
end

test_titleize_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 508
# Checks `titleize` on accented Latin and Cyrillic phrases.
# Each pair below is [input, expected result].
def test_titleize_should_be_unicode_aware
  [
    ["ÉL QUE SE ENTERÓ", "Él Que Se Enteró"],
    ["аБвг аБвг", "Абвг Абвг"]
  ].each do |phrase, titleized|
    assert_equal titleized, chars(phrase).titleize
  end
end

test_titleize_should_not_affect_characters_that_do_not_case_fold() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 513
# `titleize` is expected to return this Japanese text unchanged.
def test_titleize_should_not_affect_characters_that_do_not_case_fold
  titleized = chars("日本語").titleize

  assert_equal "日本語", titleized
end

test_upcase_should_be_unicode_aware() Link

Source: show | on GitHub

# File activesupport/test/multibyte_chars_test.rb, line 484
# Checks `upcase` on mixed-case Cyrillic/ASCII text containing a null byte,
# and on Japanese text that is expected to come back unchanged.
# Each pair below is [input, expected result].
def test_upcase_should_be_unicode_aware
  [
    ["аБвгд\0f", "АБВГД\0F"],
    ['こにちわ', 'こにちわ']
  ].each do |original, upcased|
    assert_equal upcased, chars(original).upcase
  end
end

Class MultibyteCharsExtrasTest < ActiveSupport::TestCase