Usage:

$ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
$ ruby case-folding.rb CaseFolding.txt -o casefold.h
Namespace
Methods
D
L
R
Included Modules
Attributes
[R] fold
[R] fold_locale
[R] unfold
[R] unfold_locale
Class Public methods
load(*args)
# File enc/unicode/case-folding.rb, line 170
# Convenience entry point: builds a fresh instance, forwards every argument
# to that instance's #load, and returns whatever that call returns.
def self.load(*args)
  instance = new
  instance.load(*args)
end
Instance Public methods
display(dest)
# File enc/unicode/case-folding.rb, line 140
# Writes the generated output to +dest+ (anything responding to #print).
#
# The output is a two-line banner followed by four sections. For each
# section the table(s) are emitted through print_table, and the value
# print_table returns is handed to lookup_hash, whose result is printed
# right after:
#
# * CaseFold_11   - fold and fold_locale         (CodePointList3)
# * CaseUnfold_11 - unfold[0], unfold_locale[0]  (CodePointList3)
# * CaseUnfold_12 - unfold[1], unfold_locale[1]  (CodePointList2)
# * CaseUnfold_13 - unfold[2] only               (CodePointList2)
def display(dest)
  # banner
  [
    "/* DO NOT EDIT THIS FILE. */\n",
    "/* Generated by enc/unicode/case-folding.rb */\n\n"
  ].each { |banner| dest.print(banner) }

  # folding data: CaseFold + CaseFold_Locale share the CaseFold_11 lookup
  fold_table = "CaseFold_11"
  fold_rows = print_table(dest, fold_table, "CaseFold"=>fold, "CaseFold_Locale"=>fold_locale)
  dest.print(lookup_hash(fold_table, "CodePointList3", fold_rows))

  # unfolding data that comes with a "_Locale" companion table
  [["CaseUnfold_11", "CodePointList3"],
   ["CaseUnfold_12", "CodePointList2"]].each_with_index do |(table, list_type), idx|
    rows = print_table(dest, table, table=>unfold[idx], "#{table}_Locale"=>unfold_locale[idx])
    dest.print(lookup_hash(table, list_type, rows))
  end

  # CaseUnfold_13 has no locale companion
  last_table = "CaseUnfold_13"
  last_rows = print_table(dest, last_table, last_table=>unfold[2])
  dest.print(lookup_hash(last_table, "CodePointList2", last_rows))
end
load(filename)
# File enc/unicode/case-folding.rb, line 40
# Parses a Unicode CaseFolding.txt file and fills the folding tables.
#
# Only lines of the form "<code>; <status>; <mapping>;" whose status is C, F
# or T are used; every other line is skipped. The mapping of a T (Turkic)
# line is not stored; only its source code point is remembered so that the
# C/F entry of that same code point can be moved to the locale tables.
#
# After the call:
# * @fold maps a code point to the Array of code points it folds to.
# * @unfold is three hashes indexed by (mapping length - 1); each maps a
#   folded sequence (Array) to the list of code points folding to it.
# * @fold_locale and @unfold_locale hold the entries removed from @fold and
#   @unfold for code points that also have a T line. In @unfold_locale[0]
#   the key is the bare code point; in @unfold_locale[1] it is the Array.
#
# filename:: path of the data file. It is read with File.foreach, so a name
#            starting with "|" is treated as a file name and never run as a
#            command (IO.foreach would spawn a subprocess for it).
#
# Returns self.
def load(filename)
  pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/

  @fold = fold = {}
  @unfold = unfold = [{}, {}, {}]
  turkic = []

  File.foreach(filename) do |line|
    next unless res = pattern.match(line)
    ch_from = res[1].to_i(16)

    if res[2] == 'T'
      # Turkic case folding
      turkic << ch_from
      next
    end

    # store folding data: the mapping is the leading run of matched
    # code point captures (1 to 3 of them)
    ch_to = res.captures[2..-1].take_while { |hex| hex }.map { |hex| hex.to_i(16) }
    fold[ch_from] = ch_to

    # store unfolding data, bucketed by mapping length
    i = ch_to.length - 1
    (unfold[i][ch_to] ||= []) << ch_from
  end

  # move locale dependent data to (un)fold_locale
  @fold_locale = fold_locale = {}
  @unfold_locale = unfold_locale = [{}, {}]
  turkic.each do |ch_from|
    key = fold[ch_from]
    i = key.length - 1
    unfold_locale[i][i == 0 ? key[0] : key] = unfold[i].delete(key)
    fold_locale[ch_from] = fold.delete(ch_from)
  end
  self
end
lookup_hash(key, type, data)
# File enc/unicode/case-folding.rb, line 85
# Builds the C source of a hash function and a lookup function for one table
# by feeding the table's keys to the external +gperf+ program and rewriting
# the text it prints.
#
# key::  table name. It forms the function names
#        "onigenc_unicode_<key>_hash" and "onigenc_unicode_<key>_lookup" and
#        the "<key>_Table[...]" references written into the result.
# type:: C type name used in the lookup function's
#        "static const <type> *" return declaration.
# data:: list of entries whose first element is the key: one code point or
#        an Array of code points. The key size of the first entry is used as
#        the arity for the whole table.
#
# Returns the rewritten gperf output as a String.
#
# NOTE(review): needs a +gperf+ executable that IO.popen can start; the
# regexps below depend on the exact layout of gperf's generated C code -
# confirm against the gperf version in use.
def lookup_hash(key, type, data)
  hash = "onigenc_unicode_#{key}_hash"
  lookup = "onigenc_unicode_#{key}_lookup"
  # number of code points per key, taken from the first entry
  arity = Array(data[0][0]).size
  # key positions 1..arity*3: every code point is written as three bytes below;
  # -H / -N receive the hash and lookup function names searched for afterwards
  gperf = %W"gperf -7 -k#{[*1..(arity*3)].join(",")} -F,-1 -c -j1 -i1 -t -T -E -C -H #{hash} -N #{lookup} -n"
  # C parameter: a single code point, or a pointer when a key has several
  argname = arity > 1 ? "codes" : "code"
  argdecl = "const OnigCodePoint #{arity > 1 ? "*": ""}#{argname}"
  # keys are cut into n-bit pieces; m is the mask for one piece
  n = 7
  m = (1 << n) - 1
  # smallest and largest code point over all keys (for MIN/MAX_CODE_VALUE)
  min, max = data.map {|c, *|c}.flatten.minmax
  src = IO.popen(gperf, "r+") {|f|
    # gperf input: "short" declaration section, then one keyword line per entry
    f << "short\n%%\n"
    data.each_with_index {|(k, _), i|
      k = Array(k)
      # each code point becomes three 7-bit pieces (high, middle, low) as \xNN escapes
      ks = k.map {|j| [(j >> n*2) & m, (j >> n) & m, (j) & m]}.flatten.map {|c| "\\x%.2x" % c}.join("")
      # "::::" marks where the payload (a comment with the code points, then the
      # entry index) starts; the wordlist rewrite below keeps only what follows it
      f.printf "\"%s\", ::::/*%s*/ %d\n", ks, k.map {|c| "0x%.4x" % c}.join(","), i
    }
    f << "%%\n"
    f.close_write
    f.read
  }
  # hash function: replace its parameter list with argdecl and every
  # "(unsigned char)str[N]" access with bits_at/bits_of(<arg>, N)
  src.sub!(/^(#{hash})\s*\(.*?\).*?\n\{\n(.*)^\}/m) {
    name = $1
    body = $2
    body.gsub!(/\(unsigned char\)str\[(\d+)\]/, "bits_#{arity > 1 ? 'at' : 'of'}(#{argname}, \\1)")
    "#{name}(#{argdecl})\n{\n#{body}}"
  }
  # lookup function: new return type and parameter list, plus the body rewrites below
  src.sub!(/const short *\*\n^(#{lookup})\s*\(.*?\).*?\n\{\n(.*)^\}/m) {
    name = $1
    body = $2
    # put MIN_CODE_VALUE / MAX_CODE_VALUE at the start of the first enum
    body.sub!(/\benum\s+\{(\n[ \t]+)/, "\\&MIN_CODE_VALUE = 0x#{min.to_s(16)},\\1""MAX_CODE_VALUE = 0x#{max.to_s(16)},\\1")
    # calls to the hash function now pass only the code argument
    body.gsub!(/(#{hash})\s*\(.*?\)/, "\\1(#{argname})")
    # wordlist entries: empty slots become -1, others keep only the text after "::::"
    body.gsub!(/\{"",-1}/, "-1")
    body.gsub!(/\{"(?:[^"]|\")+", *::::(.*)\}/, '\1')
    # the condition of the "if (len ...)" test is replaced by range checks on the code point(s)
    body.sub!(/(\s+if\s)\(len\b.*\)/) do
      "#$1(" <<
        (arity > 1 ? (0...arity).map {|i| range_check("#{argname}[#{i}]")}.join(" &&\n      ") : range_check(argname)) <<
        ")"
    end
    # v will hold the name of the local variable that receives the wordlist entry
    v = nil
    body.sub!(/(if\s*\(.*MAX_HASH_VALUE.*\)\n([ \t]*))\{(.*?)\n\2\}/m) {
      pre = $1
      indent = $2
      s = $3
      # "const char *x = wordlist[..].field" becomes "short x = wordlist[key]"
      s.sub!(/const char *\* *(\w+)( *= *wordlist\[\w+\]).\w+/, 'short \1 = wordlist[key]')
      v = $1
      # the inner if now checks the index and compares against <key>_Table[x].from
      s.sub!(/\bif *\(.*\)/, "if (#{v} >= 0 && code#{arity}_equal(#{argname}, #{key}_Table[#{v}].from))")
      "#{pre}{#{s}\n#{indent}}"
    }
    # the first "return &...;" becomes the address of the matched table entry's .to
    body.sub!(/\b(return\s+&)([^;]+);/, '\1'"#{key}_Table[#{v}].to;")
    "static const #{type} *\n#{name}(#{argdecl})\n{\n#{body}}"
  }
  src
end
range_check(code)
# File enc/unicode/case-folding.rb, line 81
# Returns the text of a C condition testing that the expression +code+ lies
# between MIN_CODE_VALUE and MAX_CODE_VALUE (both inclusive).
def range_check(code)
  upper_bound = "#{code} <= MAX_CODE_VALUE"
  lower_bound = "#{code} >= MIN_CODE_VALUE"
  [upper_bound, lower_bound].join(" && ")
end