class Unihan2

Constants

DATA_DIR

Public Class Methods

new() click to toggle source
# File lib/unihan2.rb, line 6
# Build a new instance by loading both lookup tables up front:
# read_strokes populates @strokes and read_version populates @vers.
def initialize
  read_strokes
  read_version
end

Public Instance Methods

strokes(char) click to toggle source

Returns the total number of strokes of the character char. @param char [String] the character @return [Integer] the total strokes

# File lib/unihan2.rb, line 15
# Look up the total stroke count recorded for a character.
#
# @param char [String] the character to look up
# @return [Integer, nil] the stroke count, or nil when the character
#   has no entry in the @strokes table
def strokes(char)
  @strokes.fetch(char, nil)
end
ver(code) click to toggle source

Returns the Unicode version of a specific character. @param code [String, Integer] character or codepoint @return [Float] Unicode version

# File lib/unihan2.rb, line 22
# Return the Unicode version ("age") in which a character was introduced.
#
# Accepted forms of +code+:
# * an Integer codepoint, e.g. 0x4E00
# * a one-character String, e.g. "一"
# * a hexadecimal codepoint String, with or without a "U+" prefix,
#   e.g. "4E00" or "U+4E00"
#
# @param code [String, Integer, nil] character or codepoint
# @return [Float, nil] the version, or nil when +code+ is nil or no
#   range in @vers covers the codepoint
def ver(code)
  return nil if code.nil?

  codepoint =
    if code.is_a?(Integer)
      code
    elsif code.size == 1
      code.codepoints.first
    else
      # String#hex yields 0 for "U+4E00", so drop the prefix first
      # (the same notation read_strokes parses from the Unihan data).
      code.sub(/\AU\+/i, '').hex
    end

  ver_bsearch(codepoint, 0, @vers.size - 1)
end

Private Instance Methods

read_strokes() click to toggle source
# File lib/unihan2.rb, line 38
# Load the stroke-count table into @strokes (character => Integer count).
#
# Reads the tab-separated file Unihan_DictionaryLikeData.txt under
# DATA_DIR, skips lines starting with '#', and keeps only rows whose
# second column is 'kTotalStrokes'.
def read_strokes
  path = File.join(DATA_DIR, 'Unihan_DictionaryLikeData.txt')
  @strokes = {}
  File.foreach(path) do |line|
    next if line.start_with? '#'

    codepoint, field, value = line.chomp.split("\t")
    next unless field == 'kTotalStrokes'

    # First column looks like "U+4E00": strip the prefix, parse the hex
    # digits, and turn the codepoint into a UTF-8 character.
    char = [codepoint.sub(/^U\+(.*)$/, '\1').hex].pack('U')
    @strokes[char] = value.to_i
  end
end
read_version() click to toggle source
# File lib/unihan2.rb, line 53
# Load the codepoint-range/version table into @vers.
#
# Reads unicode-chars-ver.csv (with a header row) under DATA_DIR. Each
# row becomes { range: cp1..cp2, age: Float }, where cp1 and cp2 are
# parsed as hexadecimal and age with String#to_f.
#
# NOTE(review): ver_bsearch binary-searches @vers, so the rows are
# presumably sorted by ascending codepoint in the CSV; confirm against
# the data file, since nothing here sorts them.
def read_version
  @vers = []
  path = File.join(DATA_DIR, 'unicode-chars-ver.csv')
  CSV.foreach(path, headers: true) do |row|
    first_cp = row['cp1'].hex
    last_cp = row['cp2'].hex
    @vers << { range: first_cp..last_cp, age: row['age'].to_f }
  end
end
ver_bsearch(code, start, stop) click to toggle source
# File lib/unihan2.rb, line 64
# Binary-search @vers[start..stop] for the entry whose :range contains
# +code+.
#
# @param code [Integer] the codepoint to locate
# @param start [Integer] lowest index of @vers to consider
# @param stop [Integer] highest index of @vers to consider
# @return [Float, nil] the matching entry's :age, or nil if no range in
#   the window contains +code+
def ver_bsearch(code, start, stop)
  low = start
  high = stop

  while low <= high
    mid = low + (high - low) / 2
    entry = @vers[mid]
    span = entry[:range]
    return entry[:age] if span.include?(code)

    below = code < span.begin
    if mid == low
      # The window holds one or two entries: nothing lies to the left
      # of mid, so either give up or step past it.
      return nil if below

      low = mid + 1
    elsif below
      high = mid
    else
      low = mid
    end
  end

  nil
end