class Unihan2
Constants
- DATA_DIR
Public Class Methods
new()
click to toggle source
# File lib/unihan2.rb, line 6
# Builds a new instance by loading both lookup tables up front:
# read_strokes is invoked first, followed by read_version.
def initialize
  read_strokes
  read_version
end
Public Instance Methods
strokes(char)
click to toggle source
Returns the total number of strokes of the character char (nil if no stroke count is recorded for it). @param char [String] the character @return [Integer] the total strokes
# File lib/unihan2.rb, line 15
# Looks up the total stroke count stored in @strokes for a character.
#
# @param char [String] the character to look up
# @return [Integer, nil] the stroke count, or nil when @strokes has no entry for +char+
def strokes(char)
  @strokes.fetch(char, nil)
end
ver(code)
click to toggle source
Returns the Unicode version of a specific character. @param code [String, Integer] a character, or its codepoint given as an Integer or a hexadecimal String @return [Float] Unicode version
# File lib/unihan2.rb, line 22
# Returns the Unicode version recorded for a character.
#
# The argument may be given as:
# * an Integer codepoint (e.g. 0x4E00),
# * a one-character String (the character itself), or
# * a hexadecimal String, optionally prefixed with "U+" (e.g. "4E00", "U+4E00").
#
# @param code [String, Integer, nil] character or codepoint
# @return [Float, nil] the version, or nil when +code+ is nil or no range in @vers covers it
def ver(code)
  return nil if code.nil?

  codepoint =
    if code.is_a?(Integer)
      code
    elsif code.size == 1
      code.codepoints.first
    else
      # String#hex stops at the first non-hex character, so an unstripped
      # "U+4E00" would parse as 0 and silently report the version of U+0000.
      code.sub(/\AU\+/i, '').hex
    end

  ver_bsearch(codepoint, 0, @vers.size - 1)
end
Private Instance Methods
read_strokes()
click to toggle source
# File lib/unihan2.rb, line 38
# Populates @strokes, a Hash mapping each character to its kTotalStrokes
# value, from the tab-separated file Unihan_DictionaryLikeData.txt in DATA_DIR.
def read_strokes
  path = File.join(DATA_DIR, 'Unihan_DictionaryLikeData.txt')
  @strokes = {}
  File.foreach(path) do |line|
    # Lines beginning with '#' are skipped.
    next if line.start_with?('#')

    codepoint, field, value = line.chomp.split("\t")
    next unless field == 'kTotalStrokes'

    # "U+4E00" -> "4E00" -> 0x4E00 -> the character encoded from that codepoint.
    char = [codepoint.sub(/^U\+(.*)$/, '\1').hex].pack('U')
    @strokes[char] = value.to_i
  end
end
read_version()
click to toggle source
# File lib/unihan2.rb, line 53
# Populates @vers from unicode-chars-ver.csv in DATA_DIR. Every CSV row
# becomes a Hash with :range (the inclusive codepoint Range parsed from the
# hexadecimal cp1 and cp2 columns) and :age (the age column as a Float).
def read_version
  @vers = []
  path = File.join(DATA_DIR, 'unicode-chars-ver.csv')
  CSV.foreach(path, headers: true) do |row|
    first_cp = row['cp1'].hex
    last_cp = row['cp2'].hex
    entry = { range: (first_cp..last_cp), age: row['age'].to_f }
    @vers << entry
  end
end
ver_bsearch(code, start, stop)
click to toggle source
# File lib/unihan2.rb, line 64
# Binary-searches the entries of @vers between indices +start+ and +stop+
# (both inclusive) for the one whose :range contains +code+.
# NOTE(review): assumes @vers is sorted by ascending, non-overlapping ranges — confirm against the CSV.
#
# @param code [Integer] codepoint to look up
# @param start [Integer] first index of the search window
# @param stop [Integer] last index of the search window
# @return [Float, nil] the :age of the matching entry, or nil when none matches
def ver_bsearch(code, start, stop)
  low = start
  high = stop
  while low <= high
    mid = (high - low) / 2 + low
    entry = @vers[mid]
    span = entry[:range]
    return entry[:age] if span.include?(code)

    if code < span.begin
      # The probe is already the window's first slot: nothing further left to try.
      return nil if mid == low

      high = mid
    elsif mid == low
      # Step past the probed slot so the window keeps shrinking.
      low = mid + 1
    else
      low = mid
    end
  end
  nil
end