module HangulTools
Courtesy of algorithms described at: gernot-katzers-spice-pages.com/var/korean_hangul_unicode.html
Constants
- BLENDS
- LEADS
- TAILS
- VOWELS
Public Class Methods
decompose(text)
click to toggle source
It is assumed that `text` contains nothing but Hangul syllable codepoints.
# File lib/hangul_tools.rb, line 22
#
# Splits every precomposed Hangul syllable in +text+ into its jamo indices.
#
# Precomposed syllables occupy U+AC00..U+D7A3 and are laid out as
# (lead * 21 + vowel) * 28 + tail, so two divisions recover the parts.
#
# text - a String consisting solely of precomposed Hangul syllables.
#
# Returns an Array holding one [lead, vowel, tail] triple per codepoint.
# lead and vowel are 1-based; tail is 0-based.
# Raises ArgumentError when a codepoint lies outside U+AC00..U+D7A3, rather
# than silently returning meaningless (possibly negative) indices.
def self.decompose(text)
  first_syllable = 0xAC00 # 44032
  last_syllable = 0xD7A3
  tails_per_vowel = 28
  tails_per_lead = 588 # 21 vowels * 28 tails

  text.codepoints.map do |point|
    unless point.between?(first_syllable, last_syllable)
      raise ArgumentError, format('not a precomposed Hangul syllable: U+%04X', point)
    end

    lead, remainder = (point - first_syllable).divmod(tails_per_lead)
    vowel, tail = remainder.divmod(tails_per_vowel)
    [lead + 1, vowel + 1, tail]
  end
end
matrices()
click to toggle source
# File lib/hangul_tools.rb, line 71
#
# Builds (once) and returns the tables stored after the "__END__" marker
# of this source file.
#
# The data section is read line by line: a line of the form "name:" opens a
# new section, blank lines are skipped, and every other line is collected as
# a row of the current section. Each finished section is handed to
# parse_matrix and stored under its name as a Symbol.
#
# Returns a Hash of Symbol section name => parsed matrix; the result is
# memoized in @matrices.
def self.matrices
  @matrices ||= begin
    source = File.read(__FILE__).lines
    data = source[(source.index("__END__\n") + 1)..-1]

    tables = {}
    section = nil
    rows = nil
    # Stores the section collected so far, if any section has been opened.
    store = -> { tables[section.to_sym] = parse_matrix(rows) if rows }

    data.each do |line|
      case line
      when /^(\w+):$/
        upcoming = Regexp.last_match(1)
        store.call
        section = upcoming
        rows = []
      when /^$/
        next # blank separator line
      else
        rows << line
      end
    end
    store.call # the final section has no following header to trigger it

    tables
  end
end
parse_matrix(lines)
click to toggle source
# File lib/hangul_tools.rb, line 91
#
# Turns the text rows of one table into a nested lookup Hash.
#
# The first line is the header: its first whitespace-separated token is
# discarded and the remaining tokens become column keys ("_" => nil,
# "final" => :final, anything else stays a String).
# Every following line is a row: its first token is the row key
# ("initial" => :initial, "voiced" => :voiced, "_" => nil, anything else
# stays a String) and the remaining tokens are the cell values, with "_"
# read as nil. Rows shorter than the header are padded with nil.
#
# lines - an Array of Strings (header first, then rows).
#
# Returns a Hash of row key => { column key => value }. An empty +lines+
# yields an empty Hash instead of failing on the missing header.
def self.parse_matrix(lines)
  return {} if lines.empty?

  column_keys = { '_' => nil, 'final' => :final }
  row_keys = { 'initial' => :initial, 'voiced' => :voiced, '_' => nil }

  header, *rows = lines
  columns = header.split(/\s+/).drop(1).map { |token| column_keys.fetch(token, token) }

  rows.each_with_object({}) do |line, matrix|
    label, *sounds = line.split(/\s+/)
    sounds.map! { |sound| sound == '_' ? nil : sound }
    matrix[row_keys.fetch(label, label)] = columns.zip(sounds).to_h
  end
end
romanize(text, system: :revised, initial: :initial)
click to toggle source
# File lib/hangul_tools.rb, line 5
#
# Romanizes the Hangul syllables in +text+ and leaves everything else as is.
#
# The text is cut into alternating runs of Hangul syllables
# (U+AC00..U+D7A3) and other characters. Each Hangul run is passed to
# romanize_with_system; other runs are copied through unchanged.
#
# text    - the String to romanize.
# system  - the romanization system key, forwarded to romanize_with_system
#           (default: :revised).
# initial - the context passed for a Hangul run that starts the text
#           (default: :initial). Any later Hangul run is passed :voiced.
#
# Returns the romanized String.
def self.romanize(text, system: :revised, initial: :initial)
  text.scan(/[\uAC00-\uD7a3]+|[^\uAC00-\uD7a3]+/).map.with_index do |string, idx|
    if string =~ /[\uAC00-\uD7a3]/
      # Only the very first run of the text counts as word-initial.
      romanize_with_system(string, system, idx > 0 ? :voiced : initial)
    else
      string
    end
  end.join
end
romanize_with_system(text, system, voiced)
click to toggle source
# File lib/hangul_tools.rb, line 32
#
# Romanizes a run of Hangul syllables using the tables of one system.
#
# For every syllable, the leading consonant is looked up in the system's
# matrix under the preceding context: the previous syllable's tail, or
# +voiced+ for the first syllable. The vowel is then added, and the last
# syllable additionally contributes the :final reading of its own tail.
# Missing entries are skipped, the pieces are joined, and finally every
# pattern in the system's BLENDS is substituted in order.
#
# text   - a String of Hangul syllables (it is passed to decompose).
# system - the key used to select entries from matrices, VOWELS and BLENDS.
# voiced - the matrix row key used as context for the first syllable.
#
# Returns the romanized String.
def self.romanize_with_system(text, system, voiced)
  matrix = matrices[system]
  vowels = VOWELS[system]
  blends = BLENDS[system]

  syllables = decompose(text)
  last_position = syllables.length - 1

  pieces = syllables.each_with_index.flat_map do |(lead, vowel, tail), position|
    # Context for the lead: previous syllable's tail, or the caller's hint.
    context = position.zero? ? voiced : TAILS[syllables[position - 1][2].to_i]

    sounds = [(matrix[context] || {})[LEADS[lead]], vowels[vowel]]
    # Only the closing syllable spells out its own tail; every other tail is
    # rendered through the following syllable's lead lookup.
    sounds << (matrix[TAILS[tail]] || {})[:final] if position == last_position
    sounds
  end

  blends.reduce(pieces.compact.join) do |romanized, (pattern, blend)|
    romanized.gsub(pattern, blend)
  end
end