class ThaiLang::Royin

Ported from github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/transliterate/royin.py (be1265d)

Constants

CONSONANTS

พยัญชนะ ต้น สะกด (consonant, initial-position form, final-position form)

VOWELS
WORDCUT

Public Instance Methods

normalize(word) click to toggle source
# File lib/thailang4r/roman.rb, line 117
# Strips characters that should not be romanized from +word+.
#
# Removed, in order of precedence:
# * the fixed clusters จน์ มณ์ ณฑ์ ทร์ ตร์,
# * any consonant directly followed by ์ (thanthakhat, U+0E4C),
# * any consonant + one vowel sign in U+0E30..U+0E39 + ์,
# * the standalone characters ฯ ๆ ๚ ๛ and everything in U+0E48..U+0E4F
#   (which includes the tone marks and a bare ์).
#
# @param word [String] Thai text
# @return [String] a new string; +word+ itself is not modified
def normalize(word)
  # The vowel range must be a bracketed character class; written bare
  # ("ะ-ู์") it only matches that literal four-character sequence, so
  # clusters such as ธิ์ or ธุ์ were never removed as a unit.
  word.gsub(/จน์|มณ์|ณฑ์|ทร์|ตร์|[ก-ฮ]์|[ก-ฮ][ะ-ู]์|[ฯๆ่-๏๚๛]/, "")
end
replace_consonants(word, consonants) click to toggle source
# File lib/thailang4r/roman.rb, line 126
# Replaces the Thai consonants of +word+ with their romanized forms and
# returns the resulting string.
#
# +consonants+ is walked in order and each entry is assumed to be the next
# Thai consonant found in +word+ (romanize_word builds it with
# word.scan(/[ก-ฮ]/)). Characters outside the range ก-์ are copied through
# unchanged.
#
# CONSONANTS[c][0] is used while nothing has been emitted yet,
# CONSONANTS[c][1] for every later consonant. A pair "รร" becomes "an" when
# it ends the word and "a" otherwise.
#
# @param word [String] word whose consonants are still Thai
# @param consonants [Array<String>, nil] Thai consonants of +word+, in order
# @return [String] the romanized word, or +word+ itself when there is
#   nothing to replace
def replace_consonants(word, consonants)
  # An empty Array is truthy in Ruby, so emptiness has to be tested
  # explicitly; otherwise a word without Thai consonants collapses to "".
  return word if consonants.nil? || consonants.empty?
  return word.gsub(consonants[0], CONSONANTS[consonants[0]][0]) if consonants.length == 1

  state = consonants.reduce({rom: "", th: word}) do |w, consonant|
    rom = w[:rom]
    th = w[:th]

    # Copy a leading run of non-Thai characters straight to the output.
    # \A (not ^) so that only the very start of the remaining text is
    # considered, even when it contains a newline further on.
    passthrough = th[/\A[^ก-์]+/]
    if passthrough
      rom += passthrough
      th = th[passthrough.length..-1]
    end

    if w[:skip]
      # This consonant is the second ร of a "รร" pair that the previous
      # step already consumed; just clear the skip flag.
      {rom: rom, th: th}
    elsif rom == "" && th == "ห"
      # NOTE(review): fires only when the remaining text is exactly "ห".
      # The upstream Python implementation appears to skip any leading ห;
      # confirm which behaviour is intended before changing this.
      {rom: "", th: th[1..-1]}
    elsif rom == ""
      {rom: CONSONANTS[consonant][0], th: th[consonant.length..-1]}
    elsif consonant == "ร" && th == "รร"
      {rom: rom + "an", th: th[2..-1], skip: true}
    elsif consonant == "ร" && th[0..1] == "รร"
      {rom: rom + "a", th: th[2..-1], skip: true}
    else
      {rom: rom + CONSONANTS[consonant][1], th: th[consonant.length..-1]}
    end
  end

  # Whatever follows the last consonant (e.g. a trailing vowel) belongs to
  # the word too. Interpolation tolerates a nil remainder.
  "#{state[:rom]}#{state[:th]}"
end
replace_vowel(word) click to toggle source
# File lib/thailang4r/roman.rb, line 121
# Applies every (pattern, replacement) pair of VOWELS to +word+, in the
# order VOWELS enumerates them, and returns the rewritten string.
#
# The substitutions are done on copies, so the caller's string is left
# untouched and frozen strings are accepted.
#
# @param word [String]
# @return [String]
def replace_vowel(word)
  VOWELS.reduce(word) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
romanize(text, delim = "") click to toggle source
# File lib/thailang4r/roman.rb, line 165
# Romanizes +text+: splits it with WORDCUT.break_into_words, romanizes each
# piece with romanize_word and joins the results with +delim+.
#
# @param text [String] text to romanize
# @param delim [String] separator placed between romanized words
# @return [String]
def romanize(text, delim = "")
  pieces = WORDCUT.break_into_words(text)
  romanized = pieces.map { |piece| romanize_word(piece) }
  romanized.join(delim)
end
romanize_word(word) click to toggle source
# File lib/thailang4r/roman.rb, line 151
# Romanizes a single word: normalize, replace vowels, then replace
# consonants.
#
# When the vowel-replaced word consists of exactly two Thai consonants, an
# "o" is inserted between them before the consonants are replaced.
#
# @param word [String]
# @return [String]
def romanize_word(word)
  prepared = replace_vowel(normalize(word))
  thai_consonants = prepared.scan(/[ก-ฮ]/)
  bare_pair = prepared.length == 2 && thai_consonants.length == 2
  prepared = "#{prepared[0]}o#{prepared[1]}" if bare_pair
  replace_consonants(prepared, thai_consonants)
end