class ThaiLang::Royin
Ported from github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/transliterate/royin.py (be1265d)
Constants
- CONSONANTS
พยัญชนะ ต้น สะกด
- VOWELS
- WORDCUT
Public Instance Methods
normalize(word)
click to toggle source
# File lib/thailang4r/roman.rb, line 117
# Removes characters that should not be romanized from +word+.
#
# The pattern deletes, in order of precedence:
# * the fixed clusters จน์, มณ์, ณฑ์, ทร์ and ตร์;
# * any consonant (ก-ฮ) directly followed by thanthakhat (์);
# * any consonant followed by one vowel sign in the range ะ..ู and then ์;
# * the single characters ฯ, ๆ, everything from ่ through ๏, and ๚ / ๛.
#
# @param word [String] Thai text to clean up
# @return [String] a new string; +word+ itself is not modified
def normalize(word)
  # The vowel range must be its own bracket expression: written as a bare
  # "ะ-ู" it only matches that literal three-character text, so words such
  # as "สิทธิ์" kept their silent consonant and vowel.
  word.gsub(/จน์|มณ์|ณฑ์|ทร์|ตร์|[ก-ฮ]์|[ก-ฮ][ะ-ู]์|[ฯๆ่-๏๚๛]/, "")
end
replace_consonants(word, consonants)
click to toggle source
# File lib/thailang4r/roman.rb, line 126
# Replaces the Thai consonants in +word+ with Latin letters taken from
# CONSONANTS, walking +word+ from left to right and consuming one entry of
# +consonants+ per step.
#
# CONSONANTS[c][0] is used for the first consonant emitted and
# CONSONANTS[c][1] for every later one. A doubled "รร" becomes "an" when it
# ends the word and "a" otherwise. Text that is not a Thai consonant is
# copied to the output unchanged, including whatever follows the last
# consonant.
#
# @param word [String] text whose vowels have already been romanized
# @param consonants [Array<String>, nil] the Thai consonants of +word+, in
#   order of appearance
# @return [String] the romanized text, or +word+ itself when there are no
#   consonants to replace
def replace_consonants(word, consonants)
  # An empty Array is truthy in Ruby, so emptiness has to be tested
  # explicitly; otherwise consonant-free input would romanize to "".
  return word if consonants.nil? || consonants.empty?

  if consonants.length == 1
    only = consonants.first
    return word.gsub(only, CONSONANTS[only][0])
  end

  rom = +""
  rest = word
  skip = false

  consonants.each do |consonant|
    # Copy everything up to the next consonant verbatim. \A anchors at the
    # start of the remaining text (^ would also match after a newline).
    prefix = rest[/\A[^ก-ฮ]+/]
    if prefix
      rom << prefix
      rest = rest[prefix.length..].to_s
    end

    if skip
      # Second "ร" of a "รร" pair: already consumed on the previous step.
      skip = false
    elsif rom.empty? && rest == "ห"
      # NOTE(review): this only fires when the whole remainder is exactly
      # "ห"; confirm whether a leading "ห" before other consonants was meant
      # to be dropped as well.
      rest = rest[1..].to_s
    elsif rom.empty?
      rom << CONSONANTS[consonant][0]
      rest = rest[consonant.length..].to_s
    elsif consonant == "ร" && rest == "รร"
      rom << "an"
      rest = rest[2..].to_s
      skip = true
    elsif consonant == "ร" && rest.start_with?("รร")
      rom << "a"
      rest = rest[2..].to_s
      skip = true
    else
      rom << CONSONANTS[consonant][1]
      rest = rest[consonant.length..].to_s
    end
  end

  # Keep whatever follows the last consonant (e.g. a trailing vowel).
  rom + rest
end
replace_vowel(word)
click to toggle source
# File lib/thailang4r/roman.rb, line 121
# Applies every substitution listed in VOWELS to +word+, in VOWELS' own
# iteration order, each one operating on the result of the previous.
#
# @param word [String] text to transform
# @return [String] the text after all substitutions; the argument is left
#   untouched, so frozen strings are accepted
def replace_vowel(word)
  # Fold with the non-destructive gsub so the caller's string is never
  # mutated (gsub! would also raise FrozenError on a frozen argument).
  VOWELS.reduce(word) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
romanize(text, delim = "")
click to toggle source
# File lib/thailang4r/roman.rb, line 165
# Romanizes +text+: WORDCUT splits it into words, each word goes through
# romanize_word, and the results are joined with +delim+.
#
# @param text [String] text to romanize
# @param delim [String] separator placed between the romanized words
# @return [String] the joined romanized words
def romanize(text, delim = "")
  words = WORDCUT.break_into_words(text)
  romanized = words.map { |word| romanize_word(word) }
  romanized.join(delim)
end
romanize_word(word)
click to toggle source
# File lib/thailang4r/roman.rb, line 151
# Romanizes a single word: normalize it, romanize its vowels, then romanize
# its consonants.
#
# @param word [String] one word
# @return [String] the romanized word
def romanize_word(word)
  prepared = replace_vowel(normalize(word))
  thai_consonants = prepared.scan(/[ก-ฮ]/)

  # A word made of exactly two consonants has no written vowel, so an "o" is
  # inserted between them.
  if prepared.length == 2 && thai_consonants.length == 2
    prepared = "#{prepared[0]}o#{prepared[1]}"
  end

  replace_consonants(prepared, thai_consonants)
end