class Ve::Parse::JapaneseTransliterators
Constants
- HIRA_TO_LATN
- H_SMALL_TSU
- H_SYLLABIC_N
- LATN_TO_HIRA
Attributes
text[R]
tokens[R]
Public Class Methods
new(text)
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 148
#
# Builds a transliterator around +text+.
#
# @param text [String] the string the transliterate_* methods read from
#   (stored as-is in @text, not copied).
#
# @tokens starts out as an empty array; none of the methods visible in this
# file populate it.
def initialize(text)
  @text = text
  @tokens = []
end
Public Instance Methods
transliterate_from_fullwidth_to_halfwidth()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 262
#
# Returns a copy of @text in which fullwidth forms are replaced by their
# ASCII counterparts. @text itself is not modified.
#
# @return [String]
def transliterate_from_fullwidth_to_halfwidth
  # U+FF01..U+FF5E (fullwidth "!" to "~") sit 0xFEE0 above U+0021..U+007E.
  fullwidth_ascii = 0xFF01..0xFF5E
  ascii_folded = transpose_codepoints_in_range(@text, -0xFEE0, fullwidth_ascii)

  # The ideographic space U+3000 is handled separately: it maps to U+0020.
  ideographic_space = 0x3000..0x3000
  transpose_codepoints_in_range(ascii_folded, -0x2FE0, ideographic_space)
end
transliterate_from_halfwidth_to_fullwidth()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 267
#
# Returns a copy of @text in which printable ASCII is replaced by the
# corresponding fullwidth forms. @text itself is not modified.
#
# @return [String]
def transliterate_from_halfwidth_to_fullwidth
  # U+0021..U+007E ("!" to "~") move up by 0xFEE0 to U+FF01..U+FF5E.
  printable_ascii = 0x21..0x7E
  widened = transpose_codepoints_in_range(@text, 0xFEE0, printable_ascii)

  # The plain space U+0020 becomes the ideographic space U+3000.
  ascii_space = 0x20..0x20
  transpose_codepoints_in_range(widened, 0x2FE0, ascii_space)
end
transliterate_from_hira_to_kana()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 258
#
# Returns a copy of @text with hiragana replaced by katakana. Characters
# outside the hiragana range are copied unchanged; @text is not modified.
#
# @return [String]
def transliterate_from_hira_to_kana
  # U+3041 (ぁ) .. U+3096 (ゖ); each katakana counterpart sits 0x60 higher.
  hiragana = 0x3041..0x3096
  offset_to_katakana = 0x60
  transpose_codepoints_in_range(@text, offset_to_katakana, hiragana)
end
transliterate_from_hira_to_latn()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 158
#
# Romanises the hiragana in @text (Hepburn style romaji) and returns the
# result. Works on a copy, so @text is not modified.
#
# The text is consumed from the front, trying a two-character chunk before a
# one-character chunk so that digraph entries in HIRA_TO_LATN win over their
# single-character prefixes. Characters with no mapping are copied as-is.
#
# @return [String]
def transliterate_from_hira_to_latn
  remaining = @text.dup
  output = ''
  double_next = false # set after a small tsu; doubles the next mora's first letter

  until remaining.empty?
    [2, 1].each do |width|
      chunk = remaining[0, width]

      # Small tsu emits nothing by itself; it only marks the next mora.
      if chunk == H_SMALL_TSU
        double_next = true
        remaining[0, width] = ''
        break
      end

      latin =
        if chunk == H_SYLLABIC_N && remaining[1, 1].match(/[やゆよ]/)
          # Syllabic N before ya, yu or yo gets an apostrophe.
          "n'"
        else
          # Generic cases; anything unmapped falls through as ''.
          HIRA_TO_LATN[chunk] || ''
        end

      if latin.length > 0
        if double_next
          double_next = false
          output << latin[0, 1]
        end
        output << latin
        remaining[0, width] = ''
        break
      end

      # Nothing matched: retry with the shorter chunk, or give up and copy
      # the single character through.
      next unless width == 1

      output << chunk
      remaining[0, width] = ''
    end
  end

  output
end
transliterate_from_hrkt_to_latn()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 153
#
# Romanises text that may mix hiragana and katakana: katakana is first folded
# to hiragana, then the hiragana romaniser runs on the folded text.
#
# The romaniser reads its input from @text, so @text is swapped for the
# folded string while it runs. The original value is put back afterwards
# (also when an exception is raised), so the +text+ reader and any later
# transliterate_* call still see the caller's input.
#
# @return [String] the romanised text
def transliterate_from_hrkt_to_latn
  original_text = @text
  @text = transliterate_from_kana_to_hira
  transliterate_from_hira_to_latn
ensure
  @text = original_text
end
transliterate_from_kana_to_hira()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 254
#
# Returns a copy of @text with katakana replaced by hiragana. Characters
# outside the katakana range are copied unchanged; @text is not modified.
#
# @return [String]
def transliterate_from_kana_to_hira
  # U+30A1 (ァ) .. U+30F6 (ヶ); each hiragana counterpart sits 0x60 lower.
  katakana = 0x30A1..0x30F6
  offset_to_hiragana = -0x60
  transpose_codepoints_in_range(@text, offset_to_hiragana, katakana)
end
transliterate_from_latn_to_hrkt()
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 200
#
# Converts the romaji in @text to kana and returns the result. Works on a
# copy, so @text holds its original value when the method returns.
#
# The text is consumed from the front, trying three-, two- and one-character
# chunks in that order. Matching is case-insensitive; a chunk that starts
# with an upper-case letter is emitted as katakana, everything else as
# hiragana. Characters that cannot be converted are copied through exactly
# as they appear in the input, keeping their case.
#
# @return [String]
def transliterate_from_latn_to_hrkt
  romaji = @text.dup
  kana = ''

  # Treat "m" before b/p as "n" (presumably so spellings such as "tombo"
  # convert like "tonbo").
  romaji.gsub!(/m([BbPp])/, 'n\1')
  romaji.gsub!(/M([BbPp])/, 'N\1')

  until romaji.empty?
    [3, 2, 1].each do |length|
      original_chunk = romaji[0, length]
      for_conversion = original_chunk.downcase
      is_upper = !!(original_chunk =~ /\A\p{Upper}/)
      mora = ''
      for_removal = length

      if for_conversion =~ /nn[aiueo]/
        # nna should kanafy to んな instead of んあ.
        # This is what people expect for words like konna, anna, zannen.
        # Only the first n is consumed; the rest is re-read next round.
        mora = H_SYLLABIC_N
        for_removal = 1
      elsif LATN_TO_HIRA[for_conversion]
        # Generic cases
        mora = LATN_TO_HIRA[for_conversion]
      elsif for_conversion == 'tch' ||
            (length == 2 && for_conversion =~ /([kgsztdnbpmyrlwchf])\1/)
        # tch and double consonants become a small tsu; only the first
        # consonant is consumed.
        mora = H_SMALL_TSU
        for_removal = 1
      end

      if mora.length > 0
        if is_upper
          # transliterate_from_hira_to_kana reads @text, so lend it the mora
          # for the duration of the call and always hand the original back.
          # TODO: Need a better way for this
          saved_text = @text
          begin
            @text = mora.dup
            kana << transliterate_from_hira_to_kana
          ensure
            @text = saved_text
          end
        else
          kana << mora
        end
        romaji[0, for_removal] = ''
        break
      elsif length == 1
        # Nothing found: pass the character through untouched (not the
        # down-cased copy used for lookups).
        kana << original_chunk
        romaji[0, 1] = ''
      end
    end
  end

  kana
end
Private Instance Methods
transpose_codepoints_in_range(text, distance, range)
click to toggle source
# File lib/providers/japanese_transliterators.rb, line 274
#
# Builds a new string from +text+ in which every code point lying between
# range.first and range.last (both inclusive) is shifted by +distance+; all
# other code points are copied unchanged. +text+ itself is not modified.
#
# @param text [String] input string
# @param distance [Integer] signed offset added to matching code points
# @param range [Range] code points to shift
# @return [String] the shifted copy
def transpose_codepoints_in_range(text, distance, range)
  text.each_codepoint.each_with_object('') do |codepoint, shifted|
    in_range = range.first <= codepoint && range.last >= codepoint
    target = in_range ? codepoint + distance : codepoint
    shifted << target.chr(Encoding::UTF_8)
  end
end