class Ve::Parse::JapaneseTransliterators

Constants

HIRA_TO_LATN
H_SMALL_TSU
H_SYLLABIC_N
LATN_TO_HIRA

Attributes

text[R]
tokens[R]

Public Class Methods

new(text) click to toggle source
# File lib/providers/japanese_transliterators.rb, line 148
# Builds a transliterator around +text+.
#
# text - the value the transliterate_* methods read from @text.
#
# @tokens starts out as an empty Array.
def initialize(text)
  @text = text
  @tokens = []
end

Public Instance Methods

transliterate_from_fullwidth_to_halfwidth() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 262
# Returns a copy of @text in which code points U+FF01..U+FF5E are moved
# down by 0xFEE0 (onto U+0021..U+007E) and U+3000 is moved down onto
# U+0020. All other code points are passed through untouched.
def transliterate_from_fullwidth_to_halfwidth
  without_wide_symbols = transpose_codepoints_in_range(@text, -0xFEE0, 0xFF01..0xFF5E)
  transpose_codepoints_in_range(without_wide_symbols, -0x2FE0, 0x3000..0x3000)
end
transliterate_from_halfwidth_to_fullwidth() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 267
# Returns a copy of @text in which code points U+0021..U+007E are moved
# up by 0xFEE0 (onto U+FF01..U+FF5E) and U+0020 is moved up onto U+3000.
# All other code points are passed through untouched.
def transliterate_from_halfwidth_to_fullwidth
  with_wide_symbols = transpose_codepoints_in_range(@text, 0xFEE0, 0x21..0x7E)
  transpose_codepoints_in_range(with_wide_symbols, 0x2FE0, 0x20..0x20)
end
transliterate_from_hira_to_kana() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 258
# Returns a copy of @text in which every code point in U+3041..U+3096 is
# moved up by 0x60 (onto U+30A1..U+30F6); everything else is unchanged.
def transliterate_from_hira_to_kana
  hiragana_block = 0x3041..0x3096
  offset_to_katakana = 0x60
  transpose_codepoints_in_range(@text, offset_to_katakana, hiragana_block)
end
transliterate_from_hira_to_latn() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 158
# Converts @text to Hepburn style romaji and returns the result as a new
# String; @text itself is not modified.
#
# The text is consumed from the front. At each position a two-character
# slice is tried before a one-character slice:
# * H_SMALL_TSU is not emitted directly; it doubles the first letter of the
#   next converted mora.
# * H_SYLLABIC_N directly followed by ya, yu or yo becomes "n'".
# * anything else is looked up in HIRA_TO_LATN.
# * a character with no conversion is copied to the output verbatim.
#
# A small tsu that never gets a mora to double (end of text, followed by an
# unconvertible character, or followed by another small tsu) is copied to
# the output verbatim instead of being dropped or applied to a later,
# unrelated mora.
def transliterate_from_hira_to_latn
  kana = @text.dup
  romaji = ''.dup
  geminate = false # true while a small tsu is waiting to double the next mora

  until kana.empty?
    [2, 1].each do |length|
      mora = ''
      for_conversion = kana[0, length]

      if for_conversion == H_SMALL_TSU
        # A small tsu that is still pending had nothing to double: keep it.
        romaji << H_SMALL_TSU if geminate
        geminate = true
        kana[0, length] = ''
        break
      elsif for_conversion == H_SYLLABIC_N && kana[1, 1].match(/[やゆよ]/)
        # Syllabic N before ya, yu or yo
        mora = "n'"
      elsif HIRA_TO_LATN[for_conversion]
        # Generic cases
        mora = HIRA_TO_LATN[for_conversion]
      end

      if mora.length > 0
        if geminate
          geminate = false
          romaji << mora[0, 1]
        end
        romaji << mora
        kana[0, length] = ''
        break
      elsif length == 1
        # Nothing found: pass the character through. A pending small tsu
        # cannot double it, so it is passed through as well.
        if geminate
          geminate = false
          romaji << H_SMALL_TSU
        end
        romaji << for_conversion
        kana[0, length] = ''
      end
    end
  end

  # Small tsu at the very end of the text.
  romaji << H_SMALL_TSU if geminate
  romaji
end
transliterate_from_hrkt_to_latn() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 153
# Converts @text to romaji in two steps: transliterate_from_kana_to_hira
# first, then transliterate_from_hira_to_latn on that intermediate result.
#
# transliterate_from_hira_to_latn reads its input from @text, so @text is
# pointed at the intermediate string for the duration of the call and is
# put back afterwards (also when an exception is raised), leaving the
# object's text as the caller supplied it.
#
# Returns the romaji String.
def transliterate_from_hrkt_to_latn
  original_text = @text
  @text = transliterate_from_kana_to_hira
  transliterate_from_hira_to_latn
ensure
  @text = original_text
end
transliterate_from_kana_to_hira() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 254
# Returns a copy of @text in which every code point in U+30A1..U+30F6 is
# moved down by 0x60 (onto U+3041..U+3096); everything else is unchanged.
def transliterate_from_kana_to_hira
  katakana_block = 0x30A1..0x30F6
  offset_to_hiragana = -0x60
  transpose_codepoints_in_range(@text, offset_to_hiragana, katakana_block)
end
transliterate_from_latn_to_hrkt() click to toggle source
# File lib/providers/japanese_transliterators.rb, line 200
# Converts the romaji in @text to kana and returns the result as a new
# String; @text holds its original value again when the method returns.
#
# Before conversion, "m" directly followed by b or p is rewritten to "n"
# (case preserved). The text is then consumed from the front, trying
# slices of three, two and finally one character:
# * "nn" + vowel emits H_SYLLABIC_N and consumes only the first "n".
# * a slice found (downcased) in LATN_TO_HIRA emits that entry.
# * "tch" or a doubled consonant emits H_SMALL_TSU and consumes one letter.
# * a slice starting with an uppercase letter has its output run through
#   transliterate_from_hira_to_kana.
# * a character with no conversion is copied to the output exactly as it
#   was written, including its case.
def transliterate_from_latn_to_hrkt
  romaji = @text.dup
  kana = ''.dup

  romaji.gsub!(/m([BbPp])/, 'n\1')
  romaji.gsub!(/M([BbPp])/, 'N\1')

  until romaji.empty?
    [3, 2, 1].each do |length|
      mora = ''
      for_removal = length
      chunk = romaji[0, length]
      is_upper = !!(chunk.match(/^\p{Upper}/))
      # Table lookups are case-insensitive; the original chunk is kept for
      # the pass-through case below.
      for_conversion = chunk.downcase

      if for_conversion.match(/nn[aiueo]/)
        # nna should kanafy to んな instead of んあ
        # This is what people expect for words like konna, anna, zannen
        mora = H_SYLLABIC_N
        for_removal = 1
      elsif LATN_TO_HIRA[for_conversion]
        # Generic cases
        mora = LATN_TO_HIRA[for_conversion]
      elsif for_conversion == 'tch' || (length == 2 && for_conversion.match(/([kgsztdnbpmyrlwchf])\1/))
        # tch and double-consonants for small tsu
        mora = H_SMALL_TSU
        for_removal = 1
      end

      if mora.length > 0
        if is_upper
          # transliterate_from_hira_to_kana reads @text, so point @text at
          # the mora for the call and always put the real text back.
          saved_text = @text
          begin
            @text = mora.dup
            kana << transliterate_from_hira_to_kana
          ensure
            @text = saved_text
          end
        else
          kana << mora
        end

        romaji[0, for_removal] = ''
        break
      elsif length == 1
        # Nothing found: keep the character as written (not downcased).
        kana << chunk
        romaji[0, 1] = ''
      end
    end
  end

  kana
end

Private Instance Methods

transpose_codepoints_in_range(text, distance, range) click to toggle source
# File lib/providers/japanese_transliterators.rb, line 274
# Shifts selected code points of +text+ and returns the result as a new
# UTF-8 String.
#
# text     - String whose code points are walked in order.
# distance - Integer added to each code point that is selected.
# range    - object answering #first and #last; a code point is selected
#            when it lies between those two values, both ends included.
#
# Code points that are not selected are copied over unchanged.
def transpose_codepoints_in_range(text, distance, range)
  text.each_codepoint.each_with_object(''.dup) do |codepoint, shifted|
    offset = codepoint.between?(range.first, range.last) ? distance : 0
    shifted << (codepoint + offset).chr(Encoding::UTF_8)
  end
end