module Mojinizer

Constants

KANA_TO_ROM
KANA_TO_ROM2
ROM_TO_KATA1

1 character romaji patterns

ROM_TO_KATA2

2 character romaji patterns

ROM_TO_KATA3

3 character romaji patterns

VERSION

Public Instance Methods

ascii_zenkaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 27
# Reports whether every character of the receiver matches the combined
# Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL flag (presumably full-width
# alphanumerics and ASCII symbols, going by the constant names).
#
# @return [Boolean] whatever #moji_type? returns for the combined flag
def ascii_zenkaku?
  zenkaku_ascii = Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
  moji_type?(zenkaku_ascii)
end
contains_ascii_zenkaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 64
# Reports whether at least one character of the receiver matches the
# combined Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL flag.
#
# @return [Boolean] whatever #contains_moji_type? returns for the combined flag
def contains_ascii_zenkaku?
  zenkaku_ascii = Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
  contains_moji_type?(zenkaku_ascii)
end
contains_hankaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 56
# Reports whether at least one character of the receiver matches the
# combined Moji::HAN_KATA | Moji::HAN_JSYMBOL flag (presumably half-width
# katakana and half-width Japanese symbols, going by the constant names).
#
# @return [Boolean] whatever #contains_moji_type? returns for the combined flag
def contains_hankaku?
  hankaku_flags = Moji::HAN_KATA | Moji::HAN_JSYMBOL
  contains_moji_type?(hankaku_flags)
end
contains_hiragana?() click to toggle source
# File lib/mojinizer/detection.rb, line 40
# Reports whether at least one character of the receiver matches Moji::HIRA.
#
# @return [Boolean] whatever #contains_moji_type? returns for Moji::HIRA
def contains_hiragana?
  hiragana_flag = Moji::HIRA
  contains_moji_type?(hiragana_flag)
end
contains_japanese?() click to toggle source
# File lib/mojinizer/detection.rb, line 68
# Reports whether at least one character of the receiver matches the
# combined Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA flag.
#
# @return [Boolean] whatever #contains_moji_type? returns for the combined flag
def contains_japanese?
  japanese_flags = Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA
  contains_moji_type?(japanese_flags)
end
contains_kana?() click to toggle source
# File lib/mojinizer/detection.rb, line 44
# Reports whether at least one character of the receiver matches Moji::KANA.
#
# @return [Boolean] whatever #contains_moji_type? returns for Moji::KANA
def contains_kana?
  kana_flag = Moji::KANA
  contains_moji_type?(kana_flag)
end
contains_kanji?() click to toggle source
# File lib/mojinizer/detection.rb, line 52
# Reports whether at least one character of the receiver matches Moji::KANJI.
#
# @return [Boolean] whatever #contains_moji_type? returns for Moji::KANJI
def contains_kanji?
  kanji_flag = Moji::KANJI
  contains_moji_type?(kanji_flag)
end
contains_katakana?() click to toggle source
# File lib/mojinizer/detection.rb, line 48
# Reports whether at least one character of the receiver matches Moji::KATA.
#
# @return [Boolean] whatever #contains_moji_type? returns for Moji::KATA
def contains_katakana?
  katakana_flag = Moji::KATA
  contains_moji_type?(katakana_flag)
end
contains_moji_type?(type) click to toggle source
# File lib/mojinizer/detection.rb, line 72
# Reports whether at least one character of the receiver is classified by
# Moji.type? as belonging to +type+. An empty receiver yields false.
#
# @param type the Moji character-type flag handed to Moji.type?
# @return [Boolean] true as soon as one character matches, false otherwise
def contains_moji_type?(type)
  each_char.any? { |char| Moji.type?(char, type) }
end
contains_zenkaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 60
# Reports whether at least one character of the receiver matches Moji::ZEN.
#
# @return [Boolean] whatever #contains_moji_type? returns for Moji::ZEN
def contains_zenkaku?
  zenkaku_flag = Moji::ZEN
  contains_moji_type?(zenkaku_flag)
end
han_to_zen() click to toggle source
# File lib/mojinizer/conversion.rb, line 76
# Passes the receiver to Moji.han_to_zen and returns its result
# (presumably the half-width to full-width conversion of the text).
#
# @return the value returned by Moji.han_to_zen
def han_to_zen
  text = self
  Moji.han_to_zen(text)
end
hankaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 19
# Reports whether every character of the receiver matches the combined
# Moji::HAN_KATA | Moji::HAN_JSYMBOL flag (presumably half-width katakana
# and half-width Japanese symbols, going by the constant names).
#
# @return [Boolean] whatever #moji_type? returns for the combined flag
def hankaku?
  hankaku_flags = Moji::HAN_KATA | Moji::HAN_JSYMBOL
  moji_type?(hankaku_flags)
end
hira_to_kata() click to toggle source
# File lib/mojinizer/conversion.rb, line 68
# Passes the receiver to Moji.hira_to_kata and returns its result
# (presumably the text with hiragana turned into katakana).
#
# @return the value returned by Moji.hira_to_kata
def hira_to_kata
  text = self
  Moji.hira_to_kata(text)
end
hiragana() click to toggle source
# File lib/mojinizer/conversion.rb, line 60
# Converts the receiver to hiragana in two steps: romaji is first turned
# into katakana with #roma_to_kata, then that result is run through
# #kata_to_hira.
#
# @return the value returned by #kata_to_hira on the intermediate result
def hiragana
  as_katakana = roma_to_kata
  as_katakana.kata_to_hira
end
hiragana?() click to toggle source
# File lib/mojinizer/detection.rb, line 3
# Reports whether every character of the receiver matches Moji::HIRA.
#
# @return [Boolean] whatever #moji_type? returns for Moji::HIRA
def hiragana?
  hiragana_flag = Moji::HIRA
  moji_type?(hiragana_flag)
end
japanese?() click to toggle source
# File lib/mojinizer/detection.rb, line 31
# Reports whether every character of the receiver matches the combined
# Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA flag.
#
# @return [Boolean] whatever #moji_type? returns for the combined flag
def japanese?
  japanese_flags = Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA
  moji_type?(japanese_flags)
end
kana?() click to toggle source
# File lib/mojinizer/detection.rb, line 11
# Reports whether the receiver is entirely hiragana or entirely katakana.
#
# NOTE(review): #hiragana? and #katakana? each require *every* character to
# match, so a string that mixes the two scripts makes this return false.
# Confirm that is the intended contract.
#
# @return [Boolean] result of #hiragana?, or of #katakana? when that is falsy
def kana?
  hiragana? || katakana?
end
kanji?() click to toggle source
# File lib/mojinizer/detection.rb, line 15
# Reports whether every character of the receiver matches Moji::KANJI.
#
# @return [Boolean] whatever #moji_type? returns for Moji::KANJI
def kanji?
  kanji_flag = Moji::KANJI
  moji_type?(kanji_flag)
end
kata_to_hira() click to toggle source
# File lib/mojinizer/conversion.rb, line 72
# Passes the receiver to Moji.kata_to_hira and returns its result
# (presumably the text with katakana turned into hiragana).
#
# @return the value returned by Moji.kata_to_hira
def kata_to_hira
  text = self
  Moji.kata_to_hira(text)
end
katakana() click to toggle source
# File lib/mojinizer/conversion.rb, line 64
# Converts the receiver to katakana in two steps: #hira_to_kata is applied
# first, then any romaji left in that result is converted by #roma_to_kata.
#
# @return the value returned by #roma_to_kata on the intermediate result
def katakana
  without_hiragana = hira_to_kata
  without_hiragana.roma_to_kata
end
katakana?() click to toggle source
# File lib/mojinizer/detection.rb, line 7
# Reports whether every character of the receiver matches Moji::KATA.
#
# @return [Boolean] whatever #moji_type? returns for Moji::KATA
def katakana?
  katakana_flag = Moji::KATA
  moji_type?(katakana_flag)
end
moji_type?(type) click to toggle source
# File lib/mojinizer/detection.rb, line 35
# Reports whether every character of the receiver is classified by
# Moji.type? as belonging to +type+. An empty receiver yields true, since
# there is no character that fails the check.
#
# @param type the Moji character-type flag handed to Moji.type?
# @return [Boolean] false as soon as one character does not match, true otherwise
def moji_type?(type)
  each_char.all? { |char| Moji.type?(char, type) }
end
normalize_zen_han() click to toggle source
# File lib/mojinizer/conversion.rb, line 84
# Passes the receiver to Moji.normalize_zen_han and returns its result.
#
# @return the value returned by Moji.normalize_zen_han
def normalize_zen_han
  text = self
  Moji.normalize_zen_han(text)
end
roma_to_kata() click to toggle source
# File lib/mojinizer/conversion.rb, line 88
# Converts the romaji in the receiver to katakana using the ROM_TO_KATA1,
# ROM_TO_KATA2 and ROM_TO_KATA3 lookup tables.
#
# The receiver is consumed one character at a time into a small buffer of at
# most three letters. Depending on how many letters are buffered, the buffer
# is either looked up in the matching table, extended with the next source
# character, or partially flushed to the output. Characters that cannot start
# a romaji syllable are copied through unchanged.
#
# @return [String] the converted text
def roma_to_kata
  kana = ""
  pending = []               # letters read but not yet converted
  remaining = each_char.to_a # source characters not yet read

  loop do
    case pending.size
    when 0
      # Nothing buffered: finish, or pull the next source character.
      return kana if remaining.empty?

      pending.push(remaining.shift)
    when 1
      head = pending[0]
      if head =~ /[aiueo-]/
        # Single-letter syllable, e.g. a --> ア
        kana += ROM_TO_KATA1[head]
        pending = []
      elsif head =~ /[xkcgszjtdnhbpvfmyrlw']/
        # Possible start of a longer syllable; a lone trailing "n" becomes ン.
        return kana + head.gsub(/n/, "ン") if remaining.empty?

        pending.push(remaining.shift)
      else
        # Not a romaji letter: copy it through untouched.
        kana += pending.shift
      end
    when 2
      pair = pending.join
      if ROM_TO_KATA2.key?(pair)
        kana += ROM_TO_KATA2[pair]
        pending = []
      elsif pair =~ /([kgszjtcdnhbpmrl]y)|([stcd]h)|ts|(x[wytk])/
        # Prefix of a three-letter syllable: a third letter is needed.
        return kana + pair.gsub(/n/, "ン") if remaining.empty?

        pending.push(remaining.shift)
      elsif pair == "n'"
        # n' --> ン (both letters consumed)
        kana += "ン"
        pending.shift(2)
      elsif pending[0] == "n"
        # nk --> ンk (the second letter stays buffered)
        kana += "ン"
        pending.shift
      elsif pending[0] == pending[1]
        # kk --> ッk (doubled letter)
        kana += "ッ"
        pending.shift
      else
        kana += pending.shift
      end
    when 3
      triple = pending.join
      if ROM_TO_KATA3.key?(triple)
        kana += ROM_TO_KATA3[triple]
        pending = []
      elsif pending[0] == "n"
        kana += "ン"
        pending.shift
      else
        # No match: emit the first letter as-is and retry with the other two.
        kana += pending.shift
      end
    end
  end
end
romaji() click to toggle source
# File lib/mojinizer/conversion.rb, line 4
# Transliterates the kana in the receiver to romaji.
#
# Each character is first replaced by its KANA_TO_ROM entry (characters
# without an entry are kept as they are). The raw result is then cleaned up
# by a fixed sequence of regex rewrites: voicing marks, the small "tsu"
# (written "xtsu" in the raw form), compound syllables built from small kana
# (written with an "x" prefix), long-vowel marks, and stray spaces.
#
# Changes from the earlier implementation:
# * The "hanging xtsu" rule now requires "xtsu" between the two whitespace
#   characters. Previously "xtsu" was optional, so any two adjacent
#   whitespace characters were turned into "xtsu" (e.g. "a  b" became "abb").
# * The "remove spaces before non-alphabetical characters" rule used the
#   character class [^a-z|A-z]; "A-z" also covers [ \ ] ^ _ ` and the "|" is
#   a literal, so spaces before those characters were wrongly kept.
#
# @return [String] the romanized text
def romaji
  s = ""
  each_char { |c| s += KANA_TO_ROM.fetch(c, c) }

  # (Han)dakuten marks ゛゜ following a syllable
  s = s.gsub(/(k)([aiueo])(")/, 'g\2').gsub(/(s)([aiueo])(")/, 'z\2').gsub(/(t)([aiueo])(")/, 'd\2')
  s = s.gsub(/(h)([aiueo])(")/, 'b\2').gsub(/([fh])([aiueo])(')/, 'p\2').gsub(/u"/, 'vu')
  #---------------------------------------------------------
  # Remove the spaces before/after a hanging 'っ'. "xtsu" is mandatory here so
  # that plain runs of whitespace are left alone.
  s = s.gsub(/\sxtsu\s/, 'xtsu')
  #---------------------------------------------------------
  # ッカ-->xtsuka-->kka. Repeated until nothing changes, because one pass can
  # expose a new "xtsu" + consonant pair (e.g. xtsuxtsuka).
  loop do
    break unless s.gsub!(/(xtsu)([ckgszjtdhfbpmyrwnv])/, '\2\2')
  end
  #---------------------------------------------------------
  # Compound Phoneme Pattern Rollbacks
  # NB: Uses regex backrefs like "\1y\3" where \1 = 1st capture grp, y='y' and \3 = 3rd capture grp
  #---------------------------------------------------------
  s = s.gsub(/( +x)(.*)/, 'x\2')                              # Avoid hanging small kana due to leading spaces
  s = s.gsub(/(ch)(ixy)([aueo])/, '\1\3')                     # チョ-->chixyo-->cho
  s = s.gsub(/([kgszjtdnhfbpmr])(ixy)([auo])/, '\1y\3')       # キャ-->kixya-->kya
  s = s.gsub(/([kgszjtdnhfbpmr])(ix)([ie])/, '\1y\3')         # キィ-->kixi-->kyi
  #---------------------------------------------------------
  s = s.gsub(/(sh)(y)([aueo])/, '\1\3')                       # シュ-->shyu-->shu
  s = s.gsub(/(j)(y)([aueo])/, '\1\3')                        # ジュ-->jyu-->ju
  #---------------------------------------------------------
  s = s.gsub(/([td])(exy)([aueo])/, '\1h\3')                  # テャ-->texya-->tha
  s = s.gsub(/([td])(ex)([ie])/, '\1\3')                      # ティ-->texi-->ti
  s = s.gsub(/([td])(oxu)/, '\1oo')                           # ドゥ-->toxu-->too
  s = s.gsub(/(tsu)(x)([aiueo])/, 'ts\3')                     # ツァ-->tsuxa-->tsa
  s = s.gsub(/([d])(oxy)/, '\1o\'y')                          # ドュ-->doxyu-->doyu
  #---------------------------------------------------------
  s = s.gsub(/(vux)([aieo])/, 'v\2')                          # ヴァヴィヴェヴォ, ヴァ-->vuxa-->va
  s = s.gsub(/(vuxy)([aueo])/, 'vy\2')                        # ヴュ-->vuxyu-->vyu
  s = s.gsub(/(ixe)/, 'iye')                                  # イェ-->ixe-->iye
  s = s.gsub(/(hoxe)/, 'howe')                                # ホェ-->hoxe-->howe
  s = s.gsub(/(fux)([aieo])/, 'f\2')                          # ファフィフェフォ, ファ-->fuxa-->fa
  s = s.gsub(/(fuxy)([aueo])/, 'fy\2')                        # フュ-->fuxyu-->fyu
  s = s.gsub(/(ux)([ieo])/, 'w\2')                            # ウァウィウェ, ウァ-->uxa-->wa
  #---------------------------------------------------------
  s = s.strip.gsub(/(xtsu)$/, 'h!')                           # Recombine hanging 'っ' followed by EOL
  s = s.gsub(/([aiueo]?)(\-)/, '\1\1')                        # Replace long-vowel marks by doubling the preceding vowel
  #---------------------------------------------------------
  # Cleanup specifically for source strings that contain spaces!
  s = s.gsub(/( +)([^a-zA-Z])/, '\2')                         # Remove spaces before any non-alphabetical char
  s = s.gsub(/(n')/, 'n')                                     # ン-->n'-->n
  s = s.gsub(/(nn)/, 'n')                                     # ン-->nn-->n
  # NOTE(review): the optional character after " n" is dropped by this
  # replacement, and "|" is a literal inside the class; left unchanged because
  # the intended behaviour is unclear.
  s = s.gsub(/( n)[^a-z|A-Z]?$/, 'n')                         # Fix "n" appearing as separate word
  s = s.gsub(/\s{2,}/, ' ')                                   # Remove duplicate spaces!
  #---------------------------------------------------------
  s
end
zen_to_han() click to toggle source
# File lib/mojinizer/conversion.rb, line 80
# Passes the receiver to Moji.zen_to_han and returns its result
# (presumably the full-width to half-width conversion of the text).
#
# @return the value returned by Moji.zen_to_han
def zen_to_han
  text = self
  Moji.zen_to_han(text)
end
zenkaku?() click to toggle source
# File lib/mojinizer/detection.rb, line 23
# Reports whether every character of the receiver matches Moji::ZEN.
#
# @return [Boolean] whatever #moji_type? returns for Moji::ZEN
def zenkaku?
  zenkaku_flag = Moji::ZEN
  moji_type?(zenkaku_flag)
end