module Mojinizer
Constants
- KANA_TO_ROM
- KANA_TO_ROM2
- ROM_TO_KATA1
1 character romaji patterns
- ROM_TO_KATA2
2 character romaji patterns
- ROM_TO_KATA3
3 character romaji patterns
- VERSION
Public Instance Methods
ascii_zenkaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 27
# Checks the receiver against the combined Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
# type mask using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for that mask
def ascii_zenkaku?
  zenkaku_ascii_mask = Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
  moji_type?(zenkaku_ascii_mask)
end
contains_ascii_zenkaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 64
# Checks the receiver against the combined Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
# type mask using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for that mask
def contains_ascii_zenkaku?
  zenkaku_ascii_mask = Moji::ZEN_ALNUM | Moji::ZEN_ASYMBOL
  contains_moji_type?(zenkaku_ascii_mask)
end
contains_hankaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 56
# Checks the receiver against the combined Moji::HAN_KATA | Moji::HAN_JSYMBOL
# type mask using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for that mask
def contains_hankaku?
  hankaku_mask = Moji::HAN_KATA | Moji::HAN_JSYMBOL
  contains_moji_type?(hankaku_mask)
end
contains_hiragana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 40
# Checks the receiver against the Moji::HIRA type using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for Moji::HIRA
def contains_hiragana?
  hiragana_mask = Moji::HIRA
  contains_moji_type?(hiragana_mask)
end
contains_japanese?()
click to toggle source
# File lib/mojinizer/detection.rb, line 68
# Checks the receiver against the combined
# Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA type mask using
# contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for that mask
def contains_japanese?
  japanese_mask = Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA
  contains_moji_type?(japanese_mask)
end
contains_kana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 44
# Checks the receiver against the Moji::KANA type using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for Moji::KANA
def contains_kana?
  kana_mask = Moji::KANA
  contains_moji_type?(kana_mask)
end
contains_kanji?()
click to toggle source
# File lib/mojinizer/detection.rb, line 52
# Checks the receiver against the Moji::KANJI type using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for Moji::KANJI
def contains_kanji?
  kanji_mask = Moji::KANJI
  contains_moji_type?(kanji_mask)
end
contains_katakana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 48
# Checks the receiver against the Moji::KATA type using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for Moji::KATA
def contains_katakana?
  katakana_mask = Moji::KATA
  contains_moji_type?(katakana_mask)
end
contains_moji_type?(type)
click to toggle source
# File lib/mojinizer/detection.rb, line 72
# Reports whether at least one character of the receiver satisfies
# Moji.type?(char, type).
#
# Iteration stops at the first matching character. An empty receiver
# yields false because there is no character to match.
#
# @param type the type value handed unchanged to Moji.type? as its second
#   argument (the callers in this module pass Moji::* constants)
# @return [Boolean] true if any character matches, false otherwise
def contains_moji_type?(type)
  each_char.any? { |char| Moji.type?(char, type) }
end
contains_zenkaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 60
# Checks the receiver against the Moji::ZEN type using contains_moji_type?.
#
# @return [Boolean] whatever contains_moji_type? reports for Moji::ZEN
def contains_zenkaku?
  zenkaku_mask = Moji::ZEN
  contains_moji_type?(zenkaku_mask)
end
han_to_zen()
click to toggle source
# File lib/mojinizer/conversion.rb, line 76
# Passes the receiver to Moji.han_to_zen and returns that call's result.
# NOTE(review): presumably a half-width -> full-width conversion; the actual
# behavior is defined by the Moji library, not by this module.
def han_to_zen
  source_text = self
  Moji.han_to_zen(source_text)
end
hankaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 19
# Checks the receiver against the combined Moji::HAN_KATA | Moji::HAN_JSYMBOL
# type mask using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for that mask
def hankaku?
  hankaku_mask = Moji::HAN_KATA | Moji::HAN_JSYMBOL
  moji_type?(hankaku_mask)
end
hira_to_kata()
click to toggle source
# File lib/mojinizer/conversion.rb, line 68
# Passes the receiver to Moji.hira_to_kata and returns that call's result.
# NOTE(review): presumably a hiragana -> katakana conversion; the actual
# behavior is defined by the Moji library, not by this module.
def hira_to_kata
  source_text = self
  Moji.hira_to_kata(source_text)
end
hiragana()
click to toggle source
# File lib/mojinizer/conversion.rb, line 60
# Runs the receiver through roma_to_kata and then calls kata_to_hira on the
# result, returning the final value.
def hiragana
  as_katakana = roma_to_kata
  as_katakana.kata_to_hira
end
hiragana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 3
# Checks the receiver against the Moji::HIRA type using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for Moji::HIRA
def hiragana?
  hiragana_mask = Moji::HIRA
  moji_type?(hiragana_mask)
end
japanese?()
click to toggle source
# File lib/mojinizer/detection.rb, line 31
# Checks the receiver against the combined
# Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA type mask using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for that mask
def japanese?
  japanese_mask = Moji::ZEN | Moji::JSYMBOL | Moji::HAN_KATA
  moji_type?(japanese_mask)
end
kana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 11
# Checks the receiver against the Moji::KANA type using moji_type?.
#
# The check is made per character against Moji::KANA, the same constant
# contains_kana? uses, so a string that mixes hiragana and katakana is
# accepted. The previous form (hiragana? || katakana?) required the whole
# string to be of one script and rejected mixed-kana strings.
# NOTE(review): relies on Moji::KANA covering both hiragana and katakana
# characters -- confirm against the Moji library.
#
# @return [Boolean] whatever moji_type? reports for Moji::KANA
def kana?
  moji_type?(Moji::KANA)
end
kanji?()
click to toggle source
# File lib/mojinizer/detection.rb, line 15
# Checks the receiver against the Moji::KANJI type using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for Moji::KANJI
def kanji?
  kanji_mask = Moji::KANJI
  moji_type?(kanji_mask)
end
kata_to_hira()
click to toggle source
# File lib/mojinizer/conversion.rb, line 72
# Passes the receiver to Moji.kata_to_hira and returns that call's result.
# NOTE(review): presumably a katakana -> hiragana conversion; the actual
# behavior is defined by the Moji library, not by this module.
def kata_to_hira
  source_text = self
  Moji.kata_to_hira(source_text)
end
katakana()
click to toggle source
# File lib/mojinizer/conversion.rb, line 64
# Runs the receiver through hira_to_kata and then calls roma_to_kata on the
# result, returning the final value.
def katakana
  hiragana_converted = hira_to_kata
  hiragana_converted.roma_to_kata
end
katakana?()
click to toggle source
# File lib/mojinizer/detection.rb, line 7
# Checks the receiver against the Moji::KATA type using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for Moji::KATA
def katakana?
  katakana_mask = Moji::KATA
  moji_type?(katakana_mask)
end
moji_type?(type)
click to toggle source
# File lib/mojinizer/detection.rb, line 35
# Reports whether every character of the receiver satisfies
# Moji.type?(char, type).
#
# Iteration stops at the first character that does not match. An empty
# receiver yields true (there is no character that fails the check); this
# matches the original loop-based implementation.
#
# @param type the type value handed unchanged to Moji.type? as its second
#   argument (the callers in this module pass Moji::* constants)
# @return [Boolean] true if all characters match, false otherwise
def moji_type?(type)
  each_char.all? { |char| Moji.type?(char, type) }
end
normalize_zen_han()
click to toggle source
# File lib/mojinizer/conversion.rb, line 84
# Passes the receiver to Moji.normalize_zen_han and returns that call's
# result. The normalization rules themselves are defined by the Moji library.
def normalize_zen_han
  source_text = self
  Moji.normalize_zen_han(source_text)
end
roma_to_kata()
click to toggle source
# File lib/mojinizer/conversion.rb, line 88 def roma_to_kata result="" word_buffer=[] chars=self.each_char.collect{|c| c} loop do case word_buffer.size ##### When 0 characters in the buffer when 0 then if chars.size > 0 word_buffer.push(chars.shift) else return result end ##### Patterns with 1 roman character when 1 then if word_buffer[0] =~ /[aiueo-]/ result += ROM_TO_KATA1[word_buffer[0]] word_buffer = [] # a-->ア elsif word_buffer[0] =~ /[xkcgszjtdnhbpvfmyrlw']/ if chars.size > 0 word_buffer.push(chars.shift) else return result + (word_buffer[0].gsub(/n/,"ン")) end else result += word_buffer.shift end ##### Patterns with 2 roman characters when 2 then if ROM_TO_KATA2.key?(word_buffer.join) result += ROM_TO_KATA2[word_buffer.join] word_buffer = [] elsif word_buffer.join =~ /([kgszjtcdnhbpmrl]y)|([stcd]h)|ts|(x[wytk])/ # goto 3 if chars.size > 0 # Consume next letter from source array word_buffer.push(chars.shift) else return result + (word_buffer.join.gsub(/n/,"ン")) end elsif word_buffer.join == "n'" result += "ン" word_buffer.shift(2) # n'--> ン elsif word_buffer[0] == "n" result += "ン" word_buffer.shift # nk-->ンk elsif word_buffer[0] == word_buffer[1] result += "ッ" word_buffer.shift # kk-->ッk else result += word_buffer.shift; end ##### Patterns with 3 roman characters when 3 then if ROM_TO_KATA3.key?(word_buffer.join) result += ROM_TO_KATA3[word_buffer.join] word_buffer=[] elsif word_buffer[0] == "n" result += "ン" word_buffer.shift else result += word_buffer.shift end end end end
romaji()
click to toggle source
# File lib/mojinizer/conversion.rb, line 4
# Builds a romanized copy of the receiver.
#
# Step 1 replaces every character that is a key of KANA_TO_ROM with its
# mapped value and keeps all other characters unchanged. Step 2 applies an
# ordered series of regex substitutions that rewrite the intermediate
# spellings produced by step 1 (for example "xtsu" + consonant, or
# consonant + "ixy" + vowel) into their final forms. The order of the
# substitutions matters: later patterns operate on the output of earlier ones.
#
# The receiver is not modified; every substitution works on a new string.
#
# Returns the resulting String.
def romaji
  s=""
  self.each_char do |c|
    if (KANA_TO_ROM.key?(c))
      s += KANA_TO_ROM[c]
    else
      s += c
    end
  end
  # Voiced / semi-voiced marks (゛゜): fold a trailing " or ' into the consonant
  s=s.gsub(/(k)([aiueo])(")/,'g\2').gsub(/(s)([aiueo])(")/,'z\2').gsub(/(t)([aiueo])(")/,'d\2')
  s=s.gsub(/(h)([aiueo])(")/,'b\2').gsub(/([fh])([aiueo])(')/,'p\2').gsub(/u"/,'vu')
  #---------------------------------------------------------
  s=s.gsub(/\s(xtsu)?\s/,'xtsu') # Remove spaces before/after hanging 'っ'
  #---------------------------------------------------------
  # sw and s reference the same String; gsub! edits it in place and returns
  # nil once no further match exists, which ends the loop.
  sw=s; while nil!=sw.gsub!(/(xtsu)([ckgszjtdhfbpmyrwnv])/,'\2\2') do; s=sw; end # ッカ-->xtsuka-->kka
  #---------------------------------------------------------
  # Compound Phoneme Pattern Rollbacks
  # NB: Uses regex backrefs like "\1y\3" where \1 = 1st capture grp, y='y' and \3 = 3rd capture grp
  #---------------------------------------------------------
  s=s.gsub(/( +x)(.*)/,'x\2') # Avoid hanging small kana due to leading spaces
  s=s.gsub(/(ch)(ixy)([aueo])/,'\1\3') # チョ-->chixyo-->cho
  s=s.gsub(/([kgszjtdnhfbpmr])(ixy)([auo])/,'\1y\3') # キャ-->kixya-->kya
  s=s.gsub(/([kgszjtdnhfbpmr])(ix)([ie])/,'\1y\3') # キィ-->kixi-->kyi
  #---------------------------------------------------------
  s=s.gsub(/(sh)(y)([aueo])/,'\1\3') # シュ-->shyu-->shu
  s=s.gsub(/(j)(y)([aueo])/,'\1\3') # ジュ-->jyu-->ju
  #---------------------------------------------------------
  s=s.gsub(/([td])(exy)([aueo])/,'\1h\3') # テャ-->texya-->tha
  s=s.gsub(/([td])(ex)([ie])/,'\1\3') # ティ-->texi-->ti
  s=s.gsub(/([td])(oxu)/,'\1oo') # ドゥ-->toxu-->too
  s=s.gsub(/(tsu)(x)([aiueo])/,'ts\3') # ツァ-->tsuxa-->tsa
  s=s.gsub(/([d])(oxy)/,'\1o\'y') # ドュ-->doxyu-->doyu
  #---------------------------------------------------------
  s=s.gsub(/(vux)([aieo])/ ,'v\2') # ヴァヴィヴェヴォ, ヴァ-->vuxa-->va
  s=s.gsub(/(vuxy)([aueo])/ ,'vy\2') # ヴュ-->vuxyu-->vyu
  s=s.gsub(/(ixe)/ ,'iye') # イェ-->ixe-->iye
  s=s.gsub(/(hoxe)/ ,'howe') # ホェ-->hoxe-->howe
  s=s.gsub(/(fux)([aieo])/ ,'f\2') # ファフィフェフォ, ファ-->fuxa-->fa
  s=s.gsub(/(fuxy)([aueo])/,'fy\2') # フュ-->fuxyu-->fyu
  # NOTE(review): the example below mentions uxa-->wa, but the character
  # class [ieo] does not include "a" -- confirm which one is intended.
  s=s.gsub(/(ux)([ieo])/, 'w\2') # ウァウィウェ, ウァ-->uxa-->wa
  #---------------------------------------------------------
  s=s.strip.gsub(/(xtsu)$/,'h!') # Recombine hanging 'っ' followed by EOL
  s=s.gsub(/([aiueo]?)(\-)/, '\1\1') # Replace long-vowel marks and double preceding vowel
  #---------------------------------------------------------
  # Cleanup specifically for source strings that contain spaces!
  # The class previously read [^a-z|A-z]; the range A-z also covers
  # [ \ ] ^ _ and the backtick, so spaces before those characters were kept.
  s=s.gsub(/( +)([^a-z|A-Z])/, '\2') # Remove spaces before any non-alphabetical char
  s=s.gsub(/(n')/,'n') # ン-->nn-->n
  s=s.gsub(/(nn)/,'n') # ン-->nn-->n
  s=s.gsub(/( n)[^a-z|A-Z]?$/,'n') # Fix "n" appearing as separate word
  s=s.gsub(/\s{2,}/, ' ') # Remove duplicate spaces!
  #---------------------------------------------------------
  return s
end
zen_to_han()
click to toggle source
# File lib/mojinizer/conversion.rb, line 80
# Passes the receiver to Moji.zen_to_han and returns that call's result.
# NOTE(review): presumably a full-width -> half-width conversion; the actual
# behavior is defined by the Moji library, not by this module.
def zen_to_han
  source_text = self
  Moji.zen_to_han(source_text)
end
zenkaku?()
click to toggle source
# File lib/mojinizer/detection.rb, line 23
# Checks the receiver against the Moji::ZEN type using moji_type?.
#
# @return [Boolean] whatever moji_type? reports for Moji::ZEN
def zenkaku?
  zenkaku_mask = Moji::ZEN
  moji_type?(zenkaku_mask)
end