module Kana2rom

USAGE

include Kana2rom

str.to_romaji        かな --> ローマ字 変換    /   hiragana/katakana --> romaji conv
str.to_katakana      ローマ字 --> 片仮名 変換   /   romaji --> katakana conv
str.to_hiragana      ローマ字 --> 平仮名 変換   /   romaji --> hiragana conv
hira_to_kata(str)    平仮名 --> 片仮名 変換    /   hiragana --> katakana conv
kata_to_hira(str)    片仮名 --> 平仮名 変換    /   katakana --> hiragana conv
kana2kana(str)   converts in both directions (hiragana <-> katakana); returns the input followed by each converted form that differs from it, without duplicates

Constants

Hira2kataH
HiraganaCharacters
Kana2romH
Kana2romH2
Kata2hiraH
KatakanaCharacters
NotKanaCharacters
Rom2KataH1

1 character romaji patterns

Rom2KataH2

2 character romaji patterns

Rom2KataH3

3 character romaji patterns

Public Instance Methods

contains_kana?() click to toggle source
# File lib/romajinizer.rb, line 373
# True when at least one character of the receiver is found in
# HiraganaCharacters or KatakanaCharacters; false otherwise (including for
# an empty receiver).
def contains_kana?
  each_char.any? do |glyph|
    HiraganaCharacters.include?(glyph) || KatakanaCharacters.include?(glyph)
  end
end
hira_to_kata(str) click to toggle source
# File lib/romajinizer.rb, line 321
# Builds a new string from +str+ in which every character that is a key of
# Hira2kataH is replaced by its mapped value; all other characters are
# copied through unchanged.
def hira_to_kata(str)
  str.each_char.inject("") do |converted, glyph|
    converted + Hira2kataH.fetch(glyph, glyph)
  end
end
is_kana?() click to toggle source
# File lib/romajinizer.rb, line 350
# True when the receiver is found in HiraganaCharacters or in
# KatakanaCharacters, false otherwise.
#
# The katakana test used to be negated, which made every non-katakana
# string (Latin letters, kanji, ...) report true and made katakana report
# false. The condition now matches the hiragana-or-katakana test used by
# contains_kana? and is_only_kana?.
def is_kana?
  HiraganaCharacters.include?(self) || KatakanaCharacters.include?(self)
end
is_kanji?() click to toggle source
# File lib/romajinizer.rb, line 357
# True when the receiver is found in none of HiraganaCharacters,
# KatakanaCharacters and NotKanaCharacters. The decision is made purely by
# exclusion from those three tables.
def is_kanji?
  known_tables = [HiraganaCharacters, KatakanaCharacters, NotKanaCharacters]
  known_tables.none? { |table| table.include?(self) }
end
is_only_kana?() click to toggle source
# File lib/romajinizer.rb, line 364
# True when every character of the receiver is found in HiraganaCharacters
# or KatakanaCharacters. An empty receiver has no offending character and
# therefore yields true.
def is_only_kana?
  each_char.all? do |glyph|
    HiraganaCharacters.include?(glyph) || KatakanaCharacters.include?(glyph)
  end
end
kana2kana(str1) click to toggle source

Added by Paul 2009-05-12 22:31

# File lib/romajinizer.rb, line 336
# Returns an array that starts with +str1+ itself, followed by its
# Kana2rom::hira_to_kata form and then its Kana2rom::kata_to_hira form.
# A converted form is only added when it is non-empty and differs from the
# strings already considered, so the array never holds duplicates.
def kana2kana(str1)
  as_katakana = Kana2rom::hira_to_kata(str1)
  as_hiragana = Kana2rom::kata_to_hira(str1)

  variants = [str1]
  variants << as_katakana unless as_katakana.empty? || str1 == as_katakana
  unless as_hiragana.empty? || as_katakana == as_hiragana || as_hiragana == str1
    variants << as_hiragana
  end
  variants
end
kata_to_hira(str) click to toggle source
# File lib/romajinizer.rb, line 315
# Builds a new string from +str+ in which every character that is a key of
# Kata2hiraH is replaced by its mapped value (other characters are copied
# as-is), then runs normalize_double_n! on that new string and returns it.
def kata_to_hira(str)
  converted = str.each_char.inject("") do |acc, glyph|
    acc + Kata2hiraH.fetch(glyph, glyph)
  end
  converted.normalize_double_n!
  converted
end
normalize_double_n() click to toggle source
# File lib/romajinizer.rb, line 326
# Returns a copy of the receiver in which "n'" is reduced to "n" wherever
# the apostrophe is not needed: that is, when the next character is not one
# of a, i, u, e, o, y, n, or when "n'" sits at the end of a line.
# The receiver itself is left untouched.
def normalize_double_n
  redundant_apostrophe = /n\'(?=[^aiueoyn]|$)/
  gsub(redundant_apostrophe, "n")
end
normalize_double_n!() click to toggle source
# File lib/romajinizer.rb, line 330
# In-place variant of normalize_double_n: rewrites the receiver so that
# "n'" becomes "n" when followed by a character other than a, i, u, e, o,
# y, n, or when at the end of a line. Always returns the receiver, whether
# or not anything was replaced.
def normalize_double_n!
  # gsub! answers nil when nothing matched; fall back to the receiver then.
  gsub!(/n\'(?=[^aiueoyn]|$)/, "n") || self
end
to_hiragana() click to toggle source
# File lib/romajinizer.rb, line 346
# Converts the receiver in two steps: first to_katakana, then kata_to_hira
# on that intermediate result, whose return value is handed back.
def to_hiragana
  katakana_form = to_katakana
  kata_to_hira(katakana_form)
end
to_katakana() click to toggle source
# File lib/romajinizer.rb, line 248
# Converts the receiver (romaji text) to katakana and returns the result as
# a new string.
#
# The receiver is split into characters which are consumed one at a time
# into a small buffer (at most 3 characters). Depending on the buffer size,
# the buffered text is looked up in Rom2KataH1 / Rom2KataH2 / Rom2KataH3.
# On a hit the mapped value is appended to the result and the buffer is
# cleared; otherwise either one more character is pulled in, or the first
# buffered character is emitted (as "ン", "ッ" or unchanged) and dropped.
# When the input runs out while a partial pattern is still buffered, the
# leftover is appended with every "n" replaced by "ン".
def to_katakana
  ## THIS LINE DOES NOT WORK IN RECENT RUBY VERSIONS!!!    r=""; w=[]; chars=str.split(//e)
  result=""
  word_buffer=[]
  # Remaining input as an array of single characters; consumed from the front.
  chars=self.each_char.collect{|c| c}
  loop do
    case word_buffer.size
      ##### When 0 characters in the buffer
    when 0 then
      if chars.size > 0
        word_buffer.push(chars.shift)
      else
        # Nothing buffered and nothing left to read: conversion is finished.
        return result
      end
      ##### Patterns with 1 roman character
    when 1 then
      if word_buffer[0] =~ /[aiueo-]/
        # NOTE(review): assumes Rom2KataH1 has an entry for each of
        # a/i/u/e/o/- ; a missing key would make += receive nil — confirm.
        result += Rom2KataH1[word_buffer[0]]
        word_buffer = [] # a-->ア
      elsif word_buffer[0] =~ /[xkcgszjtdnhbpvfmyrlw']/
        # Possible start of a longer pattern: need a second character.
        if chars.size > 0
          word_buffer.push(chars.shift)
        else
          # Input ended on this character: a lone "n" becomes "ン", any
          # other character is appended unchanged.
          return result + (word_buffer[0].gsub(/n/,"ン"))
        end
      else
        # Not part of any romaji pattern: copy through unchanged.
        result += word_buffer.shift
      end
      ##### Patterns with 2 roman characters
    when 2 then
      if Rom2KataH2.key?(word_buffer.join)
        result += Rom2KataH2[word_buffer.join]
        word_buffer = []
      elsif word_buffer.join =~ /([kgszjtcdnhbpmrl]y)|([stcd]h)|ts|(x[wytk])/ # goto 3
        if chars.size > 0
          # Consume next letter from source array
          word_buffer.push(chars.shift)
        else
          # Input ended inside a 3-character pattern: emit what is buffered,
          # turning any "n" into "ン".
          return result + (word_buffer.join.gsub(/n/,"ン"))
        end
      elsif word_buffer.join == "n'"
        result += "ン"
        word_buffer.shift(2) # n'--> ン
      elsif word_buffer[0] == "n"
        # The second character stays buffered and is re-examined next pass.
        result += "ン"
        word_buffer.shift # nk-->ンk
      elsif word_buffer[0] == word_buffer[1]
        # Two identical buffered characters: emit "ッ" for the first one and
        # keep the second buffered.
        result += "ッ"
        word_buffer.shift # kk-->ッk
      else
        # No pattern starts here: emit the first character unchanged.
        result += word_buffer.shift;
      end
      ##### Patterns with 3 roman characters
    when 3 then
      if Rom2KataH3.key?(word_buffer.join)
        result += Rom2KataH3[word_buffer.join]
        word_buffer=[]
      elsif word_buffer[0] == "n"
        # Leading "n" stands alone; the remaining two characters are retried
        # as a 2-character buffer.
        result += "ン"
        word_buffer.shift
      else
        # No 3-character match: emit the first character unchanged and retry
        # with the remaining two.
        result += word_buffer.shift
      end
    end
  end
end
to_romaji() click to toggle source
# File lib/romajinizer.rb, line 192
# Converts the receiver (hiragana/katakana text) to romaji and returns the
# result as a new string.
#
# Step 1 replaces every character that is a key of Kana2romH by its mapped
# value; other characters are copied through. Step 2 applies an ordered
# series of regex rewrites to the intermediate text (voicing marks, small
# "xtsu" doubling, compound sounds written with small kana, long-vowel
# marks, whitespace clean-up). The order of the rewrites matters: later
# patterns rely on the output of earlier ones.
#
# Two patterns were corrected relative to the previous version:
# * the hanging-"xtsu" rule required the "xtsu" group to be optional, so any
#   two adjacent whitespace characters were rewritten to "xtsu";
# * the "spaces before non-alphabetical char" rule used the range A-z,
#   which also covers [ \ ] ^ _ ` and so treated them as letters.
def to_romaji
  s=""
  self.each_char do |c|
    if (Kana2romH.key?(c))
      s += Kana2romH[c]
    else
      s += c
    end
  end

  s=s.gsub(/(k)([aiueo])(")/,'g\2').gsub(/(s)([aiueo])(")/,'z\2').gsub(/(t)([aiueo])(")/,'d\2')
  s=s.gsub(/(h)([aiueo])(")/,'b\2').gsub(/([fh])([aiueo])(')/,'p\2').gsub(/u"/,'vu') # (han)dakuten marks ゛゜
  #---------------------------------------------------------
  # The "xtsu" group is mandatory: only whitespace that actually surrounds a
  # hanging small tsu is removed, plain runs of whitespace are left alone.
  s=s.gsub(/\s(xtsu)\s/,'xtsu')                             # Remove spaces before/after hanging 'っ'
  #---------------------------------------------------------
  # sw and s refer to the same string; gsub! edits it in place and returns
  # nil once no further replacement happens, which ends the loop.
  sw=s;
  while nil!=sw.gsub!(/(xtsu)([ckgszjtdhfbpmyrwnv])/,'\2\2') do; s=sw; end # ッカ-->xtsuka-->kka
  #---------------------------------------------------------
  # Compound Phoneme Pattern Rollbacks
  # NB: Uses regex backrefs like "\1y\3" where \1 = 1st capture grp, y='y' and \3 = 3rd capture grp
  #---------------------------------------------------------
  s=s.gsub(/( +x)(.*)/,'x\2')                               # Avoid hanging small kana due to leading spaces
  s=s.gsub(/(ch)(ixy)([aueo])/,'\1\3')                      # チョ-->chixyo-->cho
  s=s.gsub(/([kgszjtdnhfbpmr])(ixy)([auo])/,'\1y\3')        # キャ-->kixya-->kya
  s=s.gsub(/([kgszjtdnhfbpmr])(ix)([ie])/,'\1y\3')          # キィ-->kixi-->kyi
  #---------------------------------------------------------
  s=s.gsub(/(sh)(y)([aueo])/,'\1\3')                        # シュ-->shyu-->shu
  s=s.gsub(/(j)(y)([aueo])/,'\1\3')                         # ジュ-->jyu-->ju
  #---------------------------------------------------------
  s=s.gsub(/([td])(exy)([aueo])/,'\1h\3')                   # テャ-->texya-->tha
  s=s.gsub(/([td])(ex)([ie])/,'\1\3')                       # ティ-->texi-->ti
  s=s.gsub(/([td])(oxu)/,'\1oo')                            # ドゥ-->toxu-->too
  s=s.gsub(/(tsu)(x)([aiueo])/,'ts\3')                      # ツァ-->tsuxa-->tsa
  s=s.gsub(/([d])(oxy)/,'\1o\'y')                           # ドュ-->doxyu-->doyu
  #---------------------------------------------------------
  s=s.gsub(/(vux)([aieo])/ ,'v\2')                          # ヴァヴィヴェヴォ, ヴァ-->vuxa-->va
  s=s.gsub(/(vuxy)([aueo])/ ,'vy\2')                        # ヴュ-->vuxyu-->vyu
  s=s.gsub(/(ixe)/ ,'iye')                                  # イェ-->ixe-->iye
  s=s.gsub(/(hoxe)/ ,'howe')                                # ホェ-->hoxe-->howe
  s=s.gsub(/(fux)([aieo])/ ,'f\2')                          # ファフィフェフォ, ファ-->fuxa-->fa
  s=s.gsub(/(fuxy)([aueo])/,'fy\2')                         # フュ-->fuxyu-->fyu
  # NOTE(review): the class below is [ieo], so "uxa" is not rewritten even
  # though the example mentions it — confirm whether "a" should be included.
  s=s.gsub(/(ux)([ieo])/, 'w\2')                            # ウァウィウェ, ウァ-->uxa-->wa
  #---------------------------------------------------------
  s=s.strip.gsub(/(xtsu)$/,'h!')                            # Recombine hanging 'っ' followed by EOL
  s=s.gsub(/([aiueo]?)(\-)/, '\1\1')                        # Replace long-vowel marks and double preceding vowel
  #---------------------------------------------------------
  # Cleanup specifically for source strings that contain spaces!
  # Letters are a-z and A-Z (a literal '|' is exempted as well, as before).
  s=s.gsub(/( +)([^a-z|A-Z])/, '\2')                        # Remove spaces before any non-alphabetical char
  s=s.gsub(/(n')/,'n')                                      # ン-->nn-->n
  s=s.gsub(/(nn)/,'n')                                      # ン-->nn-->n
  s=s.gsub(/( n)[^a-z|A-Z]?$/,'n')                          # Fix "n" appearing as separate word
  s=s.gsub(/\s{2,}/, ' ')                                   # Remove duplicate spaces!
  #---------------------------------------------------------
  return s
end