module Taxamatch::Phonetizer

Public Class Methods

near_match(a_word, normalize_ending = false) click to toggle source
# File lib/taxamatch_rb/phonetizer.rb, line 10
def self.near_match(a_word, normalize_ending = false)
  a_word = a_word.strip rescue ''
  return '' if a_word == ''
  a_word = Taxamatch::Normalizer.normalize a_word
  case a_word
    when /^AE/
      a_word = 'E' + a_word[2..-1]
    when /^CN/
      a_word = 'N' + a_word[2..-1]
    when /^CT/
      a_word = 'T' + a_word[2..-1]
    when /^CZ/
      a_word = 'C' + a_word[2..-1]
    when /^DJ/
      a_word = 'J' + a_word[2..-1]
    when /^EA/
      a_word = 'E' + a_word[2..-1]
    when /^EU/
      a_word = 'U' + a_word[2..-1]
    when /^GN/
      a_word = 'N' + a_word[2..-1]
    when /^KN/
      a_word = 'N' + a_word[2..-1]
    when /^MC/
      a_word = 'MAC' + a_word[2..-1]
    when /^MN/
      a_word = 'N' + a_word[2..-1]
    when /^OE/
      a_word = 'E' + a_word[2..-1]
    when /^QU/
      a_word = 'Q' + a_word[2..-1]
    when /^PS/
      a_word = 'S' + a_word[2..-1]
    when /^PT/
      a_word = 'T' + a_word[2..-1]
    when /^TS/
      a_word = 'S' + a_word[2..-1]
    when /^WR/
      a_word = 'R' + a_word[2..-1]
    when /^X/
      a_word = 'Z' + a_word[1..-1]
  end
  first_char = a_word.split('')[0]
  rest_chars = a_word.split('')[1..-1].join('')
  rest_chars.gsub!('AE', 'I')
  rest_chars.gsub!('IA', 'A')
  rest_chars.gsub!('OE', 'I')
  rest_chars.gsub!('OI', 'A')
  rest_chars.gsub!('SC', 'S')
  rest_chars.gsub!('H', '')
  rest_chars.tr!('EOUYKZ', 'IAIICS')
  a_word = (first_char + rest_chars).squeeze

  if normalize_ending && a_word.size > 4
    a_word = self.normalize_ending(a_word)
  end
  a_word
end
normalize_ending(a_word) click to toggle source
# File lib/taxamatch_rb/phonetizer.rb, line 69
def self.normalize_ending(a_word)
    # -- deal with variant endings
    # -is (includes -us, -ys, -es), -im (was -um), -as (-os)
    # -- at the end of a string translate all to -a
    a_word.gsub!(/IS$/, 'A')
    a_word.gsub!(/IM$/, 'A')
    a_word.gsub(/AS$/, 'A')
end
phonetize(a_word, normalize_ending = false) click to toggle source
# File lib/taxamatch_rb/phonetizer.rb, line 6
def self.phonetize(a_word, normalize_ending = false)
  self.near_match(a_word, normalize_ending)
end