module Taxamatch::Normalizer
Public Class Methods
normalize(string)
click to toggle source
# File lib/taxamatch_rb/normalizer.rb, line 6 def self.normalize(string) utf8_to_ascii(string.strip.upcase).gsub(/[^\x00-\x7F]/,'?') end
normalize_word(word)
click to toggle source
# File lib/taxamatch_rb/normalizer.rb, line 10 def self.normalize_word(word) self.normalize(word).gsub(/[^A-Z0-9\-]/, '').strip end
normalize_year(year_string)
click to toggle source
# File lib/taxamatch_rb/normalizer.rb, line 18 def self.normalize_year(year_string) year_int = year_string.gsub(/[^\d]/, '').to_i year_int = nil unless year_int.between?(1757, Time.now.year + 1) year_int end
Private Class Methods
utf8_to_ascii(string)
click to toggle source
# File lib/taxamatch_rb/normalizer.rb, line 26 def self.utf8_to_ascii(string) string = string.gsub(/\s{2,}/, ' ') string = string.gsub("×", "x") string = string.gsub(/[ÀÂÅÃÄÁẤẠÁáàâåãäăãắảạậầằá]/, "A") string = string.gsub(/[ÉÈÊËéèêëĕěếệểễềẻ]/, "E") string = string.gsub(/[ÍÌÎÏíìîïǐĭīĩỉï]/, "I") string = string.gsub(/[ÓÒÔØÕÖỚỔóòôøõöŏỏỗộơọỡốơồờớổő]/, "O") string = string.gsub(/[ÚÙÛÜúùûüůưừựủứụű]/, "U") string = string.gsub(/[Ýýÿỹ]/, "Y") string = string.gsub(/[Ææ]/, "AE") string = string.gsub(/[ČÇčćç]/, "C") string = string.gsub(/[ŠŞśšşſ]/, "S") string = string.gsub(/[Đđð]/, "D") string = string.gsub(/Žžź/, "Z") string = string.gsub(/[Ññńň]/, "N") string = string.gsub(/[Œœ]/, "OE") string = string.gsub(/ß/, "B") string = string.gsub(/Ķ/, "K") string = string.gsub(/ğ/, "G") string = string.gsub(/[Řř]/, "R") end