class Indirizzo::Helper

Public Class Methods

clean(value)
# File lib/indirizzo/helper.rb, line 24
# Normalizes a free-form string for further processing.
#
# Steps, in order:
#   1. Every run of whitespace (spaces, tabs, newlines, ...) becomes a single
#      space, so words separated by a tab or newline are not glued together
#      when disallowed characters are removed in the next step.
#   2. Every character other than ASCII letters, digits, space, and
#      , ' & @ / - is removed.
#   3. Runs of spaces left behind by step 2 are collapsed to one space.
#   4. Leading and trailing whitespace is stripped last, so a leading or
#      trailing disallowed character cannot leave a stray edge space.
#
# value   - a String (anything else raises NoMethodError, as before).
#
# Returns a new String; the argument is not modified.
def self.clean(value)
  value.gsub(/\s+/, " ")
       .gsub(/[^a-z0-9 ,'&@\/-]+/i, "")
       .squeeze(" ")
       .strip
end
remove_noise_words(strings)
# File lib/indirizzo/helper.rb, line 3
# Strips leading/trailing "noise" tokens from each string.
#
# For every string, in this order, removes:
#   1. a leading match of Directional.regexp (plus following whitespace),
#   2. a trailing match of Directional.regexp (plus preceding whitespace),
#   3. a leading match of Prefix_Type.regexp (plus following whitespace),
#   4. a trailing match of Suffix_Type.regexp (plus preceding whitespace).
# Strings that become empty are dropped.
#
# strings - an Array of Strings. Neither the array nor its elements are
#           modified; frozen strings are accepted.
#
# Returns a new Array of the stripped, non-empty strings, or the original
# `strings` array itself when stripping would leave nothing at all.
#
# NOTE: results are intentionally NOT filtered against Std_Abbr / Name_Abbr.
# The previous implementation passed such a filter as a block to
# Array#empty?, which ignores blocks, so it never ran. It is left out on
# purpose: an earlier note on this method records that rejecting strings
# consisting solely of abbreviations breaks on simple inputs such as
# "Prairie St" or "Front St".
def self.remove_noise_words(strings)
  # Single-quoted so that \s reaches the regexp engine as the whitespace
  # class; inside a double-quoted Ruby string "\s" is just a literal space.
  gap = '\s*'
  predxn = Regexp.new("^" + Directional.regexp.source + gap, Regexp::IGNORECASE)
  sufdxn = Regexp.new(gap + Directional.regexp.source + "$", Regexp::IGNORECASE)
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + gap, Regexp::IGNORECASE)
  suffix = Regexp.new(gap + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)

  # Order matters: directionals are stripped before prefix/suffix types.
  patterns = [predxn, sufdxn, prefix, suffix]

  # Non-destructive gsub builds new strings, so frozen inputs are safe.
  stripped = strings.map { |s| patterns.reduce(s) { |acc, re| acc.gsub(re, "") } }
  good_strings = stripped.reject(&:empty?)

  good_strings.empty? ? strings : good_strings
end