class Indirizzo::Parser

Public Class Methods

new(text, options={}) click to toggle source
# File lib/indirizzo/parser.rb, line 8
# Builds a parser for a single address string.
#
# text    - the raw address text; kept in @text for later use.
# options - optional settings; kept in @options and defaulting to an
#           empty Hash.
def initialize(text, options = {})
  @text, @options = text, options
end

Public Instance Methods

parse() click to toggle source
# File lib/indirizzo/parser.rb, line 13
# Splits the address text given to the constructor into its components.
#
# Works on a lower-cased copy of @text. The ZIP, state and house number are
# located first; the street is then searched for between the end of the
# number and the start of the state/ZIP, and the city is resolved last.
#
# Every component is stored in the instance variable of the same name.
#
# Returns an Array:
#   [city, street, number, prenum, sufnum, full_state, state, zip, plus4, country]
def parse
  text = @text.clone.downcase

  @zip, @plus4, zip_index, zip_end_index = extract_zip_from_text(text)

  # Whatever follows the ZIP is taken as the country, read from the original
  # (case-preserved) text. extract_zip_from_text reports an end index of -1
  # when no ZIP was found; in that case there is nothing "after the ZIP", so
  # the country is nil. (Comparing the slice against the lower-cased text, as
  # was done previously, failed for any input containing uppercase letters
  # and returned the whole address as the country.)
  @country =
    if zip_end_index < 0
      nil
    else
      @text[(zip_end_index + 1)..-1].sub(/^\s*,\s*/, '').strip
    end

  @state, @full_state, @city, state_index = extract_state_from_text(text)

  @prenum, @number, @sufnum, number_end_index = process_number(text)

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  # The street search stops just before whichever of the state or ZIP starts
  # first, or at the end of the text when neither index is available.
  street_search_end_index = [state_index, zip_index, text.length].compact.min - 1
  street_span = text[(number_end_index + 1)..street_search_end_index]
  @street = street_span.scan(Match[:street]).map { |s| s && s.strip }

  @street = Street.expand(@street) if @options[:expand_streets]
  # SPECIAL CASE: 1600 Pennsylvania 20050
  @street << @full_state if @street.empty? && @state.downcase != @full_state.downcase

  # Smallest of the positions at which each street candidate last occurs in
  # the text; 0 when there are no candidates or none can be located.
  street_end_index = @street.map { |s| text.rindex(s) }.compact.min || 0

  process_city(text, street_end_index, street_search_end_index)

  [@city, @street, @number, @prenum, @sufnum, @full_state, @state, @zip, @plus4, @country]
end

Private Instance Methods

extract_state_from_text(text) click to toggle source
# File lib/indirizzo/parser.rb, line 61
# Finds the last state mentioned in +text+.
#
# text - the (lower-cased) address text to search with Match[:state].
#
# Returns an Array [state, full_state, city, state_index]:
#   state       - State[full_state], the lookup result for the matched name
#   full_state  - first capture of the last match, stripped
#   city        - "Washington" when the state is "DC" and the matched text
#                 reads "washington d.c." (dots optional); otherwise nil
#   state_index - position at which the matched text last occurs in +text+
# When nothing matches, the result is ["", "", nil, nil].
def extract_state_from_text(text)
  captures = text.scan(Match[:state]).last
  return ["", "", nil, nil] unless captures

  # After String#scan the last-match data describes the final match found.
  matched = Regexp.last_match(0)
  position = text.rindex(matched)
  full_name = captures[0].strip # special case: New York
  abbreviation = State[full_name]

  city = nil
  if abbreviation == "DC" && text[position, matched.length] =~ /washington\s+d\.?c\.?/i
    city = "Washington"
  end

  [abbreviation, full_name, city, position]
end
extract_zip_from_text(text) click to toggle source
# File lib/indirizzo/parser.rb, line 46
# Finds the last ZIP code mentioned in +text+.
#
# text - the (lower-cased) address text to search with Match[:zip].
#
# Returns an Array [zip, plus4, zip_index, zip_end_index]:
#   zip, plus4    - first two captures of the last match, stripped
#                   (a capture that did not participate stays nil)
#   zip_index     - position at which the matched text last occurs in +text+
#   zip_end_index - index of the last character of that occurrence
# When nothing matches, zip and plus4 are empty, zip_index is text.length
# and zip_end_index is -1.
def extract_zip_from_text(text)
  groups = text.scan(Match[:zip]).last

  unless groups
    # Both slots deliberately refer to one and the same String object.
    blank = ""
    return [blank, blank, text.length, -1]
  end

  # After String#scan the last-match data describes the final match found.
  matched = Regexp.last_match(0)
  first_char = text.rindex(matched)
  last_char = first_char + matched.length - 1
  zip, plus4 = groups.map { |part| part && part.strip }

  [zip, plus4, first_char, last_char]
end
process_city(text, street_end_index, street_search_end_index) click to toggle source
# File lib/indirizzo/parser.rb, line 77
def process_city(text, street_end_index, street_search_end_index)
  if @city.nil? || @city.empty?
    @city = text[street_end_index..street_search_end_index+1].scan(Match[:city])
    if !@city.empty?
      #@city = [@city[-1].strip]
      @city = [@city.last.strip]
      add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}}
      @city |= add
      @city.map! {|s| s.downcase}
      @city.uniq!
    else
      @city = []
    end

    # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
    @city << @full_state if @state.downcase != @full_state.downcase
  end
end
process_number(text) click to toggle source
# File lib/indirizzo/parser.rb, line 96
# Finds the first house number in +text+.
#
# text - the (lower-cased) address text to search with Match[:number].
#
# Returns an Array [prenum, number, sufnum, number_end_index]:
#   prenum, number, sufnum - first three captures of the first match,
#                            stripped (a capture that did not participate
#                            stays nil)
#   number_end_index       - index of the last character of that match
# When nothing matches, the three strings are empty and number_end_index
# is -1.
def process_number(text)
  # String#match stops at the first match, so the captures and the end
  # offset below always describe the same occurrence. (Reading $& after
  # String#scan yields the LAST match, which disagreed with the first
  # match's captures whenever the pattern matched more than once.)
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  found = text.match(Match[:number])
  return ["", "", "", -1] unless found # and not intersection?

  prenum, number, sufnum = found.captures.map { |s| s && s.strip }
  [prenum, number, sufnum, found.end(0) - 1]
end