module Nomener::Parser

Module containing the blades for carving a string into a name

The two significant methods are:

parse returning a hash or nil
parse! returning a hash or raising an exception

Constants

FIRSTLAST_MATCHER

regex for matching last names in a “first last” pattern

LASTCOMFIRST_MATCHER

regex for matching last names in a “last, first” pattern

LASTFIRST_MATCHER

regex for matching last names in a “last first” pattern

NICKNAME

regex for matching enclosed nicknames

NICKNAME_LEFTOVER

regex for boundaries we’ll use to find leftover nickname boundaries

Public Class Methods

parse(name, format = { order: :auto, spacelimit: 1 }) click to toggle source

Public: parse a string into name parts

name - a string to get the name from
format - hash of options to parse name

default {:order => :auto, :spacelimit => 1}
:order - the format the name is in. defaults to :auto, which is treated as "first last" unless the name contains a comma. the available formats are
  :fl - presumes the name is in the format of "first last"
  :lf - presumes the name is in the format of "last first"
  :lcf - presumes the name is in the format of "last, first"
:spacelimit - the number of spaces to consider in the first name

Returns a hash of the name parts parsed from the string, or nil if the string could not be parsed

# File lib/nomener/parser.rb, line 49
# Public: parse a string into name parts, swallowing parse failures
#
# name   - a string to get the name from
# format - hash of options handed through to parse! unchanged
#
# Returns whatever parse! returns, or nil when parse! raises a StandardError
def self.parse(name, format = { order: :auto, spacelimit: 1 })
  begin
    parse!(name, format)
  rescue StandardError
    # any failure to parse is reported to the caller as "no name"
    nil
  end
end
parse!(name, format = { order: :auto, spacelimit: 0 }) click to toggle source

Public: parse a string into name parts

name - string to parse a name from
format - hash of options to parse name. See parse()

Returns a hash of name parts
Raises ArgumentError if ‘name’ is nil or empty
Raises ParseError if the name still contains more than one comma

# File lib/nomener/parser.rb, line 62
def self.parse!(name, format = { order: :auto, spacelimit: 0 })
  raise ArgumentError,
    'Name to parse not provided' if name.to_s.empty?

  name = Cleaner.reformat name

  # we want the hash in this order as it helps with parsing out pieces
  newname = { first: '', middle: '', last: '' }
  newname[:nick] = parse_nick!(name) # grab any identified nickname
  newname[:suffix] = Suffixes.parse_suffix!(name) # grab any suffix'
  newname[:title] = Titles.parse_title!(name)

  # stop here if we know we'll be confused
  raise ParseError,
    "Could not decipher commas in \"#{name}\"" if name.count(',') > 1

  newname[:last] = dustoff name # possibly mononyms

  if name.count(',') > 0
    newname[:last], newname[:first] = splitcomma(name)
    # titles which are part of the first name...
    newname[:title] = Titles.parse_title!(newname[:first]) if newname[:title].empty?
  else
    newname[:last] = parse_last!(name, format[:order])
    newname[:first], newname[:middle] = parse_first!(name, format[:spacelimit])
  end

  Cleaner.cleanup! newname[:last], newname[:first], newname[:middle]
  newname[:first] = dustoff newname[:first]

  newname
end
parse_first!(nm, namecount = 0) click to toggle source

Internal: parse the first name, and middle name if any

Modifies given string in place.

nm - the string to get the first name from
namecount - the number of spaces in the first name to consider

Returns an array containing the first name and middle name if any

# File lib/nomener/parser.rb, line 172
# Internal: parse the first name, and middle name if any
#
# Modifies given string in place: every '.' becomes a space and runs of
# spaces are collapsed to one.
#
# nm        - the string to get the first name from
# namecount - passed to String#split as its limit argument
#
# Returns a two element array of first name and middle name, using an empty
# string for whichever piece is missing
def self.parse_first!(nm, namecount = 0)
  nm.tr!('.', ' ')
  nm.squeeze!(' ')

  # only the first two pieces of the split are ever used
  pieces = nm.split(' ', namecount)
  Array.new(2) { |idx| pieces[idx] || '' }
end
parse_last!(nm, format = :fl) click to toggle source

Internal: parse last name from string

Modifies given string in place.

nm - string to get the last name from
format - symbol defaulting to “first last”. See parse()

Returns string of the last name found or an empty string

# File lib/nomener/parser.rb, line 140
# Internal: parse last name from string
#
# Modifies given string in place: when a last name is found, the matched
# text and the first comma are removed from nm.
#
# nm     - string to get the last name from
# format - symbol naming the expected layout, defaulting to :fl.
#          :auto resolves to :lcf when nm contains a comma, otherwise to :fl.
#          Any other unrecognised symbol finds nothing.
#
# Returns string of the last name found or an empty string
def self.parse_last!(nm, format = :fl)
  # the comma test has to happen while format is still :auto; rewriting
  # :auto to :fl first would make the :lcf choice unreachable
  format = (nm.index(',') ? :lcf : :fl) if format == :auto

  # these constants should have the named match :fam
  nomen = case format
          when :fl  then nm.match(FIRSTLAST_MATCHER)
          when :lf  then nm.match(LASTFIRST_MATCHER)
          when :lcf then nm.match(LASTCOMFIRST_MATCHER)
          end

  family = nomen && nomen[:fam]
  return '' if family.nil?

  last = family.strip

  # cut out exactly the span the regex matched (minus surrounding
  # whitespace) instead of the first place the same text happens to occur,
  # e.g. "An An" must lose its second "An", not its first
  offset = nomen.begin(:fam) + (family.length - family.lstrip.length)
  nm[offset, last.length] = ''
  nm.sub!(',', '')

  last
end
parse_nick!(nm) click to toggle source

Internal: parse nickname out of string. presuming it’s in quotes

Modifies given string in place.

nm - string of the name to parse

Returns string of the nickname found or an empty string

# File lib/nomener/parser.rb, line 124
# Internal: parse nickname out of string. presuming it's in quotes
#
# Modifies given string in place.
#
# nm - string of the name to parse; nil or an empty string is answered
#      immediately without touching anything
#
# Returns the dusted-off result of gutting nm with NICKNAME, or an empty
# string when nm is nil or empty
def self.parse_nick!(nm)
  return '' if nm.to_s.empty?

  # pull the nickname out first, then tidy what is left behind in nm
  extracted = gut!(nm, NICKNAME)
  nickname = dustoff(extracted)

  nm.sub!(NICKNAME_LEFTOVER, '')
  Cleaner.cleanup!(nm)

  nickname
end
splitcomma(str) click to toggle source

Internal: split on the comma to get the first and last names

str - the name

Returns an array of the last and first names found

# File lib/nomener/parser.rb, line 100
# Internal: split on the comma to get the first and last names
#
# str - the name, expected in "last, first" form
#
# Returns an array of the last and first names found. Both entries are
# always strings; a side that is missing from str (as in "Smith,") comes
# back as an empty string.
def self.splitcomma(str)
  last, first = str.split(',').map(&:strip)
  last = last.to_s
  first = first.to_s

  # check the last by comparing a re-ordering of the name
  # Mies van der Rohe, Ludwig
  # Snepscheut, Jan L. A. van de
  if first.include?(' ')
    full = "#{first} #{last}"
    # parse_last! edits its argument in place, so hand it a copy
    check = parse_last!(full.dup, :fl)

    # trust the full name and remove the parsed last, but only when the
    # re-parse actually found something; an empty result would otherwise
    # throw the real last name away
    if !check.empty? && check != last
      first = full.sub(check, '').strip
      last = check
    end
  end

  [last, first]
end