module MailAddress

Constants

ADDRESS_SEPARATORS_

# Character set combined with ADDRESS_SEPARATORS_ to build CHARS_REQUIRE_QUOTES_.
# The literal is single-quoted, so the `\"` inside it stays as two characters:
# a backslash followed by a double quote.
SPECIAL_CHARS = '()<>@:\".[]'

CLOSERS_
DOMAIN_PART_REGEXP_STR_
EMAIL_ADDRESS_
ESCAPED_BACKSLASHES_
ESCAPED_DOUBLE_QUOTES_

# SPECIAL_CHARS and ADDRESS_SEPARATORS_ combined with `+`.
# NOTE(review): presumably the characters that force a display name to be
# quoted; no use of this constant is visible here, so confirm against callers.
CHARS_REQUIRE_QUOTES_ = SPECIAL_CHARS + ADDRESS_SEPARATORS_

LOCAL_PART_REGEXP_STR_
OPENERS_

This module is ported from the Google Closure JavaScript Library.

-> https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress.js

QUOTED_REGEX_STR_
UNQUOTED_REGEX_STR_
VERSION

Public Class Methods

collapse_whitespace(str) click to toggle source
# File lib/mail_address/simple_parser.rb, line 121
# Replaces every run of whitespace (and of the \xc2\xa0 sequence, the UTF-8
# encoding of a no-break space) with one ordinary space, then trims both ends.
#
# @param str [String] text to normalize
# @return [String] a new, normalized string; +str+ itself is not modified
def self.collapse_whitespace(str)
  squeezed = str.gsub(/[\s\xc2\xa0]+/, ' ')
  squeezed.strip
end
g_parse(str)
Alias for: parse_simple
get_token(str, pos) click to toggle source
# File lib/mail_address/simple_parser.rb, line 87
# Returns the token that starts at +pos+ in +str+.
#
# A character that is not listed in OPENERS_, or that is an escaped double
# quote, is a one-character token. Otherwise the token runs from the opener
# through its matching entry in CLOSERS_; escaped double quotes are skipped
# while looking for that closer. If no closer is found, only the opener
# character itself is returned.
#
# @param str [String] text being scanned
# @param pos [Integer] index of the first character of the token
# @return [String] the token beginning at +pos+
def self.get_token(str, pos)
  ch = str[pos]
  opener_idx = OPENERS_.index(ch)

  # Plain characters and escaped quotes never open a group.
  return ch if opener_idx.nil? || is_escaped_dbl_quote(str, pos)

  closer = CLOSERS_[opener_idx]
  close_at = str.index(closer, pos + 1)

  # Step over escaped quotes until the real closing character is reached.
  close_at = str.index(closer, close_at + 1) while close_at && is_escaped_dbl_quote(str, close_at)

  close_at ? str[pos..close_at] : ch
end
is_address_separator(ch) click to toggle source
# File lib/mail_address/simple_parser.rb, line 129
# Tells whether +ch+ is contained in ADDRESS_SEPARATORS_.
#
# @param ch [String] candidate token
# @return [Boolean] result of ADDRESS_SEPARATORS_.include?
def self.is_address_separator(ch)
  ADDRESS_SEPARATORS_.include?(ch)
end
is_empty_or_whitespace(str) click to toggle source
# File lib/mail_address/simple_parser.rb, line 125
# Checks whether +str+ consists only of whitespace / \xc2\xa0 characters
# (or is empty).
#
# @param str [String] text to test
# @return [Integer, nil] 0 (truthy) when the whole string is blank, nil otherwise
def self.is_empty_or_whitespace(str)
  blank_pattern = /\A[\s\xc2\xa0]*\z/
  blank_pattern =~ str
end
is_escaped_dbl_quote(str, pos) click to toggle source
# File lib/mail_address/simple_parser.rb, line 110
# Tells whether the character at +pos+ is a double quote escaped by a backslash.
#
# The quote counts as escaped when it is immediately preceded by an odd number
# of consecutive backslashes (an even run is made of escaped backslashes).
#
# @param str [String] text being scanned
# @param pos [Integer] index of the character to inspect
# @return [Boolean] true only for an escaped double quote
def self.is_escaped_dbl_quote(str, pos)
  return false unless str[pos] == '"'

  backslashes = 0
  cursor = pos - 1
  # Walk backwards over the run of backslashes directly before the quote.
  while cursor >= 0 && str[cursor] == '\\'
    backslashes += 1
    cursor -= 1
  end

  backslashes.odd?
end
is_valid(address) click to toggle source
# File lib/mail_address/simple_parser.rb, line 133
# Matches the +address+ attribute of the given object against EMAIL_ADDRESS_.
#
# @param address [#address] object exposing the e-mail string via +address+
# @return [Integer, nil] result of the +=~+ match (truthy when it matches)
def self.is_valid(address)
  pattern = EMAIL_ADDRESS_
  pattern =~ address.address
end
parse(*addresses) click to toggle source
# File lib/mail_address/mail_address.rb, line 8
# Parses address-list text into an array of MailAddress::Address objects.
#
# Only String arguments are used (others are dropped by +grep+). The text is
# split into tokens by +_tokenize+ and walked once; each ',' or ';' token
# finishes the entry collected so far.
#
# @param addresses [Array] one or more strings holding addresses
# @return [Array<MailAddress::Address>] one object per completed entry
def self.parse(*addresses)
  lines = addresses.grep(String)
  line = lines.join('').strip

  # empty or <> or < or >
  # (i.e. nothing but '<', '>', ';', ',', spaces and backslashes): return a
  # single Address built from the raw text with a nil second argument.
  if line.empty? || line.match(/\A[<>;, \\]+\z/)
    return [ MailAddress::Address.new(line, nil, line) ]
  end

  # undisclosed-recipient
  # The whole line is returned as one Address, again with a nil second argument.
  if line.match(/undisclosed[ \-]recipients?: ?;?/i)
    return [ MailAddress::Address.new(line, nil, line) ]
  end

  # phrase / address collect the token pieces of the entry being built,
  # objs the finished entries, original the raw text of the current entry.
  phrase, address, objs = [], [], []
  original = ''
  # depth counts unclosed '<'; end_paren_idx marks the last token of a
  # parenthesized run that has already been pushed to phrase.
  depth, idx, end_paren_idx = 0, 0, 0

  tokens = _tokenize lines
  len    = tokens.length
  # First ',', ';' or '<' token at or after idx ("" when there is none).
  _next  = _find_next idx, tokens, len

  for idx in 0 ... len do

    token = tokens[idx]
    substr = token[0, 1]
    # The raw text is accumulated before any skipping below, so it also
    # contains tokens that are skipped.
    original << token

    # Skip tokens up to and including end_paren_idx; they were already pushed
    # to phrase as one joined string.
    if (end_paren_idx > 0 && end_paren_idx >= idx)
      next
    end

    if (substr == '(' && !address.empty?)
      # A '(' seen after address pieces were collected: push everything up to
      # the token containing ')' onto phrase.
      end_paren_idx = _find_next_paren(idx, tokens, len)
      if end_paren_idx == -1
        # end paren doesn't exist
        # but nothing to do
        # NOTE(review): with -1 the range below is idx .. -1, i.e. every
        # remaining token, and the skip check above stays inactive, so those
        # tokens are then processed again - confirm this is intended.
      end
      rem = tokens[idx .. end_paren_idx]
      phrase.push(rem.join(''))
    elsif (substr == '<')
      depth += 1
    elsif (substr == '>')
      depth -= 1 if depth > 0
    elsif (substr == ',' || substr == ';')
      # Separator: finish the current entry. The trailing separator (and any
      # whitespace after it) is removed from the raw text first.
      original.sub!(/[,;]\s*\z/, '')

      if depth > 0
        # raise "Unmatched '<>' in line"
        # Instead of raising, emit the raw text as an Address with a nil
        # second argument and discard the collected pieces.
        o = MailAddress::Address.new(original, nil, original)
        phrase.clear; address.clear
      else
        # _complete returns nil when both phrase and address are empty.
        o = _complete(phrase, address, original)
      end

      objs.push(o) if o
      # Reset the per-entry state and look ahead from the next token.
      depth = 0
      end_paren_idx = 0
      original = ''
      _next = _find_next idx+1, tokens, len
    elsif (depth > 0)
      # Inside '<...>': everything belongs to the address.
      token.strip!
      address.push(token)
    elsif (_next == '<')
      # A '<' comes before the next separator, so this token is part of the phrase.
      phrase.push(token)
    elsif ( token.match(/^[.\@:;]/) || address.empty? || address[-1].match(/^[.\@:;]/) )
      # No angle brackets ahead: the token extends the address when it starts
      # with '.', '@', ':' or ';', when no address piece exists yet, or when
      # the previous address piece starts with one of those characters.
      token.strip!
      address.push(token)
    else
      phrase.push(token)
    end
  end
  objs
end
parse_first(*addresses) click to toggle source
# File lib/mail_address/mail_address.rb, line 4
# Parses the given address strings with +parse+ and returns only the first result.
#
# @param addresses [Array] arguments forwarded unchanged to +parse+
# @return [Object, nil] first element of the parsed list, or nil when it is empty
def self.parse_first(*addresses)
  parsed = parse(*addresses)
  parsed.first
end
parse_internal(addr) click to toggle source
# File lib/mail_address/simple_parser.rb, line 51
# Builds a MailAddress::Address from the text of a single address.
#
# The text is read token by token (see +get_token+). A token of the form
# "<...>" supplies the address; tokens seen before an address is found make up
# the display name. When no "<...>" token exists and the collected name
# contains "@", the whole text is taken as the address and the name is empty.
#
# @param addr [String] text of one address, e.g. "Name <user@host>"
# @return [MailAddress::Address] built from (name, address, stripped +addr+)
def self.parse_internal(addr)
  name = ''
  address = ''
  pos = 0
  while pos < addr.length
    token = get_token(addr, pos)
    # Position of '>' only matters for tokens that start with '<'.
    close_idx = token[0] == '<' ? token.index('>') : nil
    if close_idx
      address = token[1, close_idx - 1]
    elsif address == ''
      name << token
    end
    pos += token.length
  end

  # Bare address such as "jlim@google.com": there is no display name.
  if address == '' && name.index('@')
    address = name
    name = ''
  end

  name = collapse_whitespace(name)
  # Drop one pair of surrounding single quotes, then one pair of double quotes.
  ["'", '"'].each do |quote|
    name = name[1..-2] if name.start_with?(quote) && name.end_with?(quote)
  end

  # Undo escaping of double quotes first, then of backslashes.
  name = name.gsub(ESCAPED_DOUBLE_QUOTES_, '"').gsub(ESCAPED_BACKSLASHES_, '\\')

  address.strip!
  MailAddress::Address.new(name, address, addr.strip)
end
parse_simple(str) click to toggle source
# File lib/mail_address/simple_parser.rb, line 21
# Splits a string holding several addresses and parses each one with
# +parse_internal+.
#
# Whitespace is collapsed first. An entry ends at an address separator, or at
# a single space when the text collected so far already parses to a valid
# address. Entries that are empty or blank are dropped.
#
# @param str [String] text containing zero or more addresses
# @return [Array] results of +parse_internal+, in input order
def self.parse_simple(str)
  parsed = []
  pending = ''

  text = collapse_whitespace(str)
  cursor = 0
  while cursor < text.length
    token = get_token(text, cursor)
    at_boundary = is_address_separator(token) ||
                  (token == ' ' && is_valid(parse_internal(pending)))

    unless at_boundary
      pending << token
      cursor += token.length
      next
    end

    # Boundary reached: flush what was collected and step over the boundary.
    parsed.push(parse_internal(pending)) unless is_empty_or_whitespace(pending)
    pending = ''
    cursor += 1
  end

  # Whatever remains after the last boundary is the final entry.
  parsed.push(parse_internal(pending)) unless is_empty_or_whitespace(pending)
  parsed
end
Also aliased as: g_parse

Private Class Methods

_complete(phrase, address, original) click to toggle source
# File lib/mail_address/mail_address.rb, line 129
# Turns the collected phrase and address pieces into a MailAddress::Address
# and empties both arrays.
#
# The display name is the joined phrase with whitespace collapsed, one pair of
# surrounding single quotes and then double quotes removed, and escaped double
# quotes / backslashes unescaped.
#
# @param phrase [Array<String>] display-name pieces (cleared on success)
# @param address [Array<String>] address pieces (cleared on success)
# @param original [String] raw text of the entry
# @return [MailAddress::Address, nil] nil when both arrays are empty
def self._complete(phrase, address, original)
  return nil if phrase.empty? && address.empty?

  name = collapse_whitespace(phrase.join('').strip)
  ["'", '"'].each do |quote|
    name = name[1..-2] if name.start_with?(quote) && name.end_with?(quote)
  end

  # Double quotes are unescaped before backslashes.
  name = name.gsub(ESCAPED_DOUBLE_QUOTES_, '"').gsub(ESCAPED_BACKSLASHES_, '\\')

  result = MailAddress::Address.new(name, address.join(''), original)
  phrase.clear
  address.clear
  result
end
_find_next(idx, tokens, len) click to toggle source
# File lib/mail_address/mail_address.rb, line 110
# Looks ahead from +idx+ for the first token that, once stripped, is exactly
# ",", ";" or "<".
#
# @param idx [Integer] index of the first token to inspect
# @param tokens [Array<String>] token list
# @param len [Integer] exclusive upper bound for the search
# @return [String] the stripped token found, or "" when there is none
def self._find_next(idx, tokens, len)
  (idx...len).each do |i|
    stripped = tokens[i].strip
    return stripped if [',', ';', '<'].include?(stripped)
  end
  ""
end
_find_next_paren(idx, tokens, len) click to toggle source

Finds the index of the next token that contains a closing parenthesis; returns -1 when there is none.

# File lib/mail_address/mail_address.rb, line 120
# Searches from +idx+ for the first token containing ")".
#
# @param idx [Integer] index of the first token to inspect
# @param tokens [Array<String>] token list
# @param len [Integer] exclusive upper bound for the search
# @return [Integer] index of the matching token, or -1 when none is found
def self._find_next_paren(idx, tokens, len)
  found = (idx...len).find { |i| tokens[i].strip.include?(')') }
  found || -1
end
_tokenize(addresses) click to toggle source
# File lib/mail_address/mail_address.rb, line 85
# Splits address text into tokens for +parse+.
#
# The strings are joined with ",", every backslash is removed, leading
# whitespace is dropped and runs of CR/LF become one space. The text is then
# consumed from the front, one token at a time:
#   1. a double-quoted string (only tried while the remaining text still
#      contains a non-empty quoted section somewhere),
#   2. a run of characters that are neither whitespace nor special,
#   3. a single special character.
# Each token keeps the whitespace that followed it. A final "," token is
# always appended.
#
# @param addresses [Array<String>] address strings
# @return [Array<String>] tokens, ending with ","
# @raise [RuntimeError] if no rule matches the remaining text
def self._tokenize(addresses)
  remaining = addresses.join(',') # $_

  remaining.gsub!(/\\/, '')
  remaining.sub!(/\A\s+/, '')
  remaining.gsub!(/[\r\n]+/,' ')

  tokens = []
  until remaining == ''
    consumed =
      (remaining.match(/"[^"]+"/) && remaining.sub!(/\A(\\?"(?:[^"\\]+|\\.)*")(\s*)/, '')) || # "..."
      remaining.sub!(/\A([^\s()<>\@,;:\\".\[\]]+)(\s*)/, '') ||
      remaining.sub!(/\A([()<>\@,;:\\".\[\]])(\s*)/, '')
    raise "Unrecognized line: #{remaining}" unless consumed

    # $1 is the token text, $2 the whitespace after it (from the last successful match).
    tokens.push("#{$1}#{$2}")
  end

  tokens.push(',')
  tokens
end