class GEDCOM_DATE_PARSER::DateParser

Constants

GEDFBETWEEN
GEDFFROM
GEDFINTERP
GEDFMONTH
GEDFNONE
GEDFNUMBER
GEDFSLASH

Public Class Methods

build_gedcom_date_part_string( date ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 938
def self.build_gedcom_date_part_string( date )
  # Render a single GEDCOM date part as text (class method)
  # Inputs:  date  -  the date part to render (GEDDate)
  # Outputs: a fresh String holding whatever get_date_text produced
  #
  # Concatenating onto an empty literal keeps the result a new object,
  # independent of the string get_date_text handed back.
  "" + get_date_text( date )
end
build_gedcom_date_string( date ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 894
def self.build_gedcom_date_string( date )
  # Stringify a GEDCOM date (class method)
  # Inputs:  date      -  date (GEDDateValue); date.flags, date.date1 and
  #                       date.date2 are read
  # Outputs: buffer    -  output string
  #
  # date.flags selects either a qualifier prefix ("abt ", "bet ", "from ", ...)
  # that is followed by the date part(s), or a status keyword ("child",
  # "BIC", ...) that is the complete result on its own.
  buffer = ""

  case ( date.flags )
    when GCABOUT then          buffer += "abt "
    when GCCALCULATED then     buffer += "cal "
    when GCESTIMATED then      buffer += "est "
    when GCBEFORE then         buffer += "bef "
    when GCAFTER then          buffer += "aft "
    when GCBETWEEN then        buffer += "bet "
    # Ruby's case has no fall-through, so an open-ended FROM must be listed
    # together with FROM..TO to receive the same "from " prefix.
    when GCFROM, GCFROMTO then buffer += "from "
    when GCTO then             buffer += "to "
    when GCINTERPRETED then    buffer += "int "

    # Status values: the keyword is the whole string.  It is returned
    # explicitly; a bare `return` would hand nil back to the caller.
    when GCCHILD then       return "child"
    when GCCLEARED then     return "cleared"
    when GCCOMPLETED then   return "completed"
    when GCINFANT then      return "infant"
    when GCPRE1970 then     return "pre-1970"
    when GCQUALIFIED then   return "qualified"
    when GCSTILLBORN then   return "stillborn"
    when GCSUBMITTED then   return "submitted"
    when GCUNCLEARED then   return "uncleared"
    when GCBIC then         return "BIC"
    when GCDNS then         return "DNS"
    when GCDNSCAN then      return "DNSCAN"
    when GCDEAD then        return "dead"
  end

  buffer += get_date_text( date.date1 ) if (date.date1)

  # Connector between the two date parts of a range / period
  case ( date.flags )
    when GCBETWEEN then buffer += " and "
    when GCFROMTO then  buffer += " to "
  end

  buffer += get_date_text( date.date2 ) if (date.date2)
  buffer
end
get_date_text( date ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 516
def self.get_date_text( date )
  # Render one GEDCOM date part as text (class method)
  # Inputs:  date  -  Date Part (GEDDate); date.flags, date.type and
  #                   date.data are read
  # Outputs: a new String
  #
  # Phrase and non-standard parts keep their text directly in date.data.
  return "" + date.data unless (date.flags & (GFPHRASE | GFNONSTANDARD)) == 0

  # Month-name table that goes with the calendar of this part
  month_names = case date.type
                  when GCTHEBREW then Hebrew_Months
                  when GCTFRENCH then French_Months
                  else                Default_Months
                end

  fields = date.data
  return "" if not fields

  text = ""
  bits = fields.flags
  if bits
    # A component is printed when its "missing" bit is clear
    has_day   = (bits & GFNODAY) == 0
    has_month = (bits & GFNOMONTH) == 0
    has_year  = (bits & GFNOYEAR) == 0

    if has_day
      text += fields.day.to_s
      text += " " if has_month || has_year
    end

    if has_month
      text += month_names[ fields.month - 1 ]
      text += " " if has_year
    end

    if has_year
      text += fields.year.to_s
      text += "-" + fields.year2.to_s if (bits & GFYEARSPAN) != 0
    end
  end

  # Only Gregorian parts carry an AD/BC marker
  before_christ = (date.type == GCTGREGORIAN) && (fields.adbc != GEDADBCAD)
  before_christ ? text + " BC" : text
end
get_token( parser ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 429
def self.get_token( parser )
  # Get a single token from this parser state (class method)
  # Inputs:  parser    -  parser state  (GEDParserState); its buffer, pos,
  #                       lastGeneralToken and lastSpecificToken are used
  # Outputs: general   -  general token
  #          specific  -  specific token
  #
  # Results, in the order they are tried:
  #   * a token previously saved in the parser  -> that token (and it is cleared)
  #   * nothing left after skipping spaces      -> TKEOF,    TKNONE
  #   * a run of decimal digits                 -> TKNUMBER, integer value
  #   * a lexeme found in TokenTable            -> that entry's general/specific
  #   * anything else                           -> TKERROR,  TKNONE, with
  #                                                parser.pos put back where it started

  # Remembered so that a failed match can rewind the parser
  startPos = parser.pos

  # if we've got a token saved in the parser, return it
  if ( parser.lastGeneralToken != TKNONE )
    general = parser.lastGeneralToken
    specific = parser.lastSpecificToken
    parser.lastGeneralToken = TKNONE
    parser.lastSpecificToken = TKNONE
    return general, specific
  end

  # eat leading white-space (only the space character is skipped)
 parser.pos+=1 while ( parser.buffer[ parser.pos, 1 ]==" " )

  # if the buffer is empty, return TKEOF
  if ( parser.buffer[ parser.pos, 1 ] == nil || parser.buffer[parser.pos, 1] == "")
    specific = TKNONE
    general = TKEOF
    return general, specific
  end

  lexeme = ""
  # if it's a number, parse it out and return it
  if ( parser.buffer[ parser.pos, 1 ] =~ /[0-9]/ )
    while ( parser.buffer[ parser.pos, 1 ] =~ /[0-9]/)
      lexeme << parser.buffer[ parser.pos, 1 ]
      parser.pos+=1
    end
    specific = lexeme.to_i
    general = TKNUMBER
    return general, specific
  end

  # currentToken indexes TokenTable; lexPos counts the characters consumed so far
  currentToken = 0
  lexPos = 0
  # if it is not a number, incrementally look at each token in the table
  # (an entry whose lexeme is 0 marks the end of the table)
  while ( TokenTable[ currentToken ].lexeme != 0 )
    # consume one more input character, upper-cased before comparison
    lexeme << parser.buffer[ parser.pos, 1 ].upcase
    lexPos+=1
    parser.pos+=1

    # the new character does not match the current candidate: move forward to
    # the first entry whose prefix is not less than what has been read
    # NOTE(review): this only ever moves forward, so it presumably relies on
    # TokenTable being sorted by lexeme -- confirm against the table
    if( lexeme[ lexPos-1, 1 ] != TokenTable[ currentToken ].lexeme[ lexPos-1, 1 ] )
      currentToken+=1 while( ( TokenTable[ currentToken ].lexeme != 0 ) &&
             ( (TokenTable[ currentToken ].lexeme[0, lexPos] <=> lexeme[0, lexPos] ) < 0 ) )

      #if the lexeme does not appear in the table, exit with an error
      break if ( TokenTable[ currentToken ].lexeme == 0 || \
          (TokenTable[ currentToken ].lexeme[0, lexPos] <=> lexeme[0, lexPos] ) != 0 )

    end

    #if the lexeme terminates, return the value of the current token
    # - a lexeme starting with a letter ends when the next input character is
    #   not alphanumeric; the table lexeme's length is not checked here, so a
    #   prefix of a table entry is accepted
    # - any other lexeme ends once the whole table lexeme has been read
    if( ( ( lexeme[0,1] =~ /[a-zA-Z]/) && ( parser.buffer[ parser.pos, 1 ] !~ /[0-9a-zA-Z]/) ) ||
        ( ( lexeme[0,1] !~ /[a-zA-Z]/ ) && ( lexPos >= TokenTable[ currentToken ].lexeme.length ) ) )
      specific = TokenTable[ currentToken ].specific
      general = TokenTable[ currentToken ].general
      return general, specific
    end

    #if the current token terminates before the lexeme, then we have an error
    break if ( TokenTable[ currentToken ].lexeme[ lexPos, 1 ] == nil )

  end

  # No match: rewind so the caller can see the unparsed text from its start
  parser.pos = startPos

  specific = TKNONE
  general = TKERROR

  return general, specific
end
parse_date_part( parser, datePart, type ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 578
def self.parse_date_part( parser, datePart, type )
  # Parse out a date part (class method)
  # Inputs:  parser    -  parser state
  #          datePart  -  date part (GEDDate); reset here, then filled in place
  #          type      -  calendar type
  # Outputs: None  (updated date part)
  # Raises:  DateParseException when get_token reports TKERROR, or when the
  #          token sequence ends in ST_DT_ERROR (no matching row in
  #          DateStateTable, or an action rejected the token)
  #
  # Tokens are read with get_token and fed, one at a time, to DateStateTable.
  # Each matching row names the next state and a numbered action (see the
  # inner case).  `number` holds the most recent number whose role (day or
  # year) is not decided yet; `flags` records what has been seen so far.
  state = ST_DT_START
  flags = GEDFNONE

  # Initialize the datePart, in case it contains old data
  datePart.type = type
  datePart.flags = GFNONE
  if (type == GCTGREGORIAN)
    datePart.data = GEDDateGreg.new(flags, 0, 0, 0, 0, GEDADBCAD)
  else
    datePart.data = GEDDateGeneral.new(flags, 0, 0, 0)
  end
  number = 0

  while ( ( state != ST_DT_END ) && ( state != ST_DT_ERROR ) )
    general, specific = get_token( parser )
    raise DateParseException, "error parsing datepart, pre-transition" if (general == TKERROR)
    transitionFound = 0

    case ( general )
      when TKNUMBER, TKMONTH, TKSLASH, TKBC, TKEOF, TKERROR
        # tokens the state table consumes directly
      when TKTO, TKAND
        # belongs to the enclosing date value: hand it back for the caller.
        # The token itself is still looked up in the state table below.
        put_token( parser, general, specific)
      else
        # Anything else ends this date part: hand the token back and let the
        # state table see end-of-input so the pending number and the
        # "missing component" flags are finalised.  There is deliberately no
        # `break` here -- in Ruby it would leave the enclosing while loop and
        # skip that finalisation.
        put_token( parser, general, specific )
        general = TKEOF
        specific = TKNONE
    end

    DateStateTable.each do |dateState|
      # a row with state < 1 marks the end of the table
      break if dateState.state < 1

      if( ( dateState.state == state ) && ( dateState.input == general ) )
        state = dateState.nextState
        transitionFound = 1

        case dateState.action
          # 0: store number, set NUMBER
          when 0
            number = specific
            flags |= GEDFNUMBER

          # 1: if MONTH, then error, else set number to be day, set month, set MONTH
          when 1
            if ( type == GCTFRENCH )
              # if the token is "JOUR", make sure they also typed at least
              # part of "COMPLIMENTAIRES"

              case specific
                when TKJOUR
                  general, specific = get_token( parser )
                  raise DateParseException, "error parsing datepart, post-JOUR (french calendar)" if (general == TKERROR)
                  if ( general != TKMONTH || specific != TKCOMP )
                    # JOUR must be followed by the COMP month token
                    state = ST_DT_ERROR
                    put_token( parser, general, specific )
                  else
                    # Ruby's case does not fall through into the TKCOMP
                    # branch, so the combined month is selected here.
                    specific = TKJOUR_COMP
                  end

                when TKCOMP
                  specific = TKJOUR_COMP
              end
            elsif ( type == GCTHEBREW )
              # if the token is "ADAR", see if it is followed by "SHENI",
              # and if it is, change the month to "ADAR SHENI"

              if( specific == TKADAR )
                general, specific = get_token( parser )
                raise DateParseException, "error parsing datepart, post-ADAR" if (general == TKERROR)
                if( general == TKMONTH && specific == TKSHENI )
                  specific = TKADAR_SHENI
                else
                  put_token( parser, general, specific )
                end
              end
            end

            if ( ( flags & GEDFMONTH ) != 0 )
              # a second month in the same date part
              state = ST_DT_ERROR
            else
              month = validate_month_for_type( specific, type )
              if ( month < 1 )
                state = ST_DT_ERROR
              else
                datePart.data.day = number
                datePart.data.month = month
              end
              flags |= GEDFMONTH
              number = 0
            end

          # 2: if SLASH, then error, else set SLASH, set number to be year
          when 2
            if ( ( ( flags & GEDFSLASH ) != 0 ) || ( type != GCTGREGORIAN ) )
              state = ST_DT_ERROR
            else
              datePart.data.year = number if ( number > 0 )

              datePart.data.flags |= GFYEARSPAN
              number = 0
              flags |= GEDFSLASH
            end

          # 3: if not SLASH set number to be year, set bc
          # 4: if not SLASH set number to be year, terminate
          # 6: terminate
          when 3, 4, 6
            if (dateState.action == 3)
              if( type != GCTGREGORIAN )
                state = ST_DT_ERROR
                # leaves the DateStateTable.each block; the while loop then
                # stops because state is ST_DT_ERROR
                break
              end
              datePart.data.adbc = GEDADBCBC
            end

            if (dateState.action == 3 || dateState.action == 4)
              if( ( number > 0 ) && ( ( flags & GEDFSLASH ) == 0 ) )
                datePart.data.year = number
                number = 0
              end
            end

            # record which components never received a value
            datePart.data.flags |= GFNODAY if( datePart.data.day < 1 )

            datePart.data.flags |= GFNOMONTH if( datePart.data.month < 1 )

            datePart.data.flags |= GFNOYEAR if( datePart.data.year < 1 )


          # 5: if NUMBER, set number to be day.  set number to be year, store number, set NUMBER
          when 5
            datePart.data.day = number if( ( number > 0 ) && ( ( flags & GEDFNUMBER ) != 0 ) )

            datePart.data.year = specific

            number = 0
            flags |= GEDFNUMBER

          # 7: set number to be year2  (Gregorian Calendar)
          when 7
            # only the last two digits of the second year are kept
            datePart.data.year2 = ( specific % 100 )
            number = 0
        end

        # first matching row wins
        break
      end
    end

    state = ST_DT_ERROR if( transitionFound == 0 )
  end

  raise DateParseException, "error parsing datepart, general" if( state == ST_DT_ERROR )

end
parse_gedcom_date( dateString, date, type = GCTDEFAULT ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 744
def self.parse_gedcom_date( dateString, date, type = GCTDEFAULT )
  # Parse out a GEDCOM date (class method)
  # Inputs:  dateString    - String containing GEDCOM date
  #          date          -  date  (GEDDateValue); date.flags, date.date1 and
  #                           date.date2 are updated in place
  #          type          -  calendar type
  # Outputs: None  (updated date)
  # Raises:  DateParseException when get_token reports TKERROR, or when the
  #          token sequence ends in ST_DV_ERROR.  In the latter case the date
  #          part being worked on is first flagged GFNONSTANDARD and given the
  #          unparsed remainder of the input as its data.
  #
  # Tokens are read with get_token and looked up in DateValueStateTable; each
  # matching row names the next state and a numbered action (see the case
  # below).  Actual day/month/year parsing is delegated to parse_date_part.

  parser = GEDParserState.new( "", 0, 0, 0 )
  parser.buffer = dateString

  # New date 1 if it's nil
  date.date1 = GEDDate.new( type, GFNONE, nil ) if not date.date1
  datePart = date.date1

  state = ST_DV_START
  flags = GEDFNONE
  # number of date parts successfully parsed so far
  datesRead = 0

  while ( ( state != ST_DV_END ) && ( state != ST_DV_ERROR ) )
    # position before the token is read; used by the error handling after the loop
    savePos = parser.pos
    general, specific = get_token( parser )
    raise DateParseException, "error parsing date" if (general == TKERROR)
    transitionFound = 0

    DateValueStateTable.each do |dateValueState|
      # a row with state < 1 marks the end of the table
      break if dateValueState.state < 1

      if( ( dateValueState.state == state ) && ( dateValueState.input == general ) )

        transitionFound = 1
        state = dateValueState.nextState

        case ( dateValueState.action )
          # 0: inc dates read, parse a date
          when 0
            # the token just read starts the date part, so give it back first
            put_token( parser, general, specific )
            begin
              if (datesRead != 0)
                # New date 2 if it's nil
                date.date2 = GEDDate.new( type, GFNONE, nil ) if not date.date2
                datePart = date.date2
              end
              parse_date_part( parser, datePart, type )
              datesRead+=1
            rescue
              # bare rescue: any StandardError raised while parsing the part
              # is turned into the error state
              state = ST_DV_ERROR
            end

          # 1: set the approx type
          when 1
            date.flags = case specific
              when TKABOUT      then GCABOUT
              when TKCALCULATED then GCCALCULATED
              when TKESTIMATED  then GCESTIMATED
            end

          # 2: set the range type
          when 2
            date.flags = case specific
            when TKBEFORE then GCBEFORE
            when TKAFTER  then GCAFTER
            when TKBETWEEN
              # remembered so that actions 6 and 8 can check for it
              flags |= GEDFBETWEEN
              GCBETWEEN
            end

          # 3: set the period type
          when 3
            if general == TKTO
              date.flags = GCTO
            elsif specific == TKFROM
              date.flags = GCFROM
              # remembered so that action 9 can check for it
              flags |= GEDFFROM
            end

          # 4: set interpreted
          when 4
            date.flags = GCINTERPRETED
            flags |= GEDFINTERP

          # 5: get remaining buffer as phrase
          # 7: if 'interpreted', get remaining buffer as phrase
          when 5, 7
            # This is kind of a sucky way to handle this, but the shared functionality
            # between action 5 and 7 doesn't seem like enough to warrant breaking out
            # into its own method.
            if dateValueState.action == 7 && ( flags & GEDFINTERP ) == 0
              state = ST_DV_ERROR
              # leaves the DateValueStateTable.each block
              break
            end

            # Strip off trailing whitespace and closing parenthesis
            # (everything from the first ')' onwards is dropped)
            buffer = parser.buffer.slice( parser.pos, parser.buffer.length ).rstrip.split( ')' )[0]
            # the phrase replaces the data of the current date part
            datePart.data = buffer
            datePart.flags = GFPHRASE
            # the rest of the input has been consumed
            parser.pos = parser.buffer.length

          # 6: if 'between' and not second date read, error, else terminate
          when 6
            state = ST_DV_ERROR if( ( ( flags & GEDFBETWEEN ) != 0 ) && datesRead < 2 )

          # else -- nextState is ST_DV_END, so we're done!

          # 7: see above 5

          # 8: if 'between', prepare to read next date
          when 8
            state = ST_DV_ERROR if( ( flags & GEDFBETWEEN ) == 0 )

          # 9: if 'from', set FROMTO, prepare to read next date
          when 9
            if( ( flags & GEDFFROM ) == 0 )
              state = ST_DV_ERROR
            else
              date.flags = GCFROMTO
            end

          # 10: set status
          when 10
            date.flags = case specific
            when TKCHILD     then GCCHILD
            when TKCLEARED   then GCCLEARED
            when TKCOMPLETED then GCCOMPLETED
            when TKINFANT    then GCINFANT
            when TKPRE1970   then GCPRE1970
            when TKQUALIFIED then GCQUALIFIED
            when TKSTILLBORN then GCSTILLBORN
            when TKSUBMITTED then GCSUBMITTED
            when TKUNCLEARED then GCUNCLEARED
            when TKBIC       then GCBIC
            when TKDNS       then GCDNS
            when TKDNSCAN    then GCDNSCAN
            when TKDEAD      then GCDEAD
            end
        end
        break  # ... Out of the DateValueStateTable.each block
      end
    end

    # no row matched the current state / token pair
    state = ST_DV_ERROR if( transitionFound == 0 )
  end

  if( state == ST_DV_ERROR )
    # Rewind to where the last token was read from and keep the remaining
    # text on the current date part, marked as non-standard, before raising
    parser.pos = savePos
    datePart.flags = GFNONSTANDARD
    datePart.data = parser.buffer.slice( parser.pos, parser.buffer.length )
    raise DateParseException, "error parsing date, general"
  end
end
put_token( parser, general, specific ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 506
def self.put_token( parser, general, specific )
  # Save a token in the parser state so it can be read again (class method)
  # Inputs:  parser    -  parser state  (GEDParserState)
  #          general   -  general token to store
  #          specific  -  specific token to store
  # Outputs: None of interest (the specific token is the method's value)
  parser.lastGeneralToken, parser.lastSpecificToken = general, specific
  specific
end
validate_month_for_type( month, calType ) click to toggle source
# File lib/gedcom_ruby/date_parser.rb, line 560
def self.validate_month_for_type( month, calType )
  # Make sure this is a valid month for this calendar type (class method)
  # Inputs:  month    -  specific token of the month (a TK... value)
  #          calType  -  calendar type (a GCT... value)
  # Outputs: 1-based position of the month within the calendar's range of
  #          month tokens, or -1 when the token lies outside that range or
  #          the calendar type is not one of those handled here
  case calType
    # `when A || B` would evaluate to `when A` only; a comma-separated list is
    # required for the Julian calendar to share the Gregorian month names.
    when GCTGREGORIAN, GCTJULIAN
      return ( month - TKJANUARY + 1 ) if( month >= TKJANUARY && month <= TKDECEMBER )

    when GCTHEBREW
      return ( month - TKTISHRI + 1 ) if( month >= TKTISHRI && month <= TKELUL )

    when GCTFRENCH
      return ( month - TKVENDEMIAIRE + 1 ) if( month >= TKVENDEMIAIRE && month <= TKJOUR_COMP )
  end
  return -1
end