class BibTeX::Lexer

The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.

Constants

MODE

Attributes

defaults[R]
patterns[R]
mode[RW]
options[R]
scanner[R]
stack[R]

Public Class Methods

new(options = {}) click to toggle source

Creates a new instance. Possible options and their respective default values are:

  • :include => [:errors] A list that may contain :meta_content and :errors; depending on whether or not these are present, the respective tokens are included in the parse tree.

  • :strict => true In strict mode objects can start anywhere, so the `@' symbol may not appear except inside literals or @comment objects. For a more lenient lexer, set this option to false; objects are then expected to start after a new line (leading white space is permitted).

  • :strip => true When enabled, newlines will be stripped from quoted string values.

# File lib/bibtex/lexer.rb, line 91
# Builds the lexer configuration by layering the caller's +options+ on top of
# Lexer.defaults, then brings the lexer into its initial state via #reset.
#
# options:: Hash of settings overriding the defaults (may be omitted).
def initialize(options = {})
  defaults = Lexer.defaults
  @options = defaults.merge(options)
  reset
end

Public Instance Methods

active?(object) click to toggle source

Returns true if the lexer is currently parsing the given object type.

# File lib/bibtex/lexer.rb, line 127
# Compares the object type the lexer is currently inside (@active_object)
# with +object+ and returns the result of that comparison.
def active?(object)
  current = @active_object
  current == object
end
allow_missing_keys?() click to toggle source
# File lib/bibtex/lexer.rb, line 136
# Reports the :allow_missing_keys option as a strict boolean: true when the
# option holds a truthy value, false when it is nil, false or absent.
def allow_missing_keys?
  @options[:allow_missing_keys] ? true : false
end
analyse(string = nil) click to toggle source

Start the lexical analysis.

# File lib/bibtex/lexer.rb, line 168
# Runs the lexical analysis.
#
# The source is +string+ when given; otherwise the string of the scanner
# that is already set is analysed again. Assigning the source through #data=
# happens in both cases. The parse_* method selected by MODE[@mode] is then
# invoked repeatedly until the scanner reaches the end of the source, and a
# final [false, '$end'] token is pushed.
#
# Raises ArgumentError when neither +string+ nor a scanner is available.
# Returns whatever the final #push returns.
def analyse(string = nil)
  unless string || @scanner
    raise(ArgumentError, 'Lexer: failed to start analysis: no source given!')
  end

  self.data = string || @scanner.string

  # The mode may change on every step, so the handler is looked up each time.
  send("parse_#{MODE[@mode]}") until @scanner.eos?

  push([false, '$end'])
end
bibtex_mode?() click to toggle source

Returns true if the lexer is currently parsing a BibTeX object.

# File lib/bibtex/lexer.rb, line 118
# True when the MODE table maps the current @mode to :bibtex.
def bibtex_mode?
  resolved = MODE[@mode]
  resolved == :bibtex
end
data=(data) click to toggle source

Sets the source for the lexical analysis and resets the internal state.

# File lib/bibtex/lexer.rb, line 107
# Installs a fresh StringScanner over +data+ as the lexer's source and then
# calls #reset to clear the lexer state.
def data=(data)
  scanner = StringScanner.new(data)
  @scanner = scanner
  reset
end
next_token() click to toggle source

Returns the next token from the parse stack.

# File lib/bibtex/lexer.rb, line 115
# Removes the token at the front of @stack and returns it.
def next_token
  @stack.shift
end
push(value) click to toggle source

Pushes a value onto the parse stack. Returns the Lexer.

# File lib/bibtex/lexer.rb, line 145
# Pushes the token +value+ (a [type, payload] pair) onto the parse stack and
# returns the lexer itself.
#
# * :CONTENT and :STRING_LITERAL payloads have each newline plus the white
#   space following it replaced by a single space (in place) when
#   #strip_line_breaks? is true. If the token on top of the stack has the
#   same type, the payload is appended to it instead of adding a new token.
# * :ERROR tokens are stored only if errors are included; either way the
#   current object is left via #leave_object.
# * :META_CONTENT tokens are stored only if meta content is included.
# * Every other token is stored as is.
def push(value)
  kind = value[0]

  if [:CONTENT, :STRING_LITERAL].include?(kind)
    value[1].gsub!(/\n\s*/, ' ') if strip_line_breaks?

    # Adjacent chunks of the same type are merged into one token.
    if !@stack.empty? && kind == @stack.last[0]
      @stack.last[1] << value[1]
    else
      @stack << value
    end
  elsif kind == :ERROR
    @stack << value if @include_errors
    leave_object
  elsif kind == :META_CONTENT
    @stack << value if @include_meta_content
  else
    @stack << value
  end

  self
end
reset() click to toggle source
# File lib/bibtex/lexer.rb, line 96
# Puts the lexer back into its initial state: empty token stack, brace
# level 0, :meta mode and no active object. Also refreshes the cached flags
# derived from the :include option. Returns the lexer itself.
def reset
  @stack = []
  @brace_level = 0
  @mode = :meta
  @active_object = nil

  # Cached so #push does not have to search the option list on every token.
  included = @options[:include]
  @include_meta_content = included.include?(:meta_content)
  @include_errors = included.include?(:errors)

  self
end
strict?() click to toggle source

Returns true if the lexer is currently in strict mode.

# File lib/bibtex/lexer.rb, line 132
# Reports the :strict option as a strict boolean: true when the option holds
# a truthy value, false when it is nil, false or absent.
def strict?
  @options[:strict] ? true : false
end
strip_line_breaks?() click to toggle source
# File lib/bibtex/lexer.rb, line 140
# True when the :strip option is set and the lexer is not currently inside a
# comment object; false otherwise.
def strip_line_breaks?
  return false unless options[:strip]

  !active?(:comment)
end
symbols() click to toggle source
# File lib/bibtex/lexer.rb, line 112
# Returns the first element (the token type) of every token currently on the
# stack, in stack order.
def symbols
  @stack.map { |token| token.first }
end

Private Instance Methods

backtrace(error) click to toggle source
# File lib/bibtex/lexer.rb, line 349
# Collects the tokens of the object that failed to parse into one :ERROR
# token.
#
# Tokens are popped off the stack, newest first, until the stack is empty or
# the most recently popped token is an :AT or :META_CONTENT token (which is
# kept as part of the collected tokens). The collected tokens, in their
# original order and followed by +error+, are then pushed as the payload of
# an :ERROR token. Returns whatever #push returns.
def backtrace(error)
  trail = []

  loop do
    break if @stack.empty?
    break if !trail.empty? && [:AT, :META_CONTENT].include?(trail.first[0])

    trail.unshift(@stack.pop)
  end

  trail.push(error)
  push([:ERROR, trail])
end
enter_object() click to toggle source

Called when the lexer encounters a new BibTeX object.

# File lib/bibtex/lexer.rb, line 285
# Called when the lexer encounters a new BibTeX object.
#
# Resets the brace level, pushes an :AT token and then tries the :string,
# :preamble, :comment and :entry patterns, in that order, at the current
# scanner position. The first pattern that matches determines the new mode
# and active object, and a :STRING, :PREAMBLE, :COMMENT or :NAME token with
# the matched text is pushed. If none matches, #error_unexpected_object is
# called.
#
# For entries the opening brace and the key are consumed right away when
# present: an :LBRACE token is pushed and, if the key pattern matches (the
# :optional_key pattern is used when #allow_missing_keys? is true), a :KEY
# token holding the match without its last character and without
# surrounding white space.
def enter_object
  @brace_level = 0
  push [:AT,'@']

  case
  when @scanner.scan(Lexer.patterns[:string])
    @mode = @active_object = :string
    push [:STRING, @scanner.matched]
  when @scanner.scan(Lexer.patterns[:preamble])
    @mode = @active_object = :preamble
    push [:PREAMBLE, @scanner.matched]
  when @scanner.scan(Lexer.patterns[:comment])
    @mode = @active_object = :comment
    push [:COMMENT, @scanner.matched]
  when @scanner.scan(Lexer.patterns[:entry])
    @mode = @active_object = :entry
    push [:NAME, @scanner.matched]

    if @scanner.scan(Lexer.patterns[:lbrace])
      # The brace level was zeroed above and the active object is :entry, so
      # this is always the first brace of a non-comment object: the mode
      # stays :entry and no switch to :content is needed here.
      @brace_level += 1
      push([:LBRACE,'{'])

      if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key])
        push [:KEY, @scanner.matched.chop.strip]
      end
    end

  else
    error_unexpected_object
  end
end
error_unbalanced_braces() click to toggle source
# File lib/bibtex/lexer.rb, line 324
# Logs a warning about unbalanced braces (with scanner position, brace level
# and mode) and reports an :E_UNBALANCED error, carrying the scanner's last
# match, through #backtrace.
def error_unbalanced_braces
  message = "Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNBALANCED, @scanner.matched])
end
error_unexpected_object() click to toggle source
# File lib/bibtex/lexer.rb, line 344
# Logs a warning about an unexpected object (with scanner position, brace
# level and mode) and reports an :E_UNEXPECTED_OBJECT error for the `@'
# through #backtrace.
def error_unexpected_object
  message = "Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNEXPECTED_OBJECT, '@'])
end
error_unexpected_token() click to toggle source
# File lib/bibtex/lexer.rb, line 339
# Logs a warning naming the unexpected token (the scanner's last match, with
# scanner position, brace level and mode) and reports an :E_UNEXPECTED_TOKEN
# error carrying that match through #backtrace.
def error_unexpected_token
  message = "Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNEXPECTED_TOKEN, @scanner.matched])
end
error_unterminated_content() click to toggle source
# File lib/bibtex/lexer.rb, line 334
# Logs a warning about unterminated content (with scanner position, brace
# level and mode) and reports an :E_UNTERMINATED_CONTENT error, carrying the
# scanner's last match, through #backtrace.
def error_unterminated_content
  message = "Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNTERMINATED_CONTENT, @scanner.matched])
end
error_unterminated_string() click to toggle source
# File lib/bibtex/lexer.rb, line 329
# Logs a warning about an unterminated string (with scanner position, brace
# level and mode) and reports an :E_UNTERMINATED_STRING error, carrying the
# scanner's last match, through #backtrace.
def error_unterminated_string
  message = "Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNTERMINATED_STRING, @scanner.matched])
end
leave_object() click to toggle source

Called when the lexer leaves a BibTeX object.

# File lib/bibtex/lexer.rb, line 320
# Returns the lexer to the state it has outside of any object: :meta mode,
# no active object and brace level 0.
def leave_object
  @mode = :meta
  @active_object = nil
  @brace_level = 0

  # The method evaluates to the triple of values just assigned.
  [:meta, nil, 0]
end
parse_bibtex() click to toggle source
# File lib/bibtex/lexer.rb, line 183
# Consumes one token inside a BibTeX object.
#
# The patterns are tried at the current scanner position in a fixed order;
# the first one that matches decides what happens:
#
# * opening brace: raises the brace level, pushes :LBRACE and switches to
#   :content mode for nested braces or for the first brace of a comment;
# * closing brace: lowers the brace level, pushes :RBRACE, leaves the object
#   at level 0 and reports unbalanced braces below 0;
# * `=', `,', numbers, names and `#': push the corresponding token (names
#   lose trailing white space);
# * quote: switches to :literal mode;
# * object start: delegates to #enter_object;
# * white space: skipped;
# * the :period pattern: reported as an unexpected token.
def parse_bibtex
  patterns = Lexer.patterns

  if @scanner.scan(patterns[:lbrace])
    @brace_level += 1
    push([:LBRACE, '{'])
    @mode = :content if @brace_level > 1 || (@brace_level == 1 && active?(:comment))
  elsif @scanner.scan(patterns[:rbrace])
    @brace_level -= 1
    push([:RBRACE, '}'])
    if @brace_level == 0
      leave_object
    elsif @brace_level < 0
      error_unbalanced_braces
    end
  elsif @scanner.scan(patterns[:eq])
    push([:EQ, '='])
  elsif @scanner.scan(patterns[:comma])
    push([:COMMA, ','])
  elsif @scanner.scan(patterns[:number])
    push([:NUMBER, @scanner.matched])
  elsif @scanner.scan(patterns[:name])
    push([:NAME, @scanner.matched.rstrip])
  elsif @scanner.scan(patterns[:quote])
    @mode = :literal
  elsif @scanner.scan(patterns[:sharp])
    push([:SHARP, '#'])
  elsif @scanner.scan(patterns[:object])
    enter_object
  elsif @scanner.scan(patterns[:space])
    # White space between tokens carries no meaning; nothing to push.
  elsif @scanner.scan(patterns[:period])
    error_unexpected_token
  end
end
parse_content() click to toggle source
# File lib/bibtex/lexer.rb, line 226
# Consumes brace-delimited content up to and including the next brace.
#
# * An opening brace raises the brace level; the text including the brace is
#   pushed as :CONTENT.
# * A closing brace lowers the brace level. At level 0 the text without the
#   brace and an :RBRACE are pushed and the object is left. At level 1
#   outside of a comment the same tokens are pushed and the lexer returns to
#   :bibtex mode. Below level 0 the text without the brace is pushed and
#   unbalanced braces are reported. Otherwise the brace belongs to the
#   content and the text including it is pushed.
# * Without any further brace the rest of the source is pushed as :CONTENT,
#   the scanner is moved to the end and unterminated content is reported.
def parse_content
  chunk = @scanner.scan_until(Lexer.patterns[:braces])
  brace = @scanner.matched

  if brace == '{'
    @brace_level += 1
    push([:CONTENT, chunk])
  elsif brace == '}'
    @brace_level -= 1

    if @brace_level == 0
      push([:CONTENT, chunk.chop])
      push([:RBRACE, '}'])
      leave_object
    elsif @brace_level == 1 && !active?(:comment)
      push([:CONTENT, chunk.chop])
      push([:RBRACE, '}'])
      @mode = :bibtex
    elsif @brace_level < 0
      push([:CONTENT, chunk.chop])
      error_unbalanced_braces
    else
      push([:CONTENT, chunk])
    end
  else
    push([:CONTENT, @scanner.rest])
    @scanner.terminate
    error_unterminated_content
  end
end
parse_literal() click to toggle source
# File lib/bibtex/lexer.rb, line 256
# Consumes the text of a quoted string up to and including the next brace or
# double quote.
#
# * An opening brace raises the brace level; the text including the brace is
#   pushed as :STRING_LITERAL.
# * A closing brace lowers the brace level. If the level drops below 1 the
#   text without the brace is pushed and unbalanced braces are reported;
#   otherwise the text including the brace is pushed.
# * A double quote at brace level 1 ends the string: the text without the
#   quote is pushed and the lexer returns to :bibtex mode. At any other
#   level the quote is part of the string and is pushed with the text.
# * Without any further delimiter the rest of the source is pushed, the
#   scanner is moved to the end and an unterminated string is reported.
def parse_literal
  chunk = @scanner.scan_until(Lexer.patterns[:unquote])
  delimiter = @scanner.matched

  if delimiter == '{'
    @brace_level += 1
    push([:STRING_LITERAL, chunk])
  elsif delimiter == '}'
    @brace_level -= 1

    if @brace_level >= 1
      push([:STRING_LITERAL, chunk])
    else
      push([:STRING_LITERAL, chunk.chop])
      error_unbalanced_braces
    end
  elsif delimiter == '"'
    if @brace_level == 1
      push([:STRING_LITERAL, chunk.chop])
      @mode = :bibtex
    else
      push([:STRING_LITERAL, chunk])
    end
  else
    push([:STRING_LITERAL, @scanner.rest])
    @scanner.terminate
    error_unterminated_string
  end
end
parse_meta() click to toggle source
# File lib/bibtex/lexer.rb, line 215
# Consumes the text between BibTeX objects.
#
# Scans ahead with the :strict_next pattern in strict mode and the :next
# pattern otherwise. On a match, the scanned text minus its last character
# is pushed as :META_CONTENT and #enter_object takes over. Without a match
# the rest of the source is pushed as :META_CONTENT and the scanner is moved
# to the end.
def parse_meta
  pattern_key = strict? ? :strict_next : :next
  leading = @scanner.scan_until(Lexer.patterns[pattern_key])

  if @scanner.matched.nil?
    # No further object: everything that remains is meta content.
    push([:META_CONTENT, @scanner.rest])
    @scanner.terminate
  else
    push([:META_CONTENT, leading.chop])
    enter_object
  end
end