class Parser

Public Class Methods

new(rae_data) click to toggle source
# File lib/nebrija/parser.rb, line 4
# Parses the given markup once and keeps the resulting document in @doc,
# which the other methods of this class query through CSS selectors.
#
# @param rae_data markup handed straight to Nokogiri::HTML
#   (presumably the HTML of a dictionary response -- confirm against callers)
def initialize(rae_data)
  document = Nokogiri::HTML(rae_data)
  @doc = document
end

Public Instance Methods

parse() click to toggle source
# File lib/nebrija/parser.rb, line 8
# Builds the result hash for the document given to the constructor.
#
# @return [Hash] when the document is valid: +:status+ ('success'),
#   +:type+ ('single' or 'multiple') and +:response+ (the output of
#   parse_single); otherwise +:status+ ('error') and +:message+.
def parse
  # Guard clause: bail out early when the document holds no entry.
  unless valid?
    return {
      status: 'error',
      message: 'Word/id does not exist. Sorry.'
    }
  end

  kind = single? ? 'single' : 'multiple'
  { status: 'success', type: kind, response: parse_single }
end

Private Instance Methods

clean!(response) click to toggle source
# File lib/nebrija/parser.rb, line 54
# Regroups the flat +[type, text]+ pairs stored under +:other_meanings+ into
# one hash per expression: +{ expression: text, meanings: [...] }+, where each
# meaning is wrapped by #metadata.
#
# A new group starts on every +:expression+ pair. The very first pair always
# opens a group, whatever its type, so its text becomes the expression.
#
# Mutates +response+ in place (replaces +response[:other_meanings]+).
#
# @param response [Hash] must contain +:other_meanings+, an array of
#   +[type, text]+ pairs
# @return [Hash] the same +response+ object
def clean!(response)
  groups = []
  current = nil

  response[:other_meanings].each do |type, text|
    if current.nil? || type == :expression
      groups << current if current
      current = { expression: text, meanings: [] }
    else
      current[:meanings] << metadata(text)
    end
  end

  # Flush the group still being built; without this the last expression
  # (and all of its meanings) would be silently dropped.
  groups << current if current

  response[:other_meanings] = groups
  response
end
delete_pending?() click to toggle source
# File lib/nebrija/parser.rb, line 95
# Placeholder predicate: it has no implementation yet and raises on every call.
#
# TODO: check how this works in the new API.
#
# @raise [NotImplementedError] always
def delete_pending?
  raise(NotImplementedError)
end
metadata(text) click to toggle source
# File lib/nebrija/parser.rb, line 85
# Wraps a meaning's raw text in the hash shape used for every meaning entry.
#
# TODO: split the text into metadata and the actual meaning text. That looks
# quite tricky, so for now +:meta+ is always nil.
#
# @param text the raw text of the meaning
# @return [Hash] +{ meaning: text, meta: nil }+
def metadata(text)
  { meaning: text, meta: nil }
end
parse_single() click to toggle source
# File lib/nebrija/parser.rb, line 25
# Extracts the word, its etymology and its meanings from @doc.
#
# The word is the text of the +header+ element with its first '.' removed and
# capitalized. Paragraphs matching 'body > div > article > p' are then read in
# order: the first one is the etymology, the rest are classified by their
# +class+ attribute:
#   j<digits> -> core meaning
#   k<digits> -> an expression (opens a group of other meanings)
#   m         -> a meaning belonging to the preceding expression
# Paragraphs with any other class are ignored.
#
# @return [Hash] +:core_meanings+, +:other_meanings+ (grouped by #clean!),
#   +:word+ and, when at least one paragraph exists, +:etymology+
def parse_single
  response = {
    core_meanings: [],
    other_meanings: []
  }

  # Non-bang capitalize on purpose: capitalize! returns nil when the text is
  # already capitalized, which would store nil as the word.
  response[:word] = @doc.css('header').inner_text.sub('.', '').capitalize

  @doc.css('body > div > article > p').each_with_index do |entry, index|
    if index.zero?
      response[:etymology] = entry.inner_text
      next
    end

    # Branch order matters: the j-pattern is tested before 'm' and the
    # k-pattern. The patterns are unanchored, so they match anywhere in the
    # class attribute.
    case entry['class']
    when /j[0-9]*/
      response[:core_meanings] << metadata(entry.inner_text)
    when 'm'
      response[:other_meanings] << [:meaning, entry.inner_text]
    when /k[0-9]*/
      response[:other_meanings] << [:expression, entry.inner_text]
    end
  end

  clean! response
end
single?() click to toggle source
# File lib/nebrija/parser.rb, line 77
# Tells whether the document contains exactly one +article+ element.
#
# @return [Boolean]
def single?
  articles = @doc.css('article')
  articles.length == 1
end
valid?() click to toggle source
# File lib/nebrija/parser.rb, line 81
# Tells whether the document contains at least one +article+ element.
#
# NOTE(review): the original line carried a trailing "delete_pending?"
# comment, which looks like a disabled extra check -- confirm the intent.
#
# @return [Boolean]
def valid?
  articles = @doc.css('article')
  articles.length.positive?
end