class Parser
Public Class Methods
new(rae_data)
click to toggle source
# File lib/nebrija/parser.rb, line 4
# Wraps the markup supplied by the caller in a parsed document.
#
# @param rae_data the markup to parse; it is handed unchanged to Nokogiri::HTML
def initialize(rae_data)
  parsed_document = Nokogiri::HTML(rae_data)
  @doc = parsed_document
end
Public Instance Methods
parse()
click to toggle source
# File lib/nebrija/parser.rb, line 8
# Builds the result hash for the loaded document.
#
# @return [Hash] when valid? holds: :status 'success', :type 'single' or
#   'multiple' (decided by single?) and :response taken from parse_single;
#   otherwise :status 'error' together with a :message
def parse
  unless valid?
    return { status: 'error', message: 'Word/id does not exist. Sorry.' }
  end

  kind = single? ? 'single' : 'multiple'
  { status: 'success', type: kind, response: parse_single }
end
Private Instance Methods
clean!(response)
click to toggle source
# File lib/nebrija/parser.rb, line 54
# Regroups the flat [type, text] pairs stored under :other_meanings into one
# hash per expression, each carrying the meanings that follow it.
#
# A new group starts at every :expression pair. The very first pair always
# opens a group, whatever its type, and its text becomes the expression.
# The closing group is kept too: it used to be lost because a group was only
# flushed when a later expression showed up.
#
# @param response [Hash] must hold :other_meanings as an array of
#   [type, text] pairs
# @return [Hash] the same hash, with :other_meanings replaced by an array of
#   { expression: text, meanings: [...] } hashes (meanings built by metadata)
def clean!(response)
  groups = response[:other_meanings].slice_before { |type, _text| type == :expression }

  response[:other_meanings] = groups.map do |group|
    # Head of each group is the expression; whatever follows are its meanings.
    (_type, expression), *meanings = group
    { expression: expression, meanings: meanings.map { |_kind, text| metadata(text) } }
  end
  response
end
delete_pending?()
click to toggle source
# File lib/nebrija/parser.rb, line 95
# Placeholder: the check has not been written, so every call raises.
#
# TODO: check how this works in the new API.
#
# @raise [NotImplementedError] unconditionally
def delete_pending?
  raise(NotImplementedError)
end
metadata(text)
click to toggle source
# File lib/nebrija/parser.rb, line 85
# Wraps a meaning's text in a hash that has room for metadata.
#
# TODO: the idea is to split the text into metadata and the actual text.
# It seems quite tricky, so for now :meta is always nil.
#
# @param text the meaning text, stored untouched under :meaning
# @return [Hash] { meaning: text, meta: nil }
def metadata(text)
  entry = {}
  entry[:meaning] = text
  entry[:meta] = nil
  entry
end
parse_single()
click to toggle source
# File lib/nebrija/parser.rb, line 25
# Extracts the word, etymology and meanings from the document in @doc.
#
# The word comes from the 'header' text with its first '.' removed and then
# capitalized. The paragraphs matched by 'body > div > article > p' are
# classified by position and by their class attribute:
# * first paragraph             -> :etymology
# * class matching /j[0-9]*/    -> appended to :core_meanings (via metadata)
# * class 'm' or /k[0-9]*/      -> appended to :other_meanings as [type, text]
# Paragraphs with any other class are skipped.
#
# @return whatever clean! returns for the collected hash
def parse_single
  response = { core_meanings: [], other_meanings: [] }

  # Non-bang capitalize on purpose: capitalize! returns nil when the text is
  # already capitalized, which used to leave :word empty for such headers.
  response[:word] = @doc.css('header').inner_text.sub('.', '').capitalize

  @doc.css('body > div > article > p').each_with_index do |entry, index|
    css_class = entry['class']

    if index.zero?
      # Parsing etymology
      response[:etymology] = entry.inner_text
    elsif css_class =~ /j[0-9]*/
      # Parsing first meaning (the pattern is unanchored, as before)
      response[:core_meanings] << metadata(entry.inner_text)
    elsif css_class == 'm' || css_class =~ /k[0-9]*/
      # Parsing other meanings
      # k: expression with 1 element
      # m: is the meaning with >= elements
      type = css_class == 'm' ? :meaning : :expression
      response[:other_meanings] << [type, entry.inner_text]
    end
  end

  clean! response
end
single?()
click to toggle source
# File lib/nebrija/parser.rb, line 77
# Tells whether the document holds exactly one 'article' element.
#
# @return [Boolean] true only when the 'article' selector matches one node
def single?
  articles = @doc.css('article')
  articles.length == 1
end
valid?()
click to toggle source
# File lib/nebrija/parser.rb, line 81
# Tells whether the document holds at least one 'article' element.
#
# NOTE(review): the original carries a trailing "# delete_pending?" remark,
# presumably a check meant to be added here later - confirm before relying
# on this predicate alone.
#
# @return [Boolean] false when the 'article' selector matches nothing
def valid?
  articles = @doc.css('article')
  articles.length != 0
end