class HocrReader::Reader

Reader class: parses an hOCR document string and extracts its parts.

Attributes

parts[RW]

Public Class Methods

new(str) click to toggle source
# File lib/hocr_reader/reader.rb, line 13
# Builds a reader around an hOCR document.
#
# The raw markup is kept in @string and its parsed form (the result of
# Nokogiri::HTML) in @html.
#
# @param str [String] the hOCR markup to parse
def initialize(str)
  @string = str
  @html = Nokogiri::HTML(@string)
  # Start with an empty list so the parts accessor and convert_to_string
  # are usable before any extraction has been requested.
  @parts = []
end

Public Instance Methods

convert_to_string() click to toggle source

Joins the text of every extracted part into a single string, each part followed by a space.

# File lib/hocr_reader/reader.rb, line 52
# Joins the text of every part in @parts into a single string.
#
# Each part's text is followed by one space, so a non-empty result ends with
# a trailing space (kept so existing callers see the same output).
#
# @return [String] the concatenated text; '' when @parts is nil or empty
def convert_to_string
  # Array() maps a nil @parts (nothing extracted yet) to an empty list, and
  # map/join avoids allocating a new string on every iteration.
  Array(@parts).map { |part| "#{part.text} " }.join
end
extract_parts(part_name) click to toggle source

Extracts every non-blank element matching the selector for part_name and stores the results in parts.

# File lib/hocr_reader/reader.rb, line 34
# Rebuilds @parts from the elements of @html that match the requested kind.
#
# The CSS selector is looked up in TAGS under part_name. Elements whose
# stripped text is empty are skipped. For every remaining element a Part is
# created from the part name, the element itself, its title attribute split
# on ';', and its lang attribute (nil when the element has none).
#
# @param part_name the TAGS key naming the kind of element to extract
# @return [Array] the newly built list of Part objects, also stored in @parts
def extract_parts(part_name)
  @parts = []
  selector = TAGS[part_name]
  # example selector list: 'span.ocrx_word, span.ocrx_word'
  @html.css(selector + ', ' + selector).each do |node|
    next if node.text.strip.empty?

    # An element without a title attribute yields an empty attribute list
    # instead of aborting the whole extraction with NoMethodError on nil.
    title = node.attributes['title']
    title_attributes = title ? title.value.to_s.split(';') : []
    lang = node.attributes['lang']
    language_attribute = lang.value.to_s if lang
    @parts << Part.new(part_name, node, title_attributes, language_attribute)
  end
  @parts
end
method_missing(name, *args, &block) click to toggle source
Calls superclass method
# File lib/hocr_reader/reader.rb, line 18
# Treats any method whose name is a key of TAGS as a request to extract
# parts of that kind; every other unknown method goes to the superclass.
#
# @param name [Symbol] the name of the method that was called
# @return whatever extract_parts returns for a known part name
def method_missing(name, *args, &block)
  return extract_parts(name) if TAGS[name]

  super
end
respond_to_missing?(name, *) click to toggle source
Calls superclass method
# File lib/hocr_reader/reader.rb, line 26
# Reports the dynamic methods served by method_missing, so that respond_to?
# agrees with what the object actually answers.
#
# @param name [Symbol] the method name being queried
# @return [Boolean] true when name has an entry in TAGS; otherwise whatever
#   the superclass reports
def respond_to_missing?(name, *)
  # The original left this branch empty and returned nil, so respond_to?
  # said false for exactly the names method_missing handles.
  TAGS[name] ? true : super
end