class Telegraph::Parser::Parser

Attributes

parsed_data[R]

Public Class Methods

new(article_id, image_prefix)
# File lib/telegraph/parser/parser.rb, line 8
# Builds a parser for one article.
#
# @param article_id   identifier later passed to the fetcher's +fetch+
# @param image_prefix value later passed to the fetcher's image helpers
def initialize(article_id, image_prefix)
  # Caller-supplied settings.
  @article_id = article_id
  @image_prefix = image_prefix

  # Collaborator used for every page and image request.
  @fetcher = Fetcher.new
end

Public Instance Methods

fetch_and_parse!()
# File lib/telegraph/parser/parser.rb, line 14
# Runs the whole pipeline: loads the page, loads its images, then assembles
# the result.
#
# @return whatever +compile_parsed_data!+ returns
def fetch_and_parse!
  # Preparatory steps run strictly in this order; their return values are unused.
  %i(load_page! load_images!).each { |step| send(step) }

  compile_parsed_data!
end

Private Instance Methods

compile_parsed_data!()
# File lib/telegraph/parser/parser.rb, line 26
# Assembles the final result hash and stores it in @parsed_data.
#
# @return [Hash] with keys :article_id, :title, :author, :content, :images
def compile_parsed_data!
  # Title and author are read before without_tags runs, because without_tags
  # removes the h1/address elements.
  headline = tag_content(:h1)
  byline   = tag_content(:address)
  body     = without_tags

  @parsed_data = {
    article_id: @article_id,
    title:      headline,
    author:     byline,
    content:    body,
    images:     @images
  }
end
load_images!()
# File lib/telegraph/parser/parser.rb, line 36
# Fetches every image referenced by the loaded page and collects the results
# in @images.
#
# Each <img> source is passed to the fetcher's +fetch_image+ together with
# @image_prefix; the returned value is merged into @images, so it is
# presumably a Hash (verify against Fetcher#fetch_image).
#
# Images without a +src+ attribute are skipped, since there is nothing to
# fetch for them.
def load_images!
  @images = {}

  @page.search('.//img').each do |image|
    src = image.attributes['src']
    next unless src # no src attribute: nothing to fetch

    @images.merge!(@fetcher.fetch_image(src.value, @image_prefix))
  end
end
load_page!()
# File lib/telegraph/parser/parser.rb, line 22
# Fetches the article's HTML, parses it and keeps only the nodes matching
# the 'article' CSS selector in @page.
def load_page!
  raw_html = @fetcher.fetch(@article_id)
  document = Nokogiri::HTML(raw_html)

  @page = document.css('article')
end
tag_content(tag)
# File lib/telegraph/parser/parser.rb, line 45
# Returns the text of every element named +tag+ found beneath the loaded page.
#
# @param tag element name (interpolated into a relative XPath)
def tag_content(tag)
  xpath = ".//#{tag}"
  matches = @page.search(xpath)
  matches.text
end
without_tags()
# File lib/telegraph/parser/parser.rb, line 49
# Serialises the article body: a copy of the page with the h1 and address
# elements removed, every image +src+ replaced by the fetcher's +image_id+
# for that source, and the 'id' attribute and classes stripped.
#
# All edits are made through the copy rather than through @page itself.
# NOTE(review): if @page is a Nokogiri NodeSet (as load_page! suggests), +dup+
# is presumably shallow, so the edits still reach the nodes @page holds —
# confirm whether a deep copy is wanted.
#
# Images without a +src+ attribute are left untouched.
#
# @return [String] result of +to_s+ on the cleaned copy
def without_tags
  page_copy = @page.dup
  %i(h1 address).each { |tag| page_copy.search(".//#{tag}").remove }

  page_copy.search('.//img').each do |image|
    src = image.attributes['src']
    next unless src # no src attribute: nothing to rewrite

    src.value = @fetcher.image_id(src.value, @image_prefix)
  end

  page_copy.remove_attr('id').remove_class.to_s
end