class Newly::PageCrawler

Public Class Methods

new(host, document) click to toggle source
# File lib/newly/page_crawler.rb, line 3
# Builds a crawler bound to one host and one parsed document.
#
# host     - value interpolated as the URL prefix when #image rebuilds
#            relative paths.
# document - object that answers `css(selector)`; every lookup in this class
#            goes through it (presumably a Nokogiri document — confirm
#            against the caller).
def initialize(host, document)
  @host, @document = host, document
end

Public Instance Methods

image(element) click to toggle source
# File lib/newly/page_crawler.rb, line 28
# Resolves the `src` attribute of the first node matching +element+ into an
# image URL.
#
# Two rewrites are applied, in this order:
#   1. If the value contains "==/", only the part after the last "==/" is
#      kept and "http://" is prepended (presumably this strips a proxy or
#      resizer prefix — confirm against the crawled sites).
#   2. If the value (after step 1) contains "../", it is prefixed with
#      "#{@host}/" and every "../" occurrence is removed from the combined
#      string.
#
# Returns the resulting string, or the falsy value produced by #find when no
# `src` could be read.
def image(element)
  src = find(element, 'src')
  return src unless src

  src = "http://#{src.split("==/").last}" if src.include?("==/")
  return src unless src.include?('../')

  "#{@host}/#{src}".gsub('../', '')
end
text(element) click to toggle source
# File lib/newly/page_crawler.rb, line 15
# Extracts the text of the nodes matching +element+.
#
# element - selector handed to #get; a nil or empty selector short-circuits
#           to nil without querying the document.
#
# Returns the text reported by the matched set, or nil when the selector is
# blank or the extracted text is nil/empty.
def text(element)
  return unless valid?(element)

  content = get(element).text
  content if valid?(content)
end
titleize(element) click to toggle source
# File lib/newly/page_crawler.rb, line 8
# Returns the text for +element+ with its first character capitalized.
#
# Only the first character is replaced; the rest of the string keeps its
# original casing. The string obtained from #text is modified in place and
# then returned. When #text yields nothing, that falsy value is returned
# untouched.
def titleize(element)
  heading = text(element)
  return heading unless heading

  heading[0] = heading.capitalize[0]
  heading
end

Private Instance Methods

find(element, type) click to toggle source
# File lib/newly/page_crawler.rb, line 46
# Reads attribute +type+ from the first node matching +element+.
#
# element - selector handed to #get; a nil or empty selector returns nil
#           without querying the document.
# type    - key used to index the node (e.g. 'src').
#
# Only the first matched node is inspected; the remaining matches are never
# indexed, so no intermediate array of attribute values is built.
#
# Returns the attribute value, or nil when the selector is blank, nothing
# matched, or the first node has no such attribute.
def find(element, type)
  return unless valid?(element)

  node = get(element).first
  node[type] if node
end
get(element) click to toggle source
# File lib/newly/page_crawler.rb, line 42
# Runs a CSS query against the stored document.
#
# element - selector passed unchanged to the document's `css` method.
#
# Returns whatever `css` returns (presumably a Nokogiri node set — confirm).
def get(element)
  document = @document
  document.css(element)
end
valid?(str) click to toggle source
# File lib/newly/page_crawler.rb, line 38
# Tells whether +str+ is present and non-empty.
#
# str - any object answering `empty?`, or nil/false.
#
# Returns true for a non-empty value and false for an empty one; a nil or
# false argument is returned as-is (still falsy).
def valid?(str)
  return str unless str

  !str.empty?
end