module Crawlers::Helpers::Content

Public Instance Methods

extract_primary_content(html_text)
# File lib/crawlers/helpers/content.rb, line 7
# Reduces an HTML document to the text of its main content.
#
# The pipeline has three stages:
# 1. Readability::Document picks out the document's content.
# 2. Sanitize.clean is applied to that content (no config is passed, so the
#    library's default configuration is used).
# 3. remove_trailing_spaces tidies the whitespace of the result.
#
# @param html_text [String] raw HTML handed to Readability::Document.new
# @return [String] whatever remove_trailing_spaces returns for the cleaned content
def extract_primary_content(html_text)
  readable_html = Readability::Document.new(html_text).content
  remove_trailing_spaces(Sanitize.clean(readable_html))
end

Private Instance Methods

remove_trailing_spaces(text)
# File lib/crawlers/helpers/content.rb, line 15
# Normalizes the whitespace around line breaks in +text+.
#
# Every line is stripped of leading AND trailing whitespace, blank lines are
# dropped, and the remaining lines are joined with a single "\n". Whitespace
# inside a line (between words) is left untouched.
#
# The previous regex-based version only removed whitespace that followed a
# newline, so trailing spaces/tabs (and the "\r" of CRLF line endings) before
# an interior newline were kept despite the method's name.
#
# @param text [String] text to clean up
# @return [String] a new string; "" when +text+ is empty or whitespace-only
def remove_trailing_spaces(text)
  # Line-by-line processing keeps this linear in the input size and avoids
  # regex backtracking on long runs of spaces.
  text.lines.map(&:strip).reject(&:empty?).join("\n")
end