module Crawlers::Helpers::Content
Public Instance Methods
extract_primary_content(html_text)
click to toggle source
# File lib/crawlers/helpers/content.rb, line 7

# Reduces a raw HTML document to cleaned-up content text.
#
# The input is handed to Readability::Document, whose +content+ is then run
# through Sanitize.clean, and the result is finally whitespace-normalized by
# #remove_trailing_spaces.
#
# NOTE(review): Sanitize.clean is called without an explicit config, so the
# gem's default rules apply -- presumably all markup is stripped; confirm
# against the Sanitize version in use.
#
# @param html_text [String] HTML source of the crawled page
# @return [String] whatever #remove_trailing_spaces returns for the
#   sanitized content
def extract_primary_content(html_text)
  readable_html = Readability::Document.new(html_text).content
  remove_trailing_spaces(Sanitize.clean(readable_html))
end
Private Instance Methods
remove_trailing_spaces(text)
click to toggle source
# File lib/crawlers/helpers/content.rb, line 15

# Tidies the whitespace around line breaks in +text+.
#
# Three things happen, in order:
# 1. leading and trailing whitespace of the whole string is stripped;
# 2. spaces and tabs sitting directly before a line break are removed, so
#    interior lines no longer end in blanks;
# 3. any whitespace run that follows a newline is removed, which drops
#    indentation and collapses blank lines.
#
# Line-break characters themselves are preserved: a "\r\n" pair stays "\r\n".
# Whitespace in the middle of a line is left untouched.
#
# @param text [String] text to clean up
# @return [String] a new string; +text+ itself is not modified
def remove_trailing_spaces(text)
  # Blanks before a line break; the lookahead keeps the break (LF or CRLF)
  # itself out of the match so it is not rewritten.
  without_line_tails = text.strip.gsub(/[ \t]+(?=\r?\n)/, '')

  # Whitespace right after a newline: indentation and empty lines.
  without_line_tails.gsub(/(?<=\n)\s+/, '')
end