class Boilerpipe::Extractors::LargestContentExtractor

Public Class Methods

process(doc)
# File lib/boilerpipe/extractors/largest_content_extractor.rb, line 9
# Runs this extractor's filter chain against +doc+.
#
# Three filters from ::Boilerpipe::Filters are invoked, strictly in this order:
#   1. NumWordsRulesClassifier
#   2. BlockProximityFusion::MAX_DISTANCE_1
#   3. KeepLargestBlockFilter::INSTANCE
#
# The return value of every filter is discarded; the method hands back the
# very same +doc+ object it was given.
# NOTE(review): the filters presumably modify +doc+ in place - confirm against
# the filter implementations.
def self.process(doc)
  ::Boilerpipe::Filters::NumWordsRulesClassifier.process(doc)
  ::Boilerpipe::Filters::BlockProximityFusion::MAX_DISTANCE_1.process(doc)
  ::Boilerpipe::Filters::KeepLargestBlockFilter::INSTANCE.process(doc)
  doc
end
text(contents)
# File lib/boilerpipe/extractors/largest_content_extractor.rb, line 3
# Convenience entry point: parse, filter, and return the resulting content.
#
# +contents+ is handed to ::Boilerpipe::SAX::BoilerpipeHTMLParser.parse; the
# object it returns is then passed through LargestContentExtractor.process.
# The return value of that call is ignored - the result of this method is
# whatever +content+ yields on the parsed document afterwards.
def self.text(contents)
  parsed = ::Boilerpipe::SAX::BoilerpipeHTMLParser.parse(contents)
  ::Boilerpipe::Extractors::LargestContentExtractor.process(parsed)
  parsed.content
end