class Boilerpipe::Extractors::KeepEverythingWithKMinWordsExtractor

Public Class Methods

process(min, doc)
# File lib/boilerpipe/extractors/keep_everything_with_k_min_words_extractor.rb, line 13
# Runs the extractor's filter chain over +doc+, in this fixed order:
# SimpleBlockFusionProcessor, MarkEverythingContentFilter, then
# MinWordsFilter (the only step that receives +min+).
#
# min - forwarded unchanged to MinWordsFilter.process; presumably the
#       minimum word count a block needs to be kept (TODO confirm
#       against MinWordsFilter).
# doc - the document object handed to every filter.
#
# Returns the same +doc+ object that was passed in. The return values of
# the individual filters are ignored.
def self.process(min, doc)
  filters = ::Boilerpipe::Filters

  filters::SimpleBlockFusionProcessor.process(doc)
  filters::MarkEverythingContentFilter.process(doc)
  filters::MinWordsFilter.process(min, doc)

  doc
end
text(min, contents)
# File lib/boilerpipe/extractors/keep_everything_with_k_min_words_extractor.rb, line 7
# Parses +contents+ with BoilerpipeHTMLParser, runs
# KeepEverythingWithKMinWordsExtractor.process over the parsed document,
# and returns that document's +content+.
#
# min      - forwarded unchanged to the extractor's +process+.
# contents - the raw input handed to the parser; presumably an HTML
#            string, judging by the parser's name (TODO confirm).
#
# Returns whatever the parsed document's +content+ method yields after
# processing. The return value of +process+ itself is not used.
def self.text(min, contents)
  parsed = ::Boilerpipe::SAX::BoilerpipeHTMLParser.parse(contents)
  ::Boilerpipe::Extractors::KeepEverythingWithKMinWordsExtractor.process(min, parsed)
  parsed.content
end