class Boilerpipe::Filters::NumWordsRulesClassifier
Public Class Methods
process(doc)
click to toggle source
# File lib/boilerpipe/filters/num_words_rules_classifier.rb, line 8
#
# Walks the text blocks of +doc+ and assigns each block's +content+ to the
# result of +classify+ for that block and its two neighbours.
#
# The block list is padded with the same sentinel block on both ends so that
# the first and last real blocks still have a "previous" and "next" block.
# NOTE(review): the sentinel comes from TextBlock.empty_start; presumably an
# empty block with no words/links -- confirm against TextBlock.
#
# @param doc [#text_blocks] document whose blocks are updated in place
# @return the same +doc+ that was passed in
def self.process(doc)
  sentinel = Boilerpipe::Document::TextBlock.empty_start
  padded = [sentinel].concat(doc.text_blocks) << sentinel

  # Slide a 3-wide window so every real block is visited once as the middle.
  padded.each_cons(3) do |before, block, after|
    block.content = classify(before, block, after)
  end

  doc
end
Private Class Methods
classify(prev, current, nxt)
click to toggle source
# File lib/boilerpipe/filters/num_words_rules_classifier.rb, line 22
#
# Decides the boolean label for +current+ from the link density and word
# counts of +current+ and its two neighbours.
#
# Rules, in order:
# * +current+ link density above 0.333333 -> false.
# * +prev+ link density at most 0.555556 -> true when +current+ has more
#   than 16 words, or +nxt+ more than 15, or +prev+ more than 4.
# * otherwise -> true when +current+ has more than 40 words or +nxt+ more
#   than 17.
#
# @param prev    [#link_density, #num_words] block before +current+
# @param current [#link_density, #num_words] block being classified
# @param nxt     [#num_words] block after +current+
# @return [Boolean]
def self.classify(prev, current, nxt)
  return false if current.link_density > 0.333333

  if prev.link_density <= 0.555556
    current.num_words > 16 || nxt.num_words > 15 || prev.num_words > 4
  else
    current.num_words > 40 || nxt.num_words > 17
  end
end