class Boilerpipe::Filters::IgnoreBlocksAfterContentFilter
Public Class Methods
process(doc, min_num_words = 60)
click to toggle source
# File lib/boilerpipe/filters/ignore_blocks_after_content_filter.rb, line 8
#
# Clears the content flag on every text block from the end-of-text marker
# onward.
#
# Walks +doc.text_blocks+ in order, keeping a running word total over the
# blocks whose +is_content?+ is true. The first block that carries the
# +:INDICATES_END_OF_TEXT+ label while the running total is at least
# +min_num_words+ trips a latch; that block and every block after it get
# +content = false+.
#
# NOTE(review): +num_full_text_words+ is not defined in this view; it is
# presumably a sibling class method that counts a block's words -- confirm.
#
# @param doc the document whose +text_blocks+ are scanned and mutated in place
# @param min_num_words [Integer] words that must have been accumulated before
#   an end-of-text label is honoured (default 60)
# @return the same +doc+ object that was passed in
def self.process(doc, min_num_words = 60)
  words_seen = 0
  past_end = false

  doc.text_blocks.each do |block|
    # Read the label before counting, so the threshold check below includes
    # this block's own words when the block is itself content.
    marks_end = block.has_label?(:INDICATES_END_OF_TEXT)
    words_seen += num_full_text_words(block) if block.is_content?

    # Latch: once tripped it stays set for the rest of the document.
    past_end = true if marks_end && words_seen >= min_num_words
    next unless past_end

    block.content = false
  end

  doc
end