class Boilerpipe::Filters::BoilerplateBlockFilter
Constants
- INSTANCE_KEEP_TITLE
Public Class Methods
new(label)
click to toggle source
# File lib/boilerpipe/filters/boilerplate_block_filter.rb, line 5
#
# Builds a filter and remembers which label (if any) protects a
# non-content block from removal.
#
# label:: the label to keep; stored as-is in <tt>@label_to_keep</tt>.
#         +process+ treats +nil+ as "no label is protected".
def initialize(label)
  @label_to_keep = label
end
Public Instance Methods
process(doc)
click to toggle source
# File lib/boilerpipe/filters/boilerplate_block_filter.rb, line 10
#
# Strips boilerplate text blocks out of +doc+.
#
# A block is removed when <tt>is_not_content?</tt> is true for it, unless a
# label to keep was configured and the block carries that label. With a
# +nil+ label every non-content block is removed.
#
# The configured <tt>@label_to_keep</tt> is what gets checked, so the filter
# honours whichever label it was constructed with rather than only +:TITLE+.
#
# doc:: an object responding to +text_blocks+ (an Array-like of blocks) and
#       <tt>replace_text_blocks!</tt>.
#
# Returns the same +doc+ object.
def process(doc)
  # delete_if prunes doc.text_blocks in place and returns that same array.
  remaining = doc.text_blocks.delete_if do |tb|
    tb.is_not_content? && (@label_to_keep.nil? || !tb.has_label?(@label_to_keep))
  end
  doc.replace_text_blocks!(remaining)
  doc
end