class Boilerpipe::Filters::BoilerplateBlockFilter

Constants

INSTANCE_KEEP_TITLE

Public Class Methods

new(label)
# File lib/boilerpipe/filters/boilerplate_block_filter.rb, line 5
# Builds a filter and remembers +label+ in @label_to_keep.
#
# label - stored as-is. #process only tests whether it is nil:
#         when nil, every non-content block is removed; when non-nil,
#         non-content blocks carrying the :TITLE label are retained.
def initialize(label)
  @label_to_keep = label
end

Public Instance Methods

process(doc)
# File lib/boilerpipe/filters/boilerplate_block_filter.rb, line 10
# Drops boilerplate (non-content) text blocks from +doc+.
#
# A block is removed when it reports is_not_content? and either no label
# was configured (@label_to_keep is nil) or the block does not carry the
# :TITLE label. Note that the configured label is only tested for nil;
# the label looked up on each block is always :TITLE.
#
# doc - object responding to text_blocks and replace_text_blocks!.
#
# The collection returned by doc.text_blocks is pruned via delete_if and
# then handed back through replace_text_blocks!.
#
# Returns the same +doc+ that was passed in.
def process(doc)
  remaining = doc.text_blocks.delete_if do |block|
    # Content blocks are always kept.
    next false unless block.is_not_content?

    # Boilerplate goes, unless a label is configured and this is a title block.
    @label_to_keep.nil? || !block.has_label?(:TITLE)
  end

  doc.replace_text_blocks!(remaining)
  doc
end