class Boilerpipe::Filters::SimpleBlockFusionProcessor

Public Class Methods

process(doc)
# File lib/boilerpipe/filters/simple_block_fusion_processor.rb, line 5
# Fuses runs of consecutive text blocks that share the same text density.
#
# The first block of each run acts as the anchor: every following block whose
# +text_density+ equals the anchor's is merged into it via +merge_next+ and
# then dropped from the document. A block with a different density becomes
# the new anchor.
#
# @param doc a document responding to +text_blocks+ and +replace_text_blocks!+
# @return the same +doc+ object; it is returned untouched when it holds fewer
#   than two text blocks
def self.process(doc)
  blocks = doc.text_blocks
  return doc unless blocks.size >= 2

  anchor = blocks.first
  # The block both performs the merge and reports whether the candidate was
  # absorbed, so +select+ yields exactly the blocks that must be removed.
  absorbed = blocks.drop(1).select do |candidate|
    fused = anchor.text_density == candidate.text_density
    if fused
      anchor.merge_next(candidate)
    else
      # Density changed: this block starts the next run.
      anchor = candidate
    end
    fused
  end

  doc.replace_text_blocks!(blocks - absorbed)
  doc
end