class Boilerpipe::Filters::SimpleBlockFusionProcessor
Public Class Methods
process(doc)
click to toggle source
# File lib/boilerpipe/filters/simple_block_fusion_processor.rb, line 5
#
# Fuses runs of consecutive text blocks that share the same +text_density+.
#
# Walks +doc.text_blocks+ in order. Whenever a block's +text_density+ equals
# that of the current surviving block, it is folded into the survivor via
# +merge_next+ and left out of the result; otherwise it becomes the new
# survivor. The surviving blocks are then written back with
# +doc.replace_text_blocks!+.
#
# A document with fewer than two text blocks is returned untouched
# (+replace_text_blocks!+ is not called).
#
# @param doc [#text_blocks, #replace_text_blocks!] document whose text blocks
#   respond to +text_density+ and +merge_next+
# @return [Object] the same +doc+ that was passed in
def self.process(doc)
  blocks = doc.text_blocks
  return doc if blocks.size < 2

  # Collect survivors directly instead of computing `blocks - removed`:
  # Array#- matches elements by hash/eql?, so a kept block that merely
  # compares equal to a merged-away block would be dropped as well.
  survivors = [blocks.first]

  blocks.drop(1).each do |block|
    # Always compare against the current survivor (including whatever earlier
    # merge_next calls did to it), not against the previous input block.
    current = survivors.last

    if current.text_density == block.text_density
      current.merge_next(block)
    else
      survivors << block
    end
  end

  doc.replace_text_blocks!(survivors)
  doc
end