class Boilerpipe::Filters::SplitParagraphBlocksFilter
Public Class Methods
process(doc)
click to toggle source
# File lib/boilerpipe/filters/split_paragraph_blocks_filter.rb, line 10
#
# Splits every text block whose text contains line breaks into one new
# text block per paragraph (a paragraph being a run of text between
# sequences of "\n" / "\r" characters).
#
# Blocks that yield fewer than two paragraphs are kept as the very same
# object. Each block created by a split is built from the paragraph text
# alone and then receives the original block's content flag and labels;
# nothing else is copied from the original block here.
#
# NOTE(review): String#split keeps a leading empty field, so a text that
# starts with a line break produces an empty first paragraph (and therefore
# an empty text block). Behaviour kept as-is; confirm whether that is wanted.
#
# @param doc [#text_blocks, #replace_text_blocks!] the document to process
# @return the same +doc+; its text blocks are replaced only if at least one
#   block was actually split
def self.process(doc)
  new_blocks = []
  split_any = false

  doc.text_blocks.each do |block|
    paragraphs = block.text.split(/[\n\r]+/)

    # Nothing to split: keep the original block object untouched.
    if paragraphs.size < 2
      new_blocks << block
      next
    end

    split_any = true
    is_content = block.is_content?
    labels = block.labels

    pieces = paragraphs.map do |paragraph|
      piece = ::Boilerpipe::Document::TextBlock.new(paragraph)
      piece.content = is_content
      piece.add_labels(labels)
      piece
    end
    new_blocks.concat(pieces)
  end

  # Avoid touching the document when every block was already a single paragraph.
  doc.replace_text_blocks!(new_blocks) if split_any
  doc
end