class Splitta::Doc

Constants

FRAG_SPLITTER
SEGMENT_THRESHOLD

Attributes

frags[R]

Public Class Methods

new(text, model:) click to toggle source
# File lib/splitta/doc.rb, line 23
# Builds the document's frags from +text+, then hands the document to +model+.
#
# +text+ is split on FRAG_SPLITTER and the resulting pieces are glued back
# together two at a time (presumably a word run plus the delimiter captured
# by FRAG_SPLITTER -- confirm against the constant's definition). Every glued
# chunk becomes one Frag, which is given the previously built Frag (nil for
# the first one) as +previous_frag+.
#
# text  - the String to break into frags.
# model - an object responding to +classify+; it is called once with this
#         document after all frags have been built.
def initialize(text, model:)
  @frags = []
  pieces = text.split(FRAG_SPLITTER)
  pieces.each_slice(2).map(&:join).each do |chunk|
    # @frags.last is read before the push, so it is still the preceding frag.
    @frags.push(Frag.new(chunk, previous_frag: @frags.last))
  end
  model.classify(self)
end

Public Instance Methods

segments() click to toggle source

Outputs all of the text, split into segments according to the predictions.

# File lib/splitta/doc.rb, line 35
# Lazily yields the document's text as a sequence of segment strings.
#
# The +orig+ text of each frag is appended to a running buffer. Whenever a
# frag reports +over?(SEGMENT_THRESHOLD)+, the buffered text is emitted as
# one segment and the buffer starts over. Any text left in the buffer after
# the last frag is emitted as a final segment; an empty remainder is not.
#
# Returns an Enumerator of Strings.
def segments
  Enumerator.new do |yielder|
    buffer = StringIO.new
    frags.each do |frag|
      buffer.write(frag.orig)
      next unless frag.over?(SEGMENT_THRESHOLD)

      yielder.yield(buffer.string)
      # Swap in a fresh string so the segment just yielded is never mutated.
      buffer.string = ''
    end
    remainder = buffer.string
    yielder.yield(remainder) unless remainder.empty?
  end
end