module NlpPure::Segmenting::DefaultWord

Constants

DEFAULT_OPTIONS

Public Instance Methods

clean_input(text = nil)
# File lib/nlp_pure/segmenting/default_word.rb, line 27
# Normalises raw input before it is split into words.
#
# The argument is converted with +to_s+, every [pattern, replacement] pair
# found under the +:gsub+ key of +options+ is applied in order, and the
# result is stripped of leading and trailing whitespace.
#
# The caller's object is never modified: replacements are done with the
# non-destructive +gsub+, so frozen strings (including the frozen "" that
# +nil.to_s+ returns on recent Rubies) are handled without a FrozenError.
#
# text - the value to clean; anything responding to +to_s+ (default: nil).
#
# Returns a new String.
def clean_input(text = nil)
  # perform replacements to work around the limitations of the splitting regexp
  replaced = options.fetch(:gsub, []).reduce(text.to_s) do |current, gsub_pair|
    # gsub, not gsub!: String#to_s returns the receiver itself, so a
    # destructive call here would rewrite (or choke on) the caller's string.
    current.gsub(gsub_pair[0], gsub_pair[1])
  end
  # NOTE: leading whitespace is problematic; ref #12
  replaced.strip
end
options()

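NOTE: the options are exposed through a method (rather than read from the constant directly) so that they can easily be mocked or stubbed in tests.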

# File lib/nlp_pure/segmenting/default_word.rb, line 38
# Returns the option set used by this segmenter.
#
# Within this module the +:gsub+ key is read by +clean_input+ and the
# +:split+ key by +parse+, both through +fetch+. The value is wrapped in a
# method so that it can be replaced by a mock/stub.
#
# Returns the DEFAULT_OPTIONS constant (presumably a Hash; its definition is
# not shown alongside this method - confirm).
def options
  DEFAULT_OPTIONS
end
parse(*args)
# File lib/nlp_pure/segmenting/default_word.rb, line 22
# Splits a piece of text into word segments.
#
# Only the first argument is used; any further arguments are ignored. The
# text is passed through +clean_input+ and then split on the value stored
# under the +:split+ key of +options+. When that key is absent, nil is handed
# to String#split, which then applies its default splitting behaviour.
#
# args - the text to segment, as the first positional argument.
#
# Returns an Array of Strings, or nil when called without arguments.
def parse(*args)
  # A splat parameter is always an Array, so emptiness is the only case to guard.
  return if args.empty?

  delimiter = options.fetch(:split, nil)
  cleaned = clean_input(args.first)
  cleaned.split(delimiter)
end