class Natural

Constants

CLEAR
DEFAULT_EXPANSIONS
DEFAULT_MATCHING
DEFAULT_SPELLINGS
DEFAULT_SYNONYMS
GREEN
MATCHING_OPTIONS
RED
YELLOW

Public Class Methods

new(text, options={}) click to toggle source
# File lib/natural.rb, line 28
# Builds a parser for +text+ and parses it immediately.
#
# text    - String to interpret; runs of spaces are collapsed and the ends
#           are trimmed before it is stored.
# options - Hash of settings. :logger supplies the logger to use; when it is
#           absent a DEBUG-level Logger writing to STDOUT is created. The
#           remaining keys are read later by #parse and #answer.
def initialize(text, options={})
  @text    = text.squeeze(' ').strip
  @options = options
  @logger  = options[:logger] || Logger.new(STDOUT).tap { |log| log.level = Logger::DEBUG }

  # parse memoizes its own result in @parse; the assignment just makes that explicit
  @parse = parse
end

Public Instance Methods

answer() click to toggle source
# File lib/natural.rb, line 129
# Computes the answer for the parsed text.
#
# Starts from the non-blank data of the parse tree's children (looked up with
# @options[:context]), flattened into one list, then applies every non-blank
# filter followed by every non-blank aggregator, each of which is a String of
# Ruby code invoked on the running result.
#
# NOTE(review): the filter/aggregator strings are executed with eval. That is
# only safe while they come from trusted fragment classes - confirm that no
# user-supplied text can reach them.
#
# Returns the final value after all filters and aggregators have run.
def answer
  result = @parse.children.map_by_data(@options[:context]).reject(&:blank?).flatten

  # Filters first, then aggregators. The local must stay named `result`
  # because the eval'd source refers to it by that name.
  [:map_by_all_filters, :map_by_aggregator].each do |collector|
    @parse.children.public_send(collector).reject(&:blank?).each do |call|
      result = eval("result.#{call}")
    end
  end

  result
end
options=(options) click to toggle source
# File lib/natural.rb, line 47
# Replaces the options hash and rebuilds the parse tree with it.
#
# #parse returns its memoized tree whenever @parse is already set, so calling
# it here would silently keep the tree built with the previous options.
# #parse! clears the cache first, so the new options actually take effect.
#
# options - Hash of settings, same shape as the one given to the constructor.
def options=(options)
  @options = options
  parse!
end
parse() click to toggle source
# File lib/natural.rb, line 52
# Builds (and memoizes in @parse) the fragment tree for @text.
#
# Steps, in order:
#   1. ask every candidate fragment class for matches against @text;
#   2. assemble all non-overlapping combinations of those matches and keep
#      the one whose scores sum highest;
#   3. wrap every word not covered by the winner in an Unused fragment;
#   4. attach the fragments, ordered by first word id, to a root Fragment.
#
# Options read from @options: :fragment_classes, :matching, :spellings,
# :synonyms, :expansions and :context (the last only for logging).
#
# Returns the root Fragment. When @parse is already set it is returned
# unchanged; use #parse! to force a rebuild.
def parse
  return @parse if @parse

  start_at = Time.now

  # search for all possible matches using all the different fragment classes
  matches_by_class = {}
  # without an explicit list, every loaded Class is considered
  fragment_classes = @options[:fragment_classes] || ObjectSpace.each_object(Class)
  # keep strict subclasses of Natural::Fragment only, leaving out Natural::Unused
  fragment_classes = fragment_classes.select {|a| a < Natural::Fragment && a != Natural::Unused}
  find_options = {
    :text => @text, 
    :matches => matches_by_class, 
    :matching => @options[:matching] || DEFAULT_MATCHING,
    :spellings => @options[:spellings] || DEFAULT_SPELLINGS, 
    :synonyms => @options[:synonyms] || DEFAULT_SYNONYMS, 
    :expansions => @options[:expansions] || DEFAULT_EXPANSIONS
  }

  if find_options[:matching] == :first_match
    # once a match has been found, exclude those words from further consideration
    # can help speed things up, but requires you order the candidate fragment_classes carefully
    fragment_classes.each do |klass|
      # dup is shallow: new_options[:matches] is still the matches_by_class hash
      new_options = find_options.dup
      new_options[:ignore] = matches_by_class.values.flatten.select{|a| a}.map_by_ids.flatten.uniq.sort
      # NOTE(review): klass.find runs twice per class here (once for the value,
      # once for the guard) - confirm it is side-effect free before caching it
      matches_by_class[klass] = klass.find(new_options)[klass] if klass.find(new_options)[klass]
    end
  else
    # Alternative subclasses are consulted before the fragment classes.
    # matches_by_class is rebound to each find's return value; presumably find
    # accumulates into options[:matches] and returns it - TODO confirm
    ObjectSpace.each_object(Class).select {|a| a < Natural::Alternative}.each do |klass| 
      matches_by_class = klass.find(find_options)
    end
    fragment_classes.each do |klass|
      matches_by_class = klass.find(find_options)
    end
  end

  matching_at = Time.now
  # Numeric#seconds is not core Ruby; presumably provided by ActiveSupport - verify
  @logger.debug "[n][perf] matching took #{(matching_at - start_at).seconds.round(1)} seconds"

  # find all valid combinations, choose the one with the highest score
  sequences = []
  sequences = assemble_sequences(matches_by_class.values.flatten)
  # descending by summed score, so the best candidate ends up first
  sequences = sequences.uniq.sort {|a,b| b.map_by_score.sum <=> a.map_by_score.sum}
  fragments = sequences.first || []

  scoring_at = Time.now
  @logger.debug "[n][perf] scoring took #{(scoring_at - matching_at).seconds.round(1)} seconds"
  @logger.debug "[n]"

  # tag the leftover words as unused
  remaining_words = (0..@text.split(' ').size-1).to_a - (!fragments.blank? ? fragments.map_by_ids.flatten : [])
  remaining_words.each do |id|
    tag_match = Unused.new(:ids => [id], :text => @text.split(' ')[id])
    # when a winning sequence exists this appends to sequences.first in place,
    # so the [scor] line logged for it below includes these Unused fragments
    fragments << tag_match
  end

  # put the fragments we are using in order and assemble the final tree
  fragments = fragments.sort {|a,b| a.ids.first <=> b.ids.first}
  @parse = Fragment.new(:ids => (0..@text.split(' ').size-1).to_a, :text => @text)
  fragments.each {|a| @parse << a}
  
  # debug output: every candidate sequence with its score, then the chosen tree
  sequences.each {|a| @logger.debug "[n][scor] #{a.map_by_score.sum.to_s.rjust(2, '0')} #{a.sort{|b,c| b.ids.first <=> c.ids.first}.join(' | ')}"}
  @logger.debug("[n]")
  @parse.pretty_to_s.each_line do |line|
    @logger.debug("[n][tree] #{line.gsub("\n", '')}")
  end
  @logger.debug("[n]")
  # info output: the original text and how it was interpreted (reads @parse, set above)
  @logger.info("[n][orig] #{@text}" + (@options[:context] ? " (#{@options[:context]})" : ""))
  @logger.info("[n][used] #{interpretation}" + (@options[:context] ? " (#{@options[:context]})" : ""))

  @parse
end
parse!() click to toggle source
# File lib/natural.rb, line 124
# Discards the memoized parse tree and runs #parse again from scratch.
# Returns whatever #parse returns.
def parse!
  @parse = nil # clear the cache so the guard at the top of #parse does not short-circuit
  parse
end
text=(text) click to toggle source
# File lib/natural.rb, line 42
# Replaces the text to interpret and rebuilds the parse tree for it.
#
# The text is normalised exactly as in the constructor (runs of spaces
# collapsed, ends trimmed) so both entry points store the same form.
#
# #parse returns its memoized tree whenever @parse is already set, so calling
# it here would keep the tree of the previous text; #parse! clears the cache
# first.
#
# text - String to interpret.
def text=(text)
  @text = text.squeeze(' ').strip
  parse!
end

Private Instance Methods

assemble_sequences(left_to_try, sequence_so_far=[]) click to toggle source
# File lib/natural.rb, line 154
# Recursively enumerates every combination of fragments whose word ids do not
# overlap.
#
# left_to_try     - Array of candidate fragments (each responds to #ids).
# sequence_so_far - Array of fragments already chosen on this branch.
#
# Returns an Array of sequences, each ordered by its fragments' first id.
# The same combination is reached once per order in which its members can be
# picked, so the result contains duplicates.
def assemble_sequences(left_to_try, sequence_so_far=[])
  # word ids already claimed on this branch
  taken_ids = sequence_so_far.map_by_ids.flatten.uniq
  candidates = left_to_try.select { |fragment| (fragment.ids & taken_ids).blank? }

  candidates.each_with_object([]) do |fragment, found|
    extended = sequence_so_far + [fragment]
    found << extended.sort { |x, y| x.ids.first <=> y.ids.first }
    found.concat(assemble_sequences(candidates, extended))
  end
end
interpretation(crossout=true) click to toggle source
# File lib/natural.rb, line 138
# Renders how the parsed text was understood, as a single String.
#
# A child node that has filters, data for @options[:context], or an
# aggregator is shown through to_s(:without_edits => true). Any other node
# has its ASCII letters replaced by '-' when +crossout+ is exactly true and
# contributes an empty piece otherwise.
#
# crossout - pass true (the default) to show unused words struck out.
#
# Returns the pieces joined by single spaces, with surrounding whitespace
# stripped.
def interpretation(crossout=true)
  pieces = []

  @parse.children.each do |node|
    contributes = !node.all_filters.blank? || node.data(@options[:context]) || node.aggregator

    # result += YELLOW if @automatic_words && !(@automatic_words & node.ids).blank?
    pieces << if contributes
      node.to_s(:without_edits => true)
    elsif crossout == true
      node.to_s.gsub(/[a-zA-Z]/, '-')
    else
      ''
    end
    # result += CLEAR if @automatic_words && !(@automatic_words & node.ids).blank?
  end

  pieces.join(' ').strip
end