class Abstractor::Parser
Attributes
abstractor_text[RW]
sentences[RW]
Public Class Methods
new(abstractor_text, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 6
# Stores +abstractor_text+ and splits it into sentences with the
# StanfordCoreNLP :tokenize/:ssplit pipeline.
#
# abstractor_text - the text to parse; may be nil, in which case
#                   @sentences is left unset.
# options         - Hash; :new_line_is_sentence_break (default true)
#                   selects the value written to the global
#                   'ssplit.newlineIsSentenceBreak' property
#                   ('always' when truthy, 'two' otherwise).
#
# Each entry of @sentences is a Hash with :range, :begin_position,
# :end_position and :sentence (the lower-cased slice of the text).
def initialize(abstractor_text, options = {})
  settings = { new_line_is_sentence_break: true }.merge(options)
  @abstractor_text = abstractor_text

  newline_policy = settings[:new_line_is_sentence_break] ? 'always' : 'two'
  StanfordCoreNLP.custom_properties['ssplit.newlineIsSentenceBreak'] = newline_policy

  pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)
  annotation = StanfordCoreNLP::Annotation.new(@abstractor_text)
  pipeline.annotate(annotation)
  return unless @abstractor_text

  @sentences = annotation.get(:sentences).to_a.map do |s|
    first = s.get(:character_offset_begin).to_s.to_i
    last = s.get(:character_offset_end).to_s.to_i
    # NOTE(review): the span is inclusive of +last+; confirm whether the
    # pipeline's end offset is already one past the final character.
    span = first..last
    {
      :range => span,
      :begin_position => first,
      :end_position => last,
      :sentence => @abstractor_text[span].downcase
    }
  end
end
Public Instance Methods
find_sentence(range)
click to toggle source
# File lib/abstractor/parser.rb, line 83
# Returns the first sentence Hash whose :range covers +range+, or nil
# when none does (including when no sentences were parsed).
#
# range - an Integer position or a Range of positions.
#
# Range#cover? is used instead of Range#include? because core Ruby's
# include? never reports a Range argument as contained; cover? handles
# both single positions and sub-ranges (Range arguments need Ruby 2.6+).
def find_sentence(range)
  Array(@sentences).find { |sentence| sentence[:range].cover?(range) }
end
match(token)
click to toggle source
# File lib/abstractor/parser.rb, line 63
# Matches +token+ against the lower-cased abstractor text.
#
# token - regular-expression source; compiled by prepare_token with its
#         default options.
#
# Returns the MatchData of the first match, or nil when there is no
# match, the token cannot be compiled, or there is no text to search.
def match(token)
  regular_expression = prepare_token(token)
  text = prepare_abstractor_text
  # Without the text guard a nil text raised NoMethodError here, while
  # scan already treats it as "nothing found".
  return if regular_expression.nil? || text.nil?

  text.match(regular_expression)
end
match_position(match)
click to toggle source
# File lib/abstractor/parser.rb, line 74
# Returns the character offset at which +match+ starts, i.e. the length
# of the text preceding the match.
#
# match - a MatchData, or nil (the value match/match_sentence return
#         when nothing matched).
#
# Returns an Integer, or nil when +match+ is nil.
def match_position(match)
  return if match.nil?

  match.pre_match.size
end
match_sentence(sentence, token)
click to toggle source
# File lib/abstractor/parser.rb, line 78
# Matches +token+ against +sentence+ exactly as given (the sentence is
# not lower-cased here).
#
# sentence - the String to search; nil yields nil.
# token    - regular-expression source; compiled by prepare_token with
#            its default options.
#
# Returns the MatchData of the first match, or nil when there is no
# match, the token cannot be compiled, or +sentence+ is nil.
def match_sentence(sentence, token)
  # Compile once and reuse; the token used to be compiled a second time
  # for the actual match.
  regular_expression = prepare_token(token)
  return if regular_expression.nil? || sentence.nil?

  sentence.match(regular_expression)
end
range_all(token, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 68
# Delegates to String#range_all on the lower-cased abstractor text with
# the regular expression compiled from +token+. String#range_all is not
# core Ruby; it must be provided elsewhere in the project.
#
# token   - regular-expression source.
# options - Hash; :word_boundary defaults to true when absent or nil.
#           The caller's hash is never modified.
#
# Returns whatever String#range_all returns, or nil when the token
# cannot be compiled or there is no text to search.
def range_all(token, options = {})
  # Apply the default on a copy so the caller's hash stays untouched.
  effective = options[:word_boundary].nil? ? options.merge(word_boundary: true) : options
  regular_expression = prepare_token(token, effective)
  text = prepare_abstractor_text
  return if regular_expression.nil? || text.nil?

  text.range_all(regular_expression)
end
scan(token, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 31
# Scans the lower-cased abstractor text for every occurrence of +token+.
#
# token   - regular-expression source.
# options - Hash; :word_boundary defaults to true when absent or nil.
#           The caller's hash is never modified.
#
# Returns the Array produced by String#scan, or [] when the token
# cannot be compiled or there is no text to search.
def scan(token, options = {})
  # Apply the default on a copy so the caller's hash stays untouched.
  effective = options[:word_boundary].nil? ? options.merge(word_boundary: true) : options
  regular_expression = prepare_token(token, effective)
  text = prepare_abstractor_text
  return [] if regular_expression.nil? || text.nil?

  text.scan(regular_expression)
end
sentence_match_scan(sentence, token, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 52
# Collects the MatchData for every occurrence of +token+ in +sentence+
# (the sentence is searched as given; it is not lower-cased here).
#
# sentence - the String to search; nil yields [].
# token    - regular-expression source.
# options  - Hash; :word_boundary defaults to true when absent or nil.
#            The caller's hash is never modified.
#
# Returns an Array of MatchData, or [] when the token cannot be
# compiled or +sentence+ is nil.
def sentence_match_scan(sentence, token, options = {})
  # Apply the default on a copy so the caller's hash stays untouched.
  effective = options[:word_boundary].nil? ? options.merge(word_boundary: true) : options
  regular_expression = prepare_token(token, effective)
  return [] if regular_expression.nil? || sentence.nil?

  # String#scan only yields the matched text; enumerating it and reading
  # Regexp.last_match on each step recovers the full MatchData.
  # http://stackoverflow.com/questions/6804557/how-do-i-get-the-match-data-for-all-occurrences-of-a-ruby-regular-expression-in
  sentence.to_enum(:scan, regular_expression).map { Regexp.last_match }
end
sentence_scan(sentence, token, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 42
# Scans +sentence+ for every occurrence of +token+ (the sentence is
# searched as given; it is not lower-cased here).
#
# sentence - the String to search; nil yields [].
# token    - regular-expression source.
# options  - Hash; :word_boundary defaults to true when absent or nil.
#            The caller's hash is never modified.
#
# Returns the Array produced by String#scan, or [] when the token
# cannot be compiled or +sentence+ is nil.
def sentence_scan(sentence, token, options = {})
  # Apply the default on a copy so the caller's hash stays untouched.
  effective = options[:word_boundary].nil? ? options.merge(word_boundary: true) : options
  regular_expression = prepare_token(token, effective)
  return [] if regular_expression.nil? || sentence.nil?

  sentence.scan(regular_expression)
end
Private Instance Methods
prepare_abstractor_text()
click to toggle source
# File lib/abstractor/parser.rb, line 88
# Returns a lower-cased copy of @abstractor_text, or nil when no text
# has been set.
def prepare_abstractor_text
  return if @abstractor_text.nil?

  @abstractor_text.downcase
end
prepare_token(token, options = {})
click to toggle source
# File lib/abstractor/parser.rb, line 92
# Compiles +token+ into a Regexp used to search lower-cased text.
#
# token   - regular-expression source (not escaped here). It is
#           lower-cased before compiling.
#           NOTE(review): lower-casing also rewrites upper-case escapes
#           in the source (e.g. \S becomes \s); confirm callers never
#           pass such patterns.
# options - Hash; :word_boundary (default true when absent or nil)
#           wraps the pattern in \b...\b. The caller's hash is never
#           modified.
#
# Returns a Regexp, or nil when the token cannot be turned into one
# (nil token, invalid pattern, non-String token, ...).
def prepare_token(token, options = {})
  # Read the flag without writing the default back into the caller's hash.
  word_boundary = options[:word_boundary].nil? ? true : options[:word_boundary]
  begin
    if word_boundary
      Regexp.new('\b' + token.downcase + '\b')
    else
      Regexp.new(token.downcase)
    end
  rescue StandardError
    # Deliberate best-effort: callers treat nil as "no usable pattern".
    nil
  end
end