class FreelingClient::Analyzer
Public Class Methods
new(opt={})
click to toggle source
# File lib/freeling_client/analyzer.rb, line 13
# Create an analyzer.
#
# opt - (Hash) optional settings:
#   :config  - value stored in @config
#              (default: 'config/freeling/analyzer.cfg')
#   :timeout - value stored in @timeout; #call hands it to Timeout.timeout,
#              which interprets it as a number of seconds (default: 60)
def initialize(opt = {})
  @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
  @timeout = opt.fetch(:timeout, 60) # seconds, i.e. one minute (not hours)
end
Public Instance Methods
build_regexp(ne_text)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 109
# Build a case-insensitive Regexp used to locate a token form in a text.
#
# An underscore in ne_text is treated as a joiner: the underscore-separated
# parts must appear in order, separated by one or more non-word characters
# (\W+). Every part is matched literally: regexp metacharacters occurring in
# the form (".", "(", "?", "+", ...) are escaped instead of being interpreted,
# so a form such as "." no longer matches an arbitrary character and a form
# such as "(" no longer fails to compile.
#
# ne_text - (String) token form
#
# Returns a Regexp. Falls back to /./ if the pattern cannot be compiled.
def build_regexp(ne_text)
  parts = ne_text.split('_')
  pattern =
    if ne_text.include?('_') && !parts.empty?
      parts.map { |part| Regexp.escape(part) }.join('\W+')
    else
      # No underscore, or nothing but underscores (split yields no parts):
      # match the form verbatim rather than building an empty pattern.
      Regexp.escape(ne_text)
    end
  Regexp.new(pattern, Regexp::IGNORECASE)
rescue RegexpError
  /./
end
call(cmd, text)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 71
# Run the analyzer command selected by cmd over text and collect its output.
#
# The text is written to a temporary file whose path is handed to
# #command; the resulting command line is executed with Open3.popen3.
#
# cmd  - (Symbol) command name, checked with #valide_command!
# text - (String) text to analyze
#
# Returns an Array of the lines printed on stdout, without line terminators.
# Raises ExtractionError with the stderr content if the process wrote
# anything to stderr, or with "Timeout" if it did not finish within @timeout
# (as interpreted by Timeout.timeout).
def call(cmd, text)
  valide_command!(cmd)

  output = []
  file = Tempfile.new('foo', encoding: 'utf-8')
  begin
    file.write(text)
    file.close

    # Block form: popen3 closes the pipes and reaps the child on exit, even
    # when an exception is raised.
    Open3.popen3(command(cmd, file.path)) do |stdin, stdout, stderr, wait_thr|
      stdin.close
      # Drain stderr concurrently; reading it only after stdout reached EOF
      # can deadlock once the child fills the stderr pipe buffer.
      stderr_reader = Thread.new { stderr.readlines }
      begin
        Timeout.timeout(@timeout) do
          stdout.each_line { |line| output << line.chomp }
          message = stderr_reader.value
          raise ExtractionError, message.join("\n") unless message.empty?
        end
      rescue Timeout::Error
        # Stop the child so that popen3 does not block waiting for it.
        begin
          Process.kill('KILL', wait_thr.pid)
        rescue Errno::ESRCH
          # The process exited on its own in the meantime.
        end
        raise ExtractionError, "Timeout"
      ensure
        # No-op when the reader already finished; otherwise stop it before
        # popen3 closes the stream it is reading from.
        stderr_reader.kill
      end
    end
  ensure
    file.close
    file.unlink
  end
  output
end
command(cmd, file_path)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 121
# Build the command line for cmd by dispatching to the method named
# "command_<cmd>" with file_path as its only argument.
#
# cmd       - command name (e.g. :morfo dispatches to #command_morfo)
# file_path - path of the input file, passed through unchanged
#
# Returns whatever the dispatched method returns.
def command(cmd, file_path)
  builder = "command_#{cmd}"
  send(builder, file_path)
end
command_morfo(file_path)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 125
# Build the command line string for the "morfo" output format.
#
# The string is composed of freeling_share, freeling_bin, "-f" followed by
# config, the input/output format flags, and file_path redirected to the
# command's standard input.
#
# file_path - path of the file fed to the command via "<"
#
# Returns the command line as a String.
def command_morfo(file_path)
  flags = '--inpf plain --outf morfo'
  "#{freeling_share} #{freeling_bin} -f #{config} #{flags} < #{file_path}"
end
command_tagged(file_path)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 129
# Build the command line string for the "tagged" output format.
#
# The string is composed of freeling_share, freeling_bin, "-f" followed by
# config, the input/output format flags, and file_path redirected to the
# command's standard input.
#
# file_path - path of the file fed to the command via "<"
#
# Returns the command line as a String.
def command_tagged(file_path)
  flags = '--inpf plain --outf tagged'
  "#{freeling_share} #{freeling_bin} -f #{config} #{flags} < #{file_path}"
end
command_tagged_nec(file_path)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 133
# Build the command line string for the "tagged" output format with the
# --nec and --noflush flags added.
#
# The string is composed of freeling_share, freeling_bin, "-f" followed by
# config, the flags, and file_path redirected to the command's standard input.
#
# file_path - path of the file fed to the command via "<"
#
# Returns the command line as a String.
def command_tagged_nec(file_path)
  flags = '--inpf plain --outf tagged --nec --noflush'
  "#{freeling_share} #{freeling_bin} -f #{config} #{flags} < #{file_path}"
end
command_tagged_sense(file_path)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 137
# Build the command line string for the "sense" output format with
# "--sense all".
#
# The string is composed of freeling_share, freeling_bin, "-f" followed by
# config, the flags, and file_path redirected to the command's standard input.
#
# file_path - path of the file fed to the command via "<"
#
# Returns the command line as a String.
def command_tagged_sense(file_path)
  flags = '--inpf plain --outf sense --sense all'
  "#{freeling_share} #{freeling_bin} -f #{config} #{flags} < #{file_path}"
end
freeling_bin()
click to toggle source
# File lib/freeling_client/analyzer.rb, line 145
# Path of the analyzer executable used when building command lines.
#
# Returns the fixed String "/usr/local/bin/analyzer".
def freeling_bin
  '/usr/local/bin/analyzer'
end
parse_token_line(str)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 99
# Turn one whitespace-separated output line into a FreelingClient::Token.
#
# Only the first four fields are used, in this order: form, lemma, tag, prob.
# prob is converted with to_f; fields missing from the line are left out of
# the attribute Hash handed to the Token constructor.
#
# str - (String) one output line
#
# Returns a FreelingClient::Token.
def parse_token_line(str)
  form, lemma, tag, prob = str.split(' ').first(4)

  attributes = { :form => form, :lemma => lemma, :tag => tag }
  attributes[:prob] = prob.to_f unless prob.nil?
  attributes.delete_if { |_key, value| value.nil? }

  FreelingClient::Token.new(attributes)
end
ptokens(cmd, text)
click to toggle source
Generate ptokens for a given text. ptokens are tokens annotated with their position in the text.
Example:
>> analyzer = FreelingClient::Analyzer.new
>> analyzer.ptokens(:morfo, "Este texto está en español.")
Arguments:
cmd: (Symbol) text: (String)
# File lib/freeling_client/analyzer.rb, line 50
# Lazily yield the tokens of text together with their position in text.
#
# For every token produced by #tokens, the token form is searched in text
# (through the Regexp returned by #build_regexp) starting at a running
# cursor. A token is yielded, with its pos set to the match offset, only when
# the match starts fewer than 5 characters after the cursor; otherwise the
# token is skipped. In both cases the cursor then advances by the length of
# the form.
#
# cmd  - (Symbol) command name, forwarded to #tokens
# text - (String) text to analyze
#
# Returns an Enumerator of tokens whose pos has been set.
def ptokens(cmd, text)
  Enumerator.new do |yielder|
    cursor = 0
    tokens(cmd, text).each do |token|
      form = token['form'].dup
      found_at = text.index(build_regexp(form), cursor)

      # Not found, or found too far ahead of the cursor: skip this token.
      if found_at.nil? || found_at >= cursor + 5
        cursor += form.length
        next
      end

      token.pos = found_at
      yielder << token
      cursor = found_at + form.length
    end
  end
end
tokens(cmd, text)
click to toggle source
Generate tokens for a given text
Example:
>> analyzer = FreelingClient::Analyzer.new
>> analyzer.tokens(:morfo, "Este texto está en español.")
Arguments:
cmd: (Symbol) text: (String)
# File lib/freeling_client/analyzer.rb, line 29
# Lazily yield one token per non-empty output line of the analyzer.
#
# cmd is checked with #valide_command! immediately; the analyzer itself
# (#call) only runs once the returned Enumerator is iterated. Each non-empty
# line is converted with #parse_token_line.
#
# cmd  - (Symbol) command name
# text - (String) text to analyze
#
# Returns an Enumerator of parsed tokens.
def tokens(cmd, text)
  valide_command!(cmd)

  Enumerator.new do |yielder|
    lines = call(cmd, text).reject(&:empty?)
    lines.each { |line| yielder << parse_token_line(line) }
  end
end
valide_command!(cmd)
click to toggle source
# File lib/freeling_client/analyzer.rb, line 149
# Check that cmd is one of the supported commands:
# :morfo, :tagged, :tagged_nec or :tagged_sense.
#
# cmd - command name to check (compared against the Symbols above)
#
# Returns nil when cmd is supported.
# Raises CommandError ("<cmd> does not exist") otherwise.
def valide_command!(cmd)
  return if %i[morfo tagged tagged_nec tagged_sense].include?(cmd)

  raise CommandError, "#{cmd} does not exist"
end