module Linguakit
Constants
- DEFAULT_COMMAND
- DEFAULT_COMMAND_STR
Public Class Methods
get_phrases(item)
click to toggle source
# File lib/linguakit_ruby.rb, line 90
# Turns an item descriptor into a list of phrases.
#
# item - Hash read through item_config (keys :data and :type).
#
# For type :str the text in item[:data] is run through `keyword` and the
# :phrase of every result is returned. For type :arr the configured data is
# returned untouched. Any other type falls through the `case` and yields nil.
def get_phrases(item)
  config = item_config(item)

  case config[:type]
  when :str then items_to_array(keyword(item[:data]))
  when :arr then config[:data]
  end
end
get_score(principal_items, secondary_items, **args)
click to toggle source
# File lib/linguakit_ruby.rb, line 99
# Scores how strongly the secondary phrases match the principal phrases.
#
# principal_items - item descriptor resolved with get_phrases; its phrases
#                   form the set that is searched.
# secondary_items - item descriptor resolved with get_phrases; each of its
#                   phrases is looked up in the principal set.
# args            - :score, minimum match score for a phrase to count
#                   (defaults to 0.8).
#
# Returns a Float: the sum of the accepted match scores, times 100, divided
# by the number of principal phrases. Returns 0.0 when there are no principal
# phrases (the division would otherwise raise ZeroDivisionError).
def get_score(principal_items, secondary_items, **args)
  threshold = args[:score] || 0.8
  principal_phrases = get_phrases(principal_items)
  secondary_phrases = get_phrases(secondary_items)

  # Nothing to match against, so there is nothing to divide by either.
  return 0.0 if principal_phrases.empty?

  # The searched set is the same for every lookup, so build the matcher once.
  matcher = FuzzyMatch.new(principal_phrases)

  accepted_scores = secondary_phrases.map do |phrase|
    match = matcher.find(phrase, { find_with_score: true })
    # match[1] is the score slot of the find_with_score result.
    match[1] if match && match[1] >= threshold
  end.compact

  # Start from 0.0 so the result is a Float even when nothing matched.
  (accepted_scores.sum(0.0) * 100) / principal_phrases.length
end
item_config(item)
click to toggle source
# File lib/linguakit_ruby.rb, line 83
# Normalizes an item descriptor into a Hash with :data and :type.
#
# item - object indexed with [:data] and [:type].
#
# A nil/false :data becomes "" and a nil/false :type becomes :str.
def item_config(item)
  data = item[:data] || ""
  type = item[:type] || :str

  { data: data, type: type }
end
items_to_array(items)
click to toggle source
# File lib/linguakit_ruby.rb, line 72
# Collects the :phrase entry of every element in items.
#
# items - collection whose elements are indexed with [:phrase].
#
# Returns an Array with one entry per element (nil where :phrase is absent).
def items_to_array(items)
  items.map do |entry|
    entry[:phrase]
  end
end
keyphrases(input, **args)
click to toggle source
# File lib/linguakit_ruby.rb, line 29
# Runs the 'mwe' module through DEFAULT_COMMAND and parses its output.
#
# input - text; it is written to a file by str_to_file and the file path is
#         what gets substituted into the command.
# args  - :lang (defaults to 'es') and :opts (defaults to '-chi').
#
# Option flags noted by the original author:
#   -s    = input is a string and not a file
#   -chi  = chi-square co-occurrence measure
#   -log  = loglikelihood
#   -scp  = symmetrical conditional probability
#   -mi   = mutual information
#   -cooc = co-occurrence counting
#
# Returns an Array of Hashes { phrase:, rank:, composition: }, one per line of
# standard output, each line being split on tabs.
def keyphrases(input, **args)
  config = {
    module: 'mwe',
    input: str_to_file(input),
    lang: args[:lang] || 'es',
    options: args[:opts] || '-chi'
  }

  # Only standard output is used; stderr and the exit status are ignored.
  stdout, = Open3.capture3(DEFAULT_COMMAND % config)

  stdout.split("\n").map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
keyword(input, **args)
click to toggle source
# File lib/linguakit_ruby.rb, line 54
# Runs the 'key' module through DEFAULT_COMMAND and parses its output.
#
# input - text; it is written to a file by str_to_file and the file path is
#         what gets substituted into the command.
# args  - :lang (defaults to 'es').
#
# Returns an Array of Hashes { phrase:, rank:, composition: }, one per line of
# standard output, each line being split on tabs.
def keyword(input, **args)
  config = {
    module: 'key',
    input: str_to_file(input),
    lang: args[:lang] || 'es'
  }

  # Only standard output is used; stderr and the exit status are ignored.
  stdout, = Open3.capture3(DEFAULT_COMMAND % config)

  stdout.split("\n").map do |line|
    phrase, rank, composition = line.split("\t")
    { phrase: phrase, rank: rank.to_f, composition: composition }
  end
end
sentiment(input, **args)
click to toggle source
# File lib/linguakit_ruby.rb, line 13
# Runs the 'sent' module through the external command and parses the result.
#
# input - value substituted into the command's input field.
# args  - :lang (defaults to 'es') and :opts; when :opts is exactly '-s'
#         DEFAULT_COMMAND_STR is used, otherwise DEFAULT_COMMAND.
#
# Returns { emotion: String, point: Float }, taken from the second and third
# tab-separated fields of standard output.
# Raises RuntimeError when the output has fewer than three tab-separated
# fields (for example when the command failed and printed nothing).
def sentiment(input, **args)
  # -s = input is a string and not a file
  config = {
    module: 'sent',
    input: input,
    lang: args[:lang] || 'es',
    options: args[:opts]
  }
  command = args[:opts] == '-s' ? DEFAULT_COMMAND_STR : DEFAULT_COMMAND

  # NOTE(review): input is formatted straight into the command string; if the
  # template passes it through a shell, untrusted text needs escaping - confirm
  # against the DEFAULT_COMMAND_STR definition.
  stdout, stderr, status = Open3.capture3(command % config)

  fields = stdout.split("\t")
  if fields[2].nil?
    # Surface what the command reported instead of failing on nil further down.
    raise "sentiment: unexpected command output " \
          "(exit status #{status.exitstatus.inspect}): #{stderr.strip}"
  end

  { emotion: fields[1], point: fields[2].split("\n")[0].to_f }
end
str_to_file(str)
click to toggle source
# File lib/linguakit_ruby.rb, line 76
# Writes str to a new temporary file and returns the file's path.
#
# str - content to write.
#
# The file is created as data*.txt inside the "tmp" directory of the current
# working directory, with UTF-8 as its encoding. That directory must already
# exist.
#
# NOTE(review): only the path is returned, so nothing keeps the Tempfile
# object alive; Ruby may unlink the file once that object is garbage
# collected. Callers should consume the path promptly.
def str_to_file(str)
  file = Tempfile.new(['data', '.txt'], File.join(Dir.pwd, 'tmp'), encoding: 'utf-8')
  begin
    file.write(str)
  ensure
    # Release the handle even when the write fails.
    file.close
  end
  file.path
end