class TwitterKorean::Processor

Ruby interface to Scala TwitterKoreanProcessor

Attributes

java_convertor[R]
jvm_processor[R]

Public Class Methods

new(*jvmargs)
# File lib/twitter_korean/processor.rb, line 8
# Builds a processor backed by a JVM bridge.
#
# All positional arguments are collected into an array and handed to
# TwitterKorean::JvmBridge.new unchanged; the bridge's
# +scala_twitter_korean_processor+ is stored in @jvm_processor.
#
# @param jvmargs [Array] arguments forwarded to the JVM bridge
#   (presumably JVM start-up flags -- confirm against JvmBridge)
def initialize(*jvmargs)
  @jvm_processor = TwitterKorean::JvmBridge.new(jvmargs).scala_twitter_korean_processor
end

Public Instance Methods

extract_phrases(text, options = {})
# File lib/twitter_korean/processor.rb, line 32
# Extracts phrases from +text+ by tokenizing it on the JVM side and passing
# the tokens to the processor's +extractPhrases+.
#
# @param text [String, nil] input text; a falsy value short-circuits to nil
# @param options [Hash] optional flags
# @option options [Boolean] :filter_spam (false) forwarded to extractPhrases
# @option options [Boolean] :including_hashtags (true) forwarded to
#   extractPhrases; pass +false+ to turn it off
# @return [Array, nil] whatever +converto_to_korean_tokens+ builds from the
#   JVM result, or nil when +text+ is falsy
def extract_phrases(text, options = {})
  return unless text

  filter_spam = options[:filter_spam] || false

  # `options[:including_hashtags] || true` can never yield false, so an
  # explicit `false` from the caller used to be silently ignored. Fall back
  # to the default only when the key is missing or nil.
  including_hashtags = options.fetch(:including_hashtags, true)
  including_hashtags = true if including_hashtags.nil?

  converto_to_korean_tokens do
    jvm_processor.extractPhrases(jvm_processor.tokenize(text), filter_spam, including_hashtags)
  end
end
normalize(text)
# File lib/twitter_korean/processor.rb, line 13
# Normalizes +text+ through the JVM processor.
#
# @param text [String, nil] input text
# @return [Object, nil] the result of calling +toString+ on the processor's
#   normalize output, or nil when +text+ is falsy
def normalize(text)
  if text
    normalized = jvm_processor.normalize(text)
    normalized.toString
  end
end
stem(text)
# File lib/twitter_korean/processor.rb, line 25
# Tokenizes +text+ on the JVM side, stems the tokens, and converts the
# result via +converto_to_korean_tokens+.
#
# @param text [String, nil] input text
# @return [Array, nil] converted tokens, or nil when +text+ is falsy
def stem(text)
  return nil unless text

  converto_to_korean_tokens do
    raw_tokens = jvm_processor.tokenize(text)
    jvm_processor.stem(raw_tokens)
  end
end
tokenize(text)
# File lib/twitter_korean/processor.rb, line 18
# Tokenizes +text+ with the JVM processor and converts the result via
# +converto_to_korean_tokens+.
#
# @param text [String, nil] input text
# @return [Array, nil] converted tokens, or nil when +text+ is falsy
def tokenize(text)
  return nil unless text

  converto_to_korean_tokens { jvm_processor.tokenize(text) }
end

Private Instance Methods

converto_to_korean_tokens(&block)
# File lib/twitter_korean/processor.rb, line 43
# Runs the given block, stringifies its result with +toString+, splits that
# string into token strings via +scala_list_to_array+, and builds one
# TwitterKorean::KoreanToken per entry.
#
# Each entry from +scala_list_to_array+ is itself a collection; only its
# first element is passed to +build_by_formed_str+.
#
# @yieldreturn [#toString] the JVM-side result to convert
# @return [Array] one built token per extracted token string
def converto_to_korean_tokens(&block)
  serialized = block.call.toString
  scala_list_to_array(serialized).map do |captures|
    TwitterKorean::KoreanToken.build_by_formed_str(captures.first)
  end
end
scala_list_to_array(result)
# File lib/twitter_korean/processor.rb, line 51
# Pulls the individual token strings out of a stringified list such as
# "List(abc(Noun: 0, 3), de(Josa: 3, 2))".
#
# A token is matched when it directly follows "List(" or ", " and ends with
# a parenthesised "Word: digits, digits" suffix. Because the pattern has one
# capture group, each match comes back as a one-element array.
#
# @param result [String] stringified list
# @return [Array<Array<String>>] one single-element array per token
def scala_list_to_array(result)
  token_pattern = /(?<=List\(|\,\s)(.*?\(\w+\:\s[0-9]+,\s[0-9]+\))/
  matches = result.scan(token_pattern)
  matches.to_a
end