class TwitterKorean::Processor
A Ruby interface to the Scala TwitterKoreanProcessor.
Attributes
java_convertor[R]
jvm_processor[R]
Public Class Methods
new(*jvmargs)
click to toggle source
# File lib/twitter_korean/processor.rb, line 8
#
# Creates a processor backed by a TwitterKorean::JvmBridge.
#
# All arguments are collected into an array and handed to
# TwitterKorean::JvmBridge.new unchanged; the object returned by the bridge's
# +scala_twitter_korean_processor+ is stored in +@jvm_processor+.
#
# @param jvmargs [Array] arguments forwarded (as one array) to the JVM bridge
def initialize(*jvmargs)
  @jvm_processor = TwitterKorean::JvmBridge.new(jvmargs).scala_twitter_korean_processor
end
Public Instance Methods
extract_phrases(text, options = {})
click to toggle source
# File lib/twitter_korean/processor.rb, line 32
#
# Tokenizes +text+ with the JVM processor, passes the tokens to its
# +extractPhrases+ method, and converts the result through
# +converto_to_korean_tokens+.
#
# @param text [String, nil] input text; when nil (or false) nothing is done
# @param options [Hash] optional flags
# @option options [Boolean] :filter_spam (false) forwarded to +extractPhrases+
# @option options [Boolean] :including_hashtags (true) forwarded to
#   +extractPhrases+; an explicit +false+ is honoured
# @return [Array, nil] result of +converto_to_korean_tokens+, or nil when
#   +text+ is falsy
def extract_phrases(text, options = {})
  return unless text

  filter_spam = options[:filter_spam] || false

  # `options[:including_hashtags] || true` can never yield false, so the flag
  # could not be switched off. Fall back to true only when the option is
  # missing or nil, which keeps the previous behaviour for those two cases.
  including_hashtags = options.fetch(:including_hashtags, true)
  including_hashtags = true if including_hashtags.nil?

  converto_to_korean_tokens do
    jvm_processor.extractPhrases(jvm_processor.tokenize(text), filter_spam, including_hashtags)
  end
end
normalize(text)
click to toggle source
# File lib/twitter_korean/processor.rb, line 13
#
# Runs +text+ through the JVM processor's +normalize+ and returns the
# +toString+ of the result.
#
# @param text [String, nil] input text
# @return [Object, nil] the +toString+ value of the normalized result, or nil
#   when +text+ is falsy
def normalize(text)
  jvm_processor.normalize(text).toString if text
end
stem(text)
click to toggle source
# File lib/twitter_korean/processor.rb, line 25
#
# Tokenizes +text+ with the JVM processor, stems the tokens with its +stem+
# method, and converts the result through +converto_to_korean_tokens+.
#
# @param text [String, nil] input text
# @return [Array, nil] converted tokens, or nil when +text+ is falsy
def stem(text)
  return nil unless text

  converto_to_korean_tokens do
    tokens = jvm_processor.tokenize(text)
    jvm_processor.stem(tokens)
  end
end
tokenize(text)
click to toggle source
# File lib/twitter_korean/processor.rb, line 18
#
# Tokenizes +text+ with the JVM processor and converts the result through
# +converto_to_korean_tokens+.
#
# @param text [String, nil] input text
# @return [Array, nil] converted tokens, or nil when +text+ is falsy
def tokenize(text)
  converto_to_korean_tokens { jvm_processor.tokenize(text) } if text
end
Private Instance Methods
converto_to_korean_tokens(&block)
click to toggle source
# File lib/twitter_korean/processor.rb, line 43
#
# Calls the given block, takes the +toString+ of its result, splits that
# string into token strings with +scala_list_to_array+, and builds one
# TwitterKorean::KoreanToken per token string via +build_by_formed_str+.
#
# @yieldreturn [#toString] the object whose string form is parsed
# @return [Array] one built token per entry found in the string
def converto_to_korean_tokens(&block)
  list_text = block.call.toString

  # scala_list_to_array yields one-element arrays (one regex capture group),
  # so the token string is the first element of each entry.
  scala_list_to_array(list_text).map do |captures|
    TwitterKorean::KoreanToken.build_by_formed_str(captures.first)
  end
end
scala_list_to_array(result)
click to toggle source
# File lib/twitter_korean/processor.rb, line 51
#
# Extracts the individual entries from a string such as
# "List(abc(Noun: 0, 3), de(Josa: 3, 2))".
#
# An entry is any text ending in "(Word: digits, digits)" that directly
# follows "List(" or ", ". Because the pattern has a single capture group,
# String#scan returns each entry wrapped in a one-element array.
#
# @param result [String] string form of the list to split
# @return [Array<Array<String>>] one single-element array per entry; empty
#   when nothing matches
def scala_list_to_array(result)
  entry_pattern = /(?<=List\(|\,\s)(.*?\(\w+\:\s[0-9]+,\s[0-9]+\))/
  result.scan(entry_pattern)
end