class TinyClassifier::Tokenizer
Constants
- TOKENIZERS
Attributes
type[RW]
Public Class Methods
new(params = nil)
click to toggle source
# File lib/tiny-classifier/tokenizer.rb, line 22
# Sets up the tokenizer type.
#
# params - optional object (nil by default) that is indexed with :type when
#          it is truthy; the value found there is stored in @type.
#
# When @type is still nil/false afterwards it falls back to :none.
def initialize(params = nil)
  @type = params[:type] if params
  @type ||= :none
end
Public Instance Methods
tokenize(input)
click to toggle source
# File lib/tiny-classifier/tokenizer.rb, line 29
# Tokenizes +input+ according to the current @type.
#
# @type is normalized on every call (to_s, downcase, to_sym), so values
# such as :mecab, "MeCab" or :MECAB all select the MeCab tokenizer.
# Any other type returns +input+ itself, untouched.
def tokenize(input)
  normalized_type = @type.to_s.downcase.to_sym
  return tokenize_by_mecab(input) if normalized_type == :mecab

  input
end
Private Instance Methods
tokenize_by_mecab(input)
click to toggle source
# File lib/tiny-classifier/tokenizer.rb, line 39
# Parses +input+ with MeCab (through the natto gem) and keeps only the
# surface forms of terms whose feature string starts with 名詞 (noun),
# 形容詞 (adjective) or 動詞 (verb).
#
# Returns the kept surfaces joined by single spaces, with leading and
# trailing whitespace stripped.
def tokenize_by_mecab(input)
  # natto is loaded here, at call time, rather than at file load.
  require "natto"

  kept_surfaces = []
  Natto::MeCab.new.parse(input) do |node|
    kept_surfaces << node.surface if node.feature =~ /\A(名詞|形容詞|動詞)/
  end
  kept_surfaces.join(" ").strip
end