class NattoWrap::MeCabModel
NOTE: taku910.github.io/mecab/
Attributes
conjugation1[RW]
conjugation2[RW]
pronunciation[RW]
prototype[RW]
reading[RW]
word[RW]
word_class[RW]
word_subclass1[RW]
word_subclass2[RW]
word_subclass3[RW]
Public Class Methods
new(word: nil, word_class: nil, word_subclass1: nil, word_subclass2: nil, word_subclass3: nil, conjugation1: nil, conjugation2: nil, prototype: nil, reading: nil, pronunciation: nil)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 11
# Builds a model from the individual feature fields of one token.
#
# +word+ is stored as given. Every other field is passed through
# +get_object+ before being stored. Missing values cascade:
# +prototype+ falls back to +word+, +reading+ falls back to the resolved
# prototype, and +pronunciation+ falls back to the resolved reading.
def initialize(word: nil, word_class: nil, word_subclass1: nil, word_subclass2: nil, word_subclass3: nil,
               conjugation1: nil, conjugation2: nil, prototype: nil, reading: nil, pronunciation: nil)
  @word = word

  # Part-of-speech hierarchy.
  @word_class, @word_subclass1, @word_subclass2, @word_subclass3 =
    [word_class, word_subclass1, word_subclass2, word_subclass3].map { |field| get_object(field) }

  # Conjugation fields.
  @conjugation1, @conjugation2 = [conjugation1, conjugation2].map { |field| get_object(field) }

  # Each of these defaults to the previously resolved value.
  @prototype = get_object(prototype) || word
  @reading = get_object(reading) || @prototype
  @pronunciation = get_object(pronunciation) || @reading
end
Private Class Methods
convert_model(str)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 97
# Parses +str+ with Natto::MeCab and returns one model per output line.
#
# The Natto::MeCab instance is created on first use and kept in @natto.
# A trailing "EOS\n" marker is removed from the parser output before the
# remaining text is split on newlines and each line is handed to
# +create_from_natto_object+.
def convert_model(str)
  tagger = (@natto ||= Natto::MeCab.new)
  output = tagger.parse(str)
  output.delete_suffix!("EOS\n")

  lines = output.split("\n")
  lines.map do |line|
    create_from_natto_object(line)
  end
end
create_as_proper_noun(word, reading = nil, pronunciation = nil)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 104
# Builds a model tagged as a proper noun: word class '名詞' (noun),
# subclass 1 '固有名詞' (proper noun), subclass 2 '一般' (general).
#
# +word+ is also used as the prototype. +reading+ defaults to +word+, and
# +pronunciation+ defaults to +reading+, then to +word+.
def create_as_proper_noun(word, reading = nil, pronunciation = nil)
  resolved_reading = reading || word
  resolved_pronunciation = pronunciation || resolved_reading

  new(
    word: word,
    word_class: '名詞',
    word_subclass1: '固有名詞',
    word_subclass2: '一般',
    word_subclass3: nil,
    conjugation1: nil,
    conjugation2: nil,
    prototype: word,
    reading: resolved_reading,
    pronunciation: resolved_pronunciation
  )
end
create_from_natto_object(natto_object)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 130
# Builds a model from one line of parser output.
#
# The line is split on tabs. The first column is the surface word and the
# last column is a comma-separated feature list whose positions 0..8 map to
# word_class, word_subclass1..3, conjugation1..2, prototype, reading and
# pronunciation. Features missing from a short list are passed as nil.
#
# A line with no tab has no feature column. All feature fields are then
# passed as nil instead of re-using the surface word as the feature list,
# and an empty line yields a nil word rather than raising NoMethodError.
def create_from_natto_object(natto_object)
  columns = natto_object.split("\t")
  word = columns.first

  # Only the presence of a second column proves that features were emitted.
  features = columns.size > 1 ? columns.last.split(',') : []

  word_class, word_subclass1, word_subclass2, word_subclass3,
    conjugation1, conjugation2, prototype, reading, pronunciation = features

  new(
    word: word,
    word_class: word_class,
    word_subclass1: word_subclass1,
    word_subclass2: word_subclass2,
    word_subclass3: word_subclass3,
    conjugation1: conjugation1,
    conjugation2: conjugation2,
    prototype: prototype,
    reading: reading,
    pronunciation: pronunciation
  )
end
extract_nouns(str)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 93
# Converts +str+ into models via +convert_model+ and keeps only those whose
# +noun?+ predicate is truthy.
def extract_nouns(str)
  models = convert_model(str)
  models.select { |model| model.noun? }
end
load_from_csv(filename)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 115
# Reads +filename+ as CSV and returns an array with one model per row.
#
# Column 0 is the word and columns 4..12 are word_class, word_subclass1..3,
# conjugation1..2, prototype, reading and pronunciation. Columns 1..3 are
# not read. Columns missing from a short row are passed as nil.
def load_from_csv(filename)
  CSV.open(filename, 'r') do |csv|
    csv.map do |row|
      word, word_class, word_subclass1, word_subclass2, word_subclass3,
        conjugation1, conjugation2, prototype, reading, pronunciation =
        row.values_at(0, 4, 5, 6, 7, 8, 9, 10, 11, 12)

      new(
        word: word,
        word_class: word_class,
        word_subclass1: word_subclass1,
        word_subclass2: word_subclass2,
        word_subclass3: word_subclass3,
        conjugation1: conjugation1,
        conjugation2: conjugation2,
        prototype: prototype,
        reading: reading,
        pronunciation: pronunciation
      )
    end
  end
end
to_reading(str, str_threshold = 20)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 87
# Returns the concatenated readings of the tokens found in +str+.
#
# Only the first +str_threshold+ characters of +str+ are used. Characters in
# the range 'ぁ-ん' are mapped onto 'ァ-ン' before the text is passed to
# +convert_model+.
def to_reading(str, str_threshold = 20)
  # NOTE: Load and performance are a concern, so input longer than the limit is truncated.
  truncated = str[0, str_threshold]
  katakana = truncated.tr('ぁ-ん', 'ァ-ン')

  convert_model(katakana).sum('') { |model| model.reading }
end
Public Instance Methods
adjective?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 35
# True when this token's word class is '形容詞' (adjective).
def adjective?
  @word_class == '形容詞'
end
adverb?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 39
# True when this token's word class is '副詞' (adverb).
def adverb?
  @word_class == '副詞'
end
auxiliary_verb?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 51
# True when this token's word class is '助動詞' (auxiliary verb).
def auxiliary_verb?
  @word_class == '助動詞'
end
conjunction?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 47
# True when this token's word class is '接続詞' (conjunction).
def conjunction?
  @word_class == '接続詞'
end
determiner?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 55
# True when this token's word class is '連体詞' (pre-noun adjectival).
def determiner?
  @word_class == '連体詞'
end
interjection?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 59
# True when this token's word class is '感動詞' (interjection).
def interjection?
  @word_class == '感動詞'
end
noun?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 27
# True when this token's word class is '名詞' (noun).
def noun?
  @word_class == '名詞'
end
postpositional_particle?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 43
# True when this token's word class is '助詞' (postpositional particle).
def postpositional_particle?
  @word_class == '助詞'
end
symbol?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 63
# True when this token's word class is '記号' (symbol).
def symbol?
  @word_class == '記号'
end
to_csv()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 67
# Returns this model as an Array of 13 CSV fields.
#
# Field 0 is the word and fields 1..3 are the literal integer 0.
# NOTE(review): those three presumably stand in for the left-id, right-id
# and cost columns of MeCab's dictionary CSV layout — confirm.
# Fields 4..12 are word_class, word_subclass1..3, conjugation1..2,
# prototype, reading and pronunciation, each passed through +get_str+ so
# that an unset value is written as '*'.
#
# word_class goes through +get_str+ like every other field that
# +initialize+ normalises with +get_object+. Without that, a nil word class
# is emitted as nil instead of the '*' placeholder.
def to_csv
  [
    @word,
    0,
    0,
    0,
    get_str(@word_class),
    get_str(@word_subclass1),
    get_str(@word_subclass2),
    get_str(@word_subclass3),
    get_str(@conjugation1),
    get_str(@conjugation2),
    get_str(@prototype),
    get_str(@reading),
    get_str(@pronunciation)
  ]
end
verb?()
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 31
# True when this token's word class is '動詞' (verb).
def verb?
  @word_class == '動詞'
end
Private Instance Methods
get_object(str)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 78
# Maps the '*' placeholder to nil; any other value is returned unchanged.
def get_object(str)
  return nil if str == '*'

  str
end
get_str(object)
click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 82
# Returns +object+ when it is truthy, otherwise the '*' placeholder.
def get_str(object)
  return object if object

  '*'
end