class NattoWrap::MeCabModel

NOTE: See the MeCab project site at https://taku910.github.io/mecab/

Attributes

conjugation1[RW]
conjugation2[RW]
pronunciation[RW]
prototype[RW]
reading[RW]
word[RW]
word_class[RW]
word_subclass1[RW]
word_subclass2[RW]
word_subclass3[RW]

Public Class Methods

new(word: nil, word_class: nil, word_subclass1: nil, word_subclass2: nil, word_subclass3: nil, conjugation1: nil, conjugation2: nil, prototype: nil, reading: nil, pronunciation: nil) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 11
# Builds a model from individual morphological fields.
#
# Every field except +word+ is passed through +get_object+, which maps the
# '*' placeholder to nil. Missing values then fall back in a chain:
# prototype -> word, reading -> prototype, pronunciation -> reading.
def initialize(word: nil, word_class: nil,
               word_subclass1: nil, word_subclass2: nil, word_subclass3: nil,
               conjugation1: nil, conjugation2: nil,
               prototype: nil, reading: nil, pronunciation: nil)
  @word = word

  # Part-of-speech fields: normalise the placeholder, no fallback.
  @word_class, @word_subclass1, @word_subclass2, @word_subclass3 =
    [word_class, word_subclass1, word_subclass2, word_subclass3].map { |field| get_object(field) }
  @conjugation1, @conjugation2 =
    [conjugation1, conjugation2].map { |field| get_object(field) }

  # Fields with fallbacks; each one defaults to the previously resolved value.
  @prototype = get_object(prototype) || word
  @reading = get_object(reading) || @prototype
  @pronunciation = get_object(pronunciation) || @reading
end

Private Class Methods

convert_model(str) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 97
# Parses +str+ with a memoized Natto::MeCab instance and returns an array
# with one model per output line.
def convert_model(str)
  @natto ||= Natto::MeCab.new
  @natto
    .parse(str)
    .delete_suffix("EOS\n") # strip the trailing "EOS\n" suffix when present
    .split("\n")
    .map { |line| create_from_natto_object(line) }
end
create_as_proper_noun(word, reading = nil, pronunciation = nil) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 104
# Builds a model for +word+ classified as '名詞' / '固有名詞' / '一般'.
#
# +reading+ defaults to +word+; +pronunciation+ defaults to the resolved
# reading. The prototype is always +word+ itself.
def create_as_proper_noun(word, reading = nil, pronunciation = nil)
  resolved_reading = reading || word
  resolved_pronunciation = pronunciation || resolved_reading

  new(
    word: word,
    word_class: '名詞',
    word_subclass1: '固有名詞',
    word_subclass2: '一般',
    word_subclass3: nil,
    conjugation1: nil,
    conjugation2: nil,
    prototype: word,
    reading: resolved_reading,
    pronunciation: resolved_pronunciation
  )
end
create_from_natto_object(natto_object) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 130
# Builds a model from one line of the form "surface<TAB>f0,f1,...,f8".
#
# The comma-separated features map, in order, to word_class,
# word_subclass1..3, conjugation1..2, prototype, reading and pronunciation.
# Features that are absent are passed as nil.
#
# A line without a tab-separated feature column (e.g. "EOS" or an empty
# string) yields a model with no features, instead of misreading the surface
# as the feature list or raising NoMethodError on nil.
def create_from_natto_object(natto_object)
  columns = natto_object.split("\t")
  surface = columns.first
  # Only the last column of a line that actually contained a tab holds features.
  features = columns.size > 1 ? columns.last.split(',') : []
  word_class, subclass1, subclass2, subclass3,
    conjugation1, conjugation2, prototype, reading, pronunciation = features

  new(
    word: surface, word_class: word_class,
    word_subclass1: subclass1,
    word_subclass2: subclass2,
    word_subclass3: subclass3,
    conjugation1: conjugation1,
    conjugation2: conjugation2,
    prototype: prototype,
    reading: reading,
    pronunciation: pronunciation
  )
end
extract_nouns(str) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 93
# Parses +str+ via +convert_model+ and keeps only the models whose +noun?+
# predicate is truthy.
def extract_nouns(str)
  models = convert_model(str)
  models.select { |model| model.noun? }
end
load_from_csv(filename) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 115
# Reads +filename+ as CSV and returns one model per row.
#
# Column 0 is the word; columns 4..12 hold word_class, word_subclass1..3,
# conjugation1..2, prototype, reading and pronunciation. Columns 1..3 are
# ignored.
def load_from_csv(filename)
  CSV.open(filename, 'r') do |csv|
    csv.map do |row|
      word, word_class, subclass1, subclass2, subclass3,
        conjugation1, conjugation2, prototype, reading, pronunciation =
        row.values_at(0, 4, 5, 6, 7, 8, 9, 10, 11, 12)

      new(
        word: word, word_class: word_class,
        word_subclass1: subclass1, word_subclass2: subclass2, word_subclass3: subclass3,
        conjugation1: conjugation1, conjugation2: conjugation2,
        prototype: prototype, reading: reading, pronunciation: pronunciation
      )
    end
  end
end
to_reading(str, str_threshold = 20) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 87
# Returns the concatenated +reading+ of every model parsed from +str+.
#
# Only the first +str_threshold+ characters are analysed, and characters in
# the range 'ぁ-ん' are mapped to 'ァ-ン' before parsing.
def to_reading(str, str_threshold = 20)
  # NOTE: Load and performance are a concern, so input beyond the limit is truncated.
  truncated = str.slice(0, str_threshold)
  normalized = truncated.tr('ぁ-ん', 'ァ-ン')
  convert_model(normalized).sum('') { |model| model.reading }
end

Public Instance Methods

adjective?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 35
# True when the word class is '形容詞' (adjective).
def adjective?
  @word_class.eql?('形容詞')
end
adverb?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 39
# True when the word class is '副詞' (adverb).
def adverb?
  @word_class.eql?('副詞')
end
auxiliary_verb?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 51
# True when the word class is '助動詞' (auxiliary verb).
def auxiliary_verb?
  @word_class.eql?('助動詞')
end
conjunction?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 47
# True when the word class is '接続詞' (conjunction).
def conjunction?
  @word_class.eql?('接続詞')
end
determiner?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 55
# True when the word class is '連体詞' (determiner).
def determiner?
  @word_class.eql?('連体詞')
end
interjection?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 59
# True when the word class is '感動詞' (interjection).
def interjection?
  @word_class.eql?('感動詞')
end
noun?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 27
# True when the word class is '名詞' (noun).
def noun?
  @word_class.eql?('名詞')
end
postpositional_particle?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 43
# True when the word class is '助詞' (postpositional particle).
def postpositional_particle?
  @word_class.eql?('助詞')
end
symbol?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 63
# True when the word class is '記号' (symbol).
def symbol?
  @word_class.eql?('記号')
end
to_csv() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 67
# Returns the model as an array of 13 CSV fields.
#
# Layout: word, three literal 0 columns, word_class, word_subclass1..3,
# conjugation1..2, prototype, reading, pronunciation. This is the same
# column layout +load_from_csv+ reads.
#
# Every field that +initialize+ normalises with +get_object+ ('*' -> nil) is
# mapped back with +get_str+ (nil -> '*'). That includes word_class, which
# previously leaked through as nil when unset.
def to_csv
  [
    @word, 0, 0, 0, get_str(@word_class),
    get_str(@word_subclass1), get_str(@word_subclass2), get_str(@word_subclass3),
    get_str(@conjugation1), get_str(@conjugation2),
    get_str(@prototype), get_str(@reading), get_str(@pronunciation)
  ]
end
verb?() click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 31
# True when the word class is '動詞' (verb).
def verb?
  @word_class.eql?('動詞')
end

Private Instance Methods

get_object(str) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 78
# Maps the '*' placeholder to nil; any other value is returned unchanged.
def get_object(str)
  return nil if str == '*'

  str
end
get_str(object) click to toggle source
# File lib/natto_wrap/me_cab_model.rb, line 82
# Inverse of the placeholder mapping: a falsy value becomes '*', anything
# else is returned unchanged.
def get_str(object)
  return object if object

  '*'
end