class Juman

Constants

Category
Domain
Hinshi
Juman_Versin

Attributes

id[R]
ma_arr[R]
pos[R]
string[R]

Public Class Methods

new(string, id=nil, pos=nil) click to toggle source
# File lib/juman_knp.rb, line 15
def initialize(string, id=nil, pos=nil)
  # Text to analyse, and the parts of speech (pos) the caller is interested in.
  @string = string
  @pos = pos
  # Free-form label supplied by the caller (a title, an id, etc.).
  @id = id
  # Run the morphological analysis right away and keep its rows.
  @ma_arr = ma(string)

  # Narrow the rows down only when parts of speech were actually given.
  @specific_pos = words_of(pos) unless pos.nil?
end

Public Instance Methods

array_of(i) click to toggle source
# File lib/juman_knp.rb, line 26
# Collects one piece of information from every row of @ma_arr.
#
# i in 0..10 picks the i-th field of each row; 17..20 look up a labelled
# value through get_info. Any other i yields an empty array.
def array_of(i)
  case i
  when 0..10 then @ma_arr.map { |row| row[i] }
  # Representative notation
  when 17 then @ma_arr.map { |row| get_info(row, "代表表記") }
  # Kanji reading
  when 18 then @ma_arr.map { |row| get_info(row, "漢字読み") }
  # Category
  when 19 then @ma_arr.map { |row| get_info(row, "カテゴリ") }
  # Domain
  when 20 then @ma_arr.map { |row| get_info(row, "ドメイン") }
  else []
  end
end
words_of(pos) click to toggle source

Filters the analysis results by part of speech (pos).

# File lib/juman_knp.rb, line 51
# Keeps only the rows of @ma_arr whose fourth field (index 3) equals one of
# the given parts of speech. Rows come out grouped in the order of +pos+.
# @ma_arr is replaced by the filtered rows, which are also returned.
def words_of(pos)
  matching_rows = pos.flat_map do |hinshi|
    @ma_arr.select { |row| hinshi == row[3] }
  end
  @ma_arr = matching_rows
end

Private Instance Methods

get_info(e, what) click to toggle source

Parameter > e:one array of @ma_arr

what:string of "カテゴリ" or "漢字読み" or ...

Return > info: string of info related to what

# File lib/juman_knp.rb, line 87
# Extracts the value stored under a "<what>:" label in one row of @ma_arr.
#
# Parameters:
#   e    - one row of @ma_arr (an array of fields)
#   what - label to look for, e.g. "カテゴリ" or "漢字読み"
#
# Returns the matching field with every "<what>:" occurrence and every double
# quote removed, or "" when no field carries the label. When several fields
# match, the last one wins.
def get_info(e, what)
  # Escape the label so it is matched literally, and build the pattern once
  # instead of twice per field.
  label = /#{Regexp.escape(what.to_s)}:/
  info = ""
  e.each do |elm|
    info = elm.gsub(label, "").delete("\"") if label =~ elm
  end
  info
end
ma(string) click to toggle source

Morphological analysis (ma). Parameter > String for ma. Return > Array of console output

# File lib/juman_knp.rb, line 65
# Runs morphological analysis (ma) on +string+ through the external
# "juman -b" command.
#
# Parameter:
#   string - text to analyse; its bytes are treated as UTF-8. The argument
#            itself is left untouched (a converted copy is sent to JUMAN),
#            so frozen strings are accepted.
#
# Returns an array with one entry per output line other than "EOS", each entry
# being that line split on single whitespace characters.
#
# Any StandardError raised along the way prints the message below and
# terminates the process with exit!.
def ma(string)
  # JUMAN's input is Shift-JIS only (for Windows), so send a converted copy.
  sjis_input = string.encode("Windows-31J", "UTF-8", :invalid => :replace, :undef => :replace, :replace => '')

  # Execute JUMAN through open3; only its standard output is used.
  out, = Open3.capture3("juman -b", :stdin_data => sjis_input)

  rows = []
  out.each_line do |raw_line|
    # Non-bang chomp: chomp! returns nil for a line without a trailing
    # newline, which used to abort the whole analysis.
    line = raw_line.chomp.encode("UTF-8", "Windows-31J", :invalid => :replace, :undef => :replace, :replace => '')
    rows.push(line.split(/\s/)) unless line == "EOS"
  end
  rows
rescue StandardError
  print("[エラー]:JUMANへPathを通してください。\n")
  exit!
end