class Juman
Constants
- Category
- Domain
- Hinshi
- Juman_Versin
Attributes
id[R]
ma_arr[R]
pos[R]
string[R]
Public Class Methods
new(string, id=nil, pos=nil)
click to toggle source
# File lib/juman_knp.rb, line 15
# Runs morphological analysis on +string+ and stores the result.
#
# string - text handed to #ma for analysis.
# id     - optional caller-chosen label (e.g. a title or an id); stored as-is.
# pos    - optional list of parts of speech; when it is not nil the analysis
#          result is filtered through #words_of.
def initialize(string, id=nil, pos=nil)
  @string = string
  @id = id
  @pos = pos # parts of speech (pos)
  @ma_arr = ma(string)
  return if pos.nil?

  # words_of also replaces @ma_arr with the filtered entries.
  @specific_pos = words_of(pos)
end
Public Instance Methods
array_of(i)
click to toggle source
# File lib/juman_knp.rb, line 26
# Collects one piece of information from every entry of @ma_arr.
#
# i - selector:
#     0..10 -> the i-th element of each entry (nil when the entry is shorter)
#     17    -> value tagged "代表表記" (representative notation)
#     18    -> value tagged "漢字読み" (kanji reading)
#     19    -> value tagged "カテゴリ" (category)
#     20    -> value tagged "ドメイン" (domain)
#
# Returns a new Array with one element per entry of @ma_arr, or an empty
# Array for any other selector.
def array_of(i)
  case i
  when 0..10
    @ma_arr.map { |e| e[i] }
  when 17
    # representative notation
    @ma_arr.map { |e| get_info(e, "代表表記") }
  when 18
    # kanji reading
    @ma_arr.map { |e| get_info(e, "漢字読み") }
  when 19
    # category
    @ma_arr.map { |e| get_info(e, "カテゴリ") }
  when 20
    # domain
    @ma_arr.map { |e| get_info(e, "ドメイン") }
  else
    # Unknown selectors yield no data rather than raising.
    []
  end
end
words_of(pos)
click to toggle source
filter of pos
# File lib/juman_knp.rb, line 51
# Filters the analysis result by part of speech.
#
# pos - collection of part-of-speech values; each one is compared with
#       element 3 of every entry in @ma_arr.
#
# The result is grouped in the order the values appear in +pos+ (not in the
# original entry order). NOTE: this overwrites @ma_arr with the filtered
# entries, so later calls only see what survived earlier ones.
#
# Returns the filtered Array (the same object now stored in @ma_arr).
def words_of(pos)
  @ma_arr = pos.flat_map do |hinshi|
    @ma_arr.select { |entry| hinshi == entry[3] }
  end
end
Private Instance Methods
get_info(e, what)
click to toggle source
Parameter > e:one array of @ma_arr
what:string of "カテゴリ" or "漢字読み" or ...
Return > info:string of info related to what
# File lib/juman_knp.rb, line 87
# Extracts the value tagged with +what+ from one entry of @ma_arr.
#
# e    - one entry of @ma_arr (an Array of String tokens).
# what - label to look for, e.g. "カテゴリ" or "漢字読み"; it is matched
#        literally, followed by a colon.
#
# Returns the matching token with the "<what>:" prefix and any double quotes
# removed, or "" when no token carries the label. When several tokens match,
# the last one wins.
def get_info(e, what)
  # Escape the label so regexp metacharacters in it cannot match other text,
  # and build the pattern once instead of twice per token.
  label = /#{Regexp.escape(what)}:/
  info = ""
  e.each do |elm|
    info = elm.gsub(label, "").delete("\"") if label =~ elm
  end
  info
end
ma(string)
click to toggle source
Morphological analysis (ma). Parameter > String for ma. Return > Array of console output.
# File lib/juman_knp.rb, line 65
# Morphological analysis (ma): feeds +string+ to the external "juman -b"
# command and parses its console output.
#
# string - text to analyse, treated as UTF-8. It is NOT modified; a
#          converted copy is sent to JUMAN.
#
# Returns an Array with one Array of whitespace-separated tokens per output
# line, excluding the "EOS" marker lines.
#
# If the juman executable cannot be started (Errno::ENOENT), an error message
# is printed and the process exits immediately via exit!. Other errors
# propagate to the caller instead of being reported as a PATH problem.
def ma(string)
  # Per the original author's note, JUMAN (on Windows) only accepts
  # Shift_JIS input. Characters that cannot be converted are dropped.
  sjis_input = string.encode("Windows-31J", "UTF-8",
                             :invalid => :replace, :undef => :replace, :replace => '')

  # Using open3, execute JUMAN; stderr and the exit status are not inspected.
  out, = Open3.capture3("juman -b", :stdin_data => sjis_input)

  maarr = []
  out.each_line do |raw|
    # Non-bang chomp: chomp! returns nil when the line has no trailing
    # newline, which used to break the chained call.
    line = raw.chomp.encode("UTF-8", "Windows-31J",
                            :invalid => :replace, :undef => :replace, :replace => '')
    maarr.push(line.split(/\s/)) unless line == "EOS"
  end
  maarr
rescue Errno::ENOENT
  print("[エラー]:JUMANへPathを通してください。\n")
  exit!
end