class Kanjidic2
Constants
- VERSION
Public Class Methods
new(filename)
click to toggle source
# File lib/kanjidic2.rb, line 7
# Opens +filename+, hands the open file to Nokogiri::XML and stores the
# result in @kanjidic2_file. The block form of File.open closes the file
# as soon as the block returns.
#
# filename - path of the file to open.
def initialize(filename)
  @kanjidic2_file = File.open(filename) do |xml_io|
    Nokogiri::XML(xml_io)
  end
end
Public Instance Methods
each_character() { |parse_chr(character)| ... }
click to toggle source
# File lib/kanjidic2.rb, line 11
# Yields the parse_chr result for every "character" element found in
# @kanjidic2_file, in the order css("character") returns them.
#
# When called without a block an Enumerator is returned instead, so the
# method can be chained (each_character.first(10), .lazy, ...) rather
# than raising LocalJumpError on the first element.
#
# Returns whatever the underlying #each returns when a block is given,
# or an Enumerator otherwise.
def each_character
  return enum_for(:each_character) unless block_given?

  @kanjidic2_file.css("character").each do |character|
    yield parse_chr(character)
  end
end
header()
click to toggle source
# File lib/kanjidic2.rb, line 17
# Returns the result of parse_header applied to the document held in
# @kanjidic2_file.
def header
  parse_header(@kanjidic2_file)
end
Private Instance Methods
parse_chr(character)
click to toggle source
# File lib/kanjidic2.rb, line 32
# Converts one "character" element into a nested Hash with String keys.
#
# character - a node responding to #css (the elements yielded by
#             css("character") in each_character).
#
# Returned structure:
#
#   "literal"         => String (text of <literal>, not stripped)
#   "codepoint"       => { cp_type  => stripped text }
#   "radical"         => { rad_type => stripped text }
#   "misc"            => { "grade", "stroke_count", "variant" => { var_type => text },
#                          "freq", "rad_name", "jlpt" }
#   "dic_number"      => { dr_type => text }, except "moro" which maps to
#                        { "m_vol", "m_page", "value" }
#   "query_code"      => { qc_type => stripped text }
#   "reading_meaning" => { "rmgroup" => { "reading" => {...}, "meaning" => {...} },
#                          "nanori"  => [stripped text, ...] }
#
# Wherever entries are keyed by a type attribute, a later element with the
# same type overwrites an earlier one.
def parse_chr(character)
  misc = character.css("misc")
  reading_meaning = character.css("reading_meaning")
  rmgroup = reading_meaning.css("rmgroup")

  {
    "literal" => character.css("literal").text,
    "codepoint" => parse_chr_typed_values(character.css("codepoint").css("cp_value"), "cp_type"),
    "radical" => parse_chr_typed_values(character.css("radical").css("rad_value"), "rad_type"),
    "misc" => parse_chr_misc(misc),
    "dic_number" => parse_chr_dic_numbers(character.css("dic_number").css("dic_ref")),
    "query_code" => parse_chr_typed_values(character.css("query_code").css("q_code"), "qc_type"),
    "reading_meaning" => {
      "rmgroup" => {
        "reading" => parse_chr_readings(rmgroup.css("reading")),
        "meaning" => parse_chr_meanings(rmgroup.css("meaning"))
      },
      "nanori" => reading_meaning.css("nanori").map { |nanori| nanori.text.strip }
    }
  }
end

# Builds { node[type_attr] => stripped node text } from a list of nodes.
def parse_chr_typed_values(nodes, type_attr)
  nodes.each_with_object({}) do |node, values|
    values[node[type_attr]] = node.text.strip
  end
end

# Builds the "misc" sub-hash from the <misc> node set.
def parse_chr_misc(misc)
  {
    "grade" => misc.css("grade").text,
    "stroke_count" => misc.css("stroke_count").text,
    # Search for <variant> directly under the <misc> nodes. The previous
    # misc.css("misc").css("variant") looked for a <misc> nested inside
    # <misc>, which never matches, so variants were always empty.
    "variant" => parse_chr_typed_values(misc.css("variant"), "var_type"),
    "freq" => misc.css("freq").text,
    "rad_name" => misc.css("rad_name").text,
    "jlpt" => misc.css("jlpt").text
  }
end

# Builds the "dic_number" sub-hash; "moro" references also carry the
# m_vol and m_page attributes, so they are stored as a Hash.
def parse_chr_dic_numbers(dic_refs)
  dic_refs.each_with_object({}) do |dic_ref, numbers|
    numbers[dic_ref["dr_type"]] =
      if dic_ref["dr_type"] == "moro"
        {
          "m_vol" => dic_ref["m_vol"],
          "m_page" => dic_ref["m_page"],
          "value" => dic_ref.text.strip
        }
      else
        dic_ref.text.strip
      end
  end
end

# Builds the "reading" sub-hash; ja_on and ja_kun readings keep their
# extra attributes, every other r_type maps straight to its text.
def parse_chr_readings(readings)
  readings.each_with_object({}) do |reading, parsed|
    value = reading.text.strip
    parsed[reading["r_type"]] =
      case reading["r_type"]
      when "ja_on"
        { "on_type" => reading["on_type"], "r_status" => reading["r_status"], "value" => value }
      when "ja_kun"
        { "r_status" => reading["r_status"], "value" => value }
      else
        value
      end
  end
end

# Groups meanings by m_lang; a missing m_lang is filed under "en".
def parse_chr_meanings(meanings)
  parsed = { "en" => [], "fr" => [], "es" => [], "pt" => [] }
  meanings.each do |meaning|
    lang = meaning["m_lang"] || "en"
    # Create the list on demand so an unlisted language code does not
    # raise NoMethodError on nil.
    (parsed[lang] ||= []) << meaning.text.strip
  end
  parsed
end
parse_header(kanjidic2_file)
click to toggle source
# File lib/kanjidic2.rb, line 23
# Extracts the header fields of the document into a Hash.
#
# kanjidic2_file - an object responding to css("header") whose result in
#                  turn responds to css(field).text.
#
# Returns a Hash with the keys "file_version", "database_version" and
# "date_of_creation", each mapped to the text of the matching element
# (not stripped).
def parse_header(kanjidic2_file)
  header = kanjidic2_file.css("header")

  %w[file_version database_version date_of_creation].each_with_object({}) do |field, parsed|
    parsed[field] = header.css(field).text
  end
end