class Anystyle::Parser::Dictionary
Dictionary is a Singleton object that provides a key-value store of the Anystyle Parser dictionary required for feature elicitation. This dictionary acts essentially like a Ruby Hash object, but because of the dictionary's size it is not efficient to keep the entire dictionary in memory at all times. For that reason, Dictionary creates a persistent data store on disk using Kyoto Cabinet; if Kyoto Cabinet is not installed, a Ruby Hash is used as a fall-back.
Starting with version 0.1.0, Redis support was added. If you would like to use Redis as the dictionary data store, you can do so by installing the `redis' gem (and optionally the `hiredis' gem).
The database will be automatically created from the dictionary file using the best available DBM the first time it is accessed. Once the database file exists, the database will be restored from that file. Therefore, if you make changes to the dictionary file, you will have to delete the old database file for a new one to be created.
Database creation in Kyoto-Cabinet mode requires write permissions. By default, the database will be created in the support directory of the Parser; if you have installed the gem version of the Parser, you may not have write permissions, but you can change the path in the Dictionary's options.
## Configuration
To set the database mode:
Dictionary.instance.options[:mode] # => the database mode
For a list of database modes available in your environment consult:
Dictionary.modes # => [:kyoto, :redis, :hash]
Further options include:
Dictionary.instance.options[:source] # => the zipped dictionary file
Dictionary.instance.options[:cabinet] # => the database file (kyoto)
Dictionary.instance.options[:path] # => the database socket (redis)
Dictionary.instance.options[:host] # => dictionary host (redis)
Dictionary.instance.options[:port] # => dictionary port (redis)
Attributes
Public Class Methods
# File lib/anystyle/parser/dictionary.rb, line 96
#
# Sets up a new dictionary. The instance's options start out as a shallow
# copy (+dup+) of whatever Dictionary.defaults returns, so assigning option
# keys on this instance does not write into the defaults object itself.
def initialize
  defaults = Dictionary.defaults
  @options = defaults.dup
end
Public Instance Methods
# File lib/anystyle/parser/dictionary.rb, line 104
#
# Looks up +key+ in the data store.
#
# The key is converted with +to_s+ before the lookup and the stored value is
# coerced with +to_i+, so a missing entry (nil) comes back as 0.
def [](key)
  stored = db[key.to_s]
  stored.to_i
end
# File lib/anystyle/parser/dictionary.rb, line 108
#
# Stores +value+ in the data store under +key+; the key is converted with
# +to_s+ first. The value is written as given, without coercion.
def []=(key, value)
  name = key.to_s
  db[name] = value
end
# File lib/anystyle/parser/dictionary.rb, line 165
#
# Releases the current data store handle, if any.
#
# A handle that responds to +close+ is closed; otherwise one that responds to
# +quit+ is sent +quit+. Handles that respond to neither are simply dropped.
# In every case @db is reset to nil afterwards.
def close
  if @db.respond_to?(:close)
    @db.close
  elsif @db.respond_to?(:quit)
    @db.quit
  end

  @db = nil
end
# File lib/anystyle/parser/dictionary.rb, line 100
#
# Hands the options object to the given block so it can be inspected or
# modified in place. Returns whatever the block returns.
def config(&block)
  block.call(options)
end
# File lib/anystyle/parser/dictionary.rb, line 112
#
# Builds the on-disk database for the current mode.
#
# In +:kyoto+ mode any existing cabinet file is removed (see #truncate), a new
# cabinet is opened for writing at #path, filled via #populate and closed
# again. Every other mode needs no creation step, so nothing happens.
#
# Raises DictionaryError when the cabinet file cannot be opened for writing.
# This is the same error class #open raises for a failed cabinet open, so
# callers can rescue one class for both failures.
def create
  case options[:mode]
  when :kyoto
    truncate
    @db = KyotoCabinet::DB.new
    unless @db.open(path, KyotoCabinet::DB::OWRITER | KyotoCabinet::DB::OCREATE)
      raise DictionaryError, "failed to create cabinet file #{path}: #{@db.error}"
    end
    populate
    close

  else
    # nothing
  end
end
# File lib/anystyle/parser/dictionary.rb, line 133
#
# Opens the data store for the configured mode and returns the handle.
#
# * +:kyoto+ - creates the cabinet file first if it does not exist yet, then
#   opens it read-only. Raises DictionaryError if the cabinet cannot be opened.
# * +:redis+ - connects with the current options, wraps the connection in a
#   Redis::Namespace when a +:namespace+ option is set and that class is
#   loaded, and populates the store unless it is already populated.
# * anything else - uses an in-memory Hash defaulting to 0 and populates it.
#
# The kyoto and redis branches register an +at_exit+ hook that closes
# Anystyle.dictionary; a hook is added on every call.
def open
  case options[:mode]
  when :kyoto
    at_exit { Anystyle.dictionary.close }

    # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
    create unless File.exist?(path)

    @db = KyotoCabinet::DB.new
    unless @db.open(path, KyotoCabinet::DB::OREADER)
      raise DictionaryError, "failed to open cabinet file #{path}: #{@db.error}"
    end

  when :redis
    at_exit { Anystyle.dictionary.close }
    @db = Redis.new(options)

    if options[:namespace] && defined?(Redis::Namespace)
      @db = Redis::Namespace.new options[:namespace], :redis => @db
    end

    populate unless populated?

  else
    @db = Hash.new(0)
    populate
  end

  @db
end
# File lib/anystyle/parser/dictionary.rb, line 163
#
# Returns true when a data store handle is currently held in @db, and false
# when @db is nil (or false).
def open?
  @db ? true : false
end
# File lib/anystyle/parser/dictionary.rb, line 176
#
# Returns the location of the data store for the configured mode:
#
# * +:kyoto+ - the +:cabinet+ option, falling back to +:path+
# * +:redis+ - the +:path+ option, falling back to "host:port" built from the
#   +:host+ and +:port+ options
# * anything else - the literal string 'hash'
def path
  mode = options[:mode]

  if mode == :kyoto
    options[:cabinet] || options[:path]
  elsif mode == :redis
    options[:path] || options.values_at(:host, :port).join(':')
  else
    'hash'
  end
end
# File lib/anystyle/parser/dictionary.rb, line 187
#
# Returns true when the data store holds a '__created_at' entry, i.e. when
# #populate has run to completion against it, and false otherwise.
#
# The raw value is read from the store directly. Going through #[] would
# coerce it with +to_i+, and an Integer (even 0) is always truthy in Ruby,
# which would make this predicate return true for an empty store.
def populated?
  !!db['__created_at']
end
# File lib/anystyle/parser/dictionary.rb, line 128
#
# Closes the data store and deletes the file at #path if one exists.
# Does nothing on disk when there is no such file.
def truncate
  close
  # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
  File.unlink(path) if File.exist?(path)
end
Private Instance Methods
# File lib/anystyle/parser/dictionary.rb, line 193
#
# Returns the current data store handle, opening the store on first use.
def db
  return @db if @db

  open
end
# File lib/anystyle/parser/dictionary.rb, line 197
#
# Fills the data store from the gzipped dictionary file named by the
# +:source+ option.
#
# The file is read line by line. Lines starting with '#' are comments; those
# of the form "## male", "## female", "## surname" (or "last"/"chinese"),
# "## months", "## place", "## publisher" and "## journal" switch the current
# category to the matching Dictionary.code value. Any other non-blank line is
# an entry: a trailing decimal number is stripped and the rest is the key.
# The current category code is added to the key's stored value, but only
# while the stored value is still smaller than that code.
#
# When the whole file has been read, a '__created_at' entry is written with
# the current time.
def populate
  # Loaded here rather than at file level so zlib is only required when the
  # dictionary actually has to be read.
  require 'zlib'

  File.open(options[:source], 'r:UTF-8') do |f|
    mode = 0

    Zlib::GzipReader.new(f).each do |line|
      line.strip!

      # A blank line has no key; without this guard it would be stored under
      # the empty string with the current category code.
      next if line.empty?

      if line.start_with?('#')
        case line
        when /^## male/i
          mode = Dictionary.code[:male]
        when /^## female/i
          mode = Dictionary.code[:female]
        when /^## (?:surname|last|chinese)/i
          mode = Dictionary.code[:surname]
        when /^## months/i
          mode = Dictionary.code[:month]
        when /^## place/i
          mode = Dictionary.code[:place]
        when /^## publisher/i
          mode = Dictionary.code[:publisher]
        when /^## journal/i
          mode = Dictionary.code[:journal]
        else
          # skip comments
        end
      else
        # Drop an optional trailing decimal number; what remains is the key.
        key = line.split(/\s+(\d+\.\d+)\s*$/)[0]
        value = self[key]
        self[key] = value + mode if value < mode
      end
    end
  end

  self['__created_at'] = Time.now.to_s
end