class Anystyle::Parser::Dictionary

Dictionary is a Singleton object that provides a key-value store of the Anystyle Parser dictionary required for feature elicitation. This dictionary acts essentially like a Ruby Hash object, but because of the dictionary's size it is not efficient to keep the entire dictionary in memory at all times. For that reason, Dictionary creates a persistent data store on disk using Kyoto Cabinet; if Kyoto Cabinet is not installed, a Ruby Hash is used as a fallback.

Redis support was added in version 0.1.0. If you would like to use Redis as the dictionary data store, you can do so by installing the `redis` gem (and, optionally, the `hiredis` gem).

The database will be automatically created from the dictionary file, using the best available DBM, the first time it is accessed. Once the database file exists, the database will be restored from that file. Therefore, if you make changes to the dictionary file, you will have to delete the old database file for a new one to be created.

Database creation in Kyoto Cabinet mode requires write permissions. By default, the database will be created in the support directory of the Parser; if you have installed the gem version of the Parser, you may not have write permissions there, but you can change the location of the database file through the Dictionary's `:cabinet` option (see below).

## Configuration

To set the database mode:

Dictionary.instance.options[:mode] # => the database mode

For a list of database modes available in your environment consult:

Dictionary.modes # => [:kyoto, :redis, :hash]

Further options include:

Dictionary.instance.options[:source] # => the zipped dictionary file
Dictionary.instance.options[:cabinet] # => the database file (kyoto)
Dictionary.instance.options[:path] # => the database socket (redis)
Dictionary.instance.options[:host] # => dictionary host (redis)
Dictionary.instance.options[:port] # => dictionary port (redis)

Attributes

code[R]
defaults[R]
keys[R]
modes[R]
options[R]

Public Class Methods

new() click to toggle source
   # File lib/anystyle/parser/dictionary.rb
96 def initialize
97   @options = Dictionary.defaults.dup
98 end

Public Instance Methods

[](key) click to toggle source
    # File lib/anystyle/parser/dictionary.rb
104 def [](key)
105   db[key.to_s].to_i
106 end
[]=(key, value) click to toggle source
    # File lib/anystyle/parser/dictionary.rb
108 def []=(key, value)
109   db[key.to_s] = value
110 end
close() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
165 def close
166   case
167   when @db.respond_to?(:close)
168     @db.close
169   when @db.respond_to?(:quit)
170     @db.quit
171   end
172 
173   @db = nil
174 end
config(&block) click to toggle source
    # File lib/anystyle/parser/dictionary.rb
100 def config(&block)
101   block[options]
102 end
create() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
112 def create
113   case options[:mode]
114   when :kyoto
115     truncate
116     @db = KyotoCabinet::DB.new
117     unless @db.open(path, KyotoCabinet::DB::OWRITER | KyotoCabinet::DB::OCREATE)
118       raise DatabaseError, "failed to create cabinet file #{path}: #{@db.error}"
119     end
120     populate
121     close
122 
123   else
124     # nothing
125   end
126 end
open() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
133 def open
134   case options[:mode]
135   when :kyoto
136     at_exit { Anystyle.dictionary.close }
137 
138     create unless File.exists?(path)
139 
140     @db = KyotoCabinet::DB.new
141     unless @db.open(path, KyotoCabinet::DB::OREADER)
142       raise DictionaryError, "failed to open cabinet file #{path}: #{@db.error}"
143     end
144 
145   when :redis
146     at_exit { Anystyle.dictionary.close }
147     @db = Redis.new(options)
148 
149     if options[:namespace] && defined?(Redis::Namespace)
150       @db = Redis::Namespace.new options[:namespace], :redis => @db
151     end
152 
153     populate unless populated?
154 
155   else
156     @db = Hash.new(0)
157     populate
158   end
159 
160   @db
161 end
open?() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
163 def open?() !!@db end
path() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
176 def path
177   case options[:mode]
178   when :kyoto
179     options[:cabinet] || options[:path]
180   when :redis
181     options[:path] || options.values_at(:host, :port).join(':')
182   else
183     'hash'
184   end
185 end
populated?() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
187 def populated?
188   !!self['__created_at']
189 end
truncate() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
128 def truncate
129   close
130   File.unlink(path) if File.exists?(path)
131 end

Private Instance Methods

db() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
193 def db
194   @db || open
195 end
populate() click to toggle source
    # File lib/anystyle/parser/dictionary.rb
197 def populate
198   require 'zlib'
199 
200   File.open(options[:source], 'r:UTF-8') do |f|
201     mode = 0
202 
203     Zlib::GzipReader.new(f).each do |line|
204       line.strip!
205 
206       if line.start_with?('#')
207         case line
208         when /^## male/i
209           mode = Dictionary.code[:male]
210         when /^## female/i
211           mode = Dictionary.code[:female]
212         when /^## (?:surname|last|chinese)/i
213           mode = Dictionary.code[:surname]
214         when /^## months/i
215           mode = Dictionary.code[:month]
216         when /^## place/i
217           mode = Dictionary.code[:place]
218         when /^## publisher/i
219           mode = Dictionary.code[:publisher]
220         when /^## journal/i
221           mode = Dictionary.code[:journal]
222         else
223           # skip comments
224         end
225       else
226         key = line.split(/\s+(\d+\.\d+)\s*$/)[0]
227         value = self[key]
228         self[key] = value + mode if value < mode
229       end
230     end
231   end
232 
233   self['__created_at'] = Time.now.to_s
234 end