class Athena::Formats::Ferret

Private Instance Methods

parse(input, &block) click to toggle source
   # File lib/athena/formats/ferret.rb
# Reads every non-deleted document from the Ferret index located at
# +input.path+ and builds an Athena::Record for each of them.
#
# input:: an object responding to +path+, naming the index directory
# block:: handed on unchanged to every Athena::Record that is created
#
# Returns the value of +num_docs+ as reported by the index reader.
#
# Raises RuntimeError if the index's +segments+ file cannot be opened, or
# if the first or last document cannot be read from the index.
def parse(input, &block)
  path = input.path

  # make sure the index can be opened
  begin
    File.open(File.join(path, 'segments')) {}
  rescue Errno::ENOENT, Errno::EACCES => err
    # Strip the " - <file name>" suffix from the system error message.
    raise "can't open index at #{path} (#{err.to_s.sub(/ - .*/, '')})"
  end

  index = ::Ferret::Index::IndexReader.new(path)
  first = 0
  last  = index.max_doc - 1
  # NOTE(review): when max_doc is 0, last is -1; how the probe reads below
  # behave for an empty index is not visible from here -- confirm.

  # make sure we can read from the index
  begin
    index[first]
    index[last]
  rescue StandardError  # EOFError, "Not available", ...
    # Refer to the top-level ::Ferret library explicitly (as done for the
    # IndexReader above); inside this class a bare +Ferret+ names the
    # enclosing format class rather than the library.
    raise 'possible Ferret version mismatch; try to set the ' \
          'FERRET_VERSION environment variable to something ' \
          "other than #{::Ferret::VERSION}"
  end

  first.upto(last) do |i|
    next if index.deleted?(i)

    doc = index[i]

    Athena::Record.new(doc[record_element], block) do |record|
      config.each do |element, field_config|
        record.update(element, doc[element], field_config)
      end
    end
  end

  index.num_docs
end