class XapianDb::Indexer

The indexer creates a Xapian::Document from an object. The object must be an instance of a class that has a blueprint configuration. @author Gernot Kogler

Public Class Methods

new(database, document_blueprint) click to toggle source

Constructor @param [XapianDb::Database] database The database to use (needed to build a spelling index) @param [XapianDb::DocumentBlueprint] document_blueprint The blueprint to use

   # File lib/xapian_db/indexer.rb
# Set up an indexer bound to a database and a document blueprint.
#
# @param [XapianDb::Database] database The database to use (needed to build a spelling index)
# @param [XapianDb::DocumentBlueprint] document_blueprint The blueprint to use
def initialize(database, document_blueprint)
  @database           = database
  @document_blueprint = document_blueprint
end

Public Instance Methods

build_document_for(obj) click to toggle source

Build the document for an object. The object must respond to ‘xapian_id’. The configured adapter should implement this method. @return [Xapian::Document] The xapian document (see xapian.org/docs/sourcedoc/html/classXapian_1_1Document.html)

   # File lib/xapian_db/indexer.rb
# Build the xapian document for an object.
#
# The blueprint is looked up by the class name of the object; the object
# must respond to +xapian_id+, which becomes the data of the document.
#
# @param obj The object to build the document for
# @return [Xapian::Document] The document with stored fields and indexed text
def build_document_for(obj)
  @obj        = obj
  @blueprint  = DocumentBlueprint.blueprint_for(obj.class.name)
  @xapian_doc = Xapian::Document.new.tap { |doc| doc.data = obj.xapian_id }

  # Both steps work on the instance variables assigned above
  store_fields
  index_text

  @xapian_doc
end

Private Instance Methods

get_values_to_index_from(obj) click to toggle source

Get the values to index from an object

    # File lib/xapian_db/indexer.rb
# Collect the values to index from an object.
#
# @param obj An array, an object exposing an attributes hash, or any other object
# @return [Array] The values to index
def get_values_to_index_from(obj)
  # Arrays are resolved recursively, element by element; the nested
  # results are merged into one flat list without nil entries
  if obj.is_a?(Array)
    nested = obj.map { |element| get_values_to_index_from(element) }
    return nested.flatten.compact
  end

  # Objects exposing a hash through `attributes` contribute the non-nil
  # values of that hash (works well for active_record and datamapper objects)
  if obj.respond_to?(:attributes) && obj.attributes.is_a?(Hash)
    return obj.attributes.values.compact
  end

  # Any other object is indexed through its to_s representation; if to_s
  # returns nil, there is nothing to index
  return [] if obj.to_s.nil?

  [obj]
end
index_text() click to toggle source

Index all configured text methods

    # File lib/xapian_db/indexer.rb
 # Index the primary key, the class name and the values of all indexed
 # methods of the blueprint into the current xapian document.
 def index_text
   generator = Xapian::TermGenerator.new
   generator.document = @xapian_doc

   # Stopper and spelling support are only configured together with a stemmer
   if XapianDb::Config.stemmer
     generator.stemmer = XapianDb::Config.stemmer
     generator.stopper = XapianDb::Config.stopper if XapianDb::Config.stopper

     # A spelling dictionary is only created for persistent databases and
     # only if spelling correction is part of the configured query flags
     spelling_wanted = @database.is_a?(XapianDb::PersistentDatabase) &&
                       XapianDb::Config.query_flags.include?(Xapian::QueryParser::FLAG_SPELLING_CORRECTION)
     if spelling_wanted
       generator.database = @database.writer
       generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING)
     end
   end

   # The primary key is added as a unique term
   @xapian_doc.add_term("Q#{@obj.xapian_id}")

   # The class is indexed with a field name and added as a plain term
   generator.index_text(@obj.class.to_s.downcase, 1, "XINDEXED_CLASS")
   @xapian_doc.add_term("C#{@obj.class}")

   @blueprint.indexed_method_names.each do |method|
     options = @blueprint.options_for_indexed_method(method)

     # A configured block is evaluated in the context of the object;
     # otherwise the method itself is called on the object
     source = options.block ? @obj.instance_eval(&options.block) : @obj.send(method)
     next if source.nil?

     get_values_to_index_from(source).each do |value|
       text = value.to_s.downcase
       text = @blueprint.preprocess_terms.call(text) if @blueprint.preprocess_terms
       text = split(text) if XapianDb::Config.term_splitter_count > 0 && !options.no_split

       # With the field name as prefix (only if configured) ...
       generator.index_text(text, options.weight, "X#{method.upcase}") if options.prefixed
       # ... and always without a prefix
       generator.index_text(text, options.weight)
     end
   end

   # Collect the terms below the configured minimum length first,
   # then remove them from the document
   too_short = @xapian_doc.terms.select { |entry| entry.term.length < XapianDb::Config.term_min_length }
   too_short.each { |entry| @xapian_doc.remove_term(entry.term) }
 end
split(terms) click to toggle source
    # File lib/xapian_db/indexer.rb
# Expand each space separated term with its leading prefixes.
#
# For every term, the prefixes of length 1 up to the configured
# term_splitter_count are emitted, followed by the term itself.
#
# @param [String] terms The space separated terms
# @return [String] The prefixes and terms, joined by a single space
def split(terms)
  expanded = terms.split(" ").flat_map do |word|
    prefixes = (1..XapianDb::Config.term_splitter_count).map { |length| word[0...length] }
    prefixes << word
  end
  expanded.join(" ")
end
store_fields() click to toggle source

Store all configured fields

   # File lib/xapian_db/indexer.rb
# Store the class name, the natural sort order value and all configured
# attributes of the blueprint as values of the current xapian document.
def store_fields
  # Value slot 0 holds the class name of the object
  @xapian_doc.add_value(0, @obj.class.name)

  # Value slot 1 holds the natural sort order; a Proc is evaluated in the
  # context of the object, anything else is sent to the object as a message
  sort_order = @blueprint._natural_sort_order
  sort_value = if sort_order.is_a?(Proc)
                 @obj.instance_eval(&sort_order)
               else
                 @obj.send(sort_order)
               end
  @xapian_doc.add_value(1, sort_value.to_s)

  @blueprint.attribute_names.each do |attribute|
    block = @blueprint.block_for_attribute(attribute)
    value = block ? @obj.instance_eval(&block) : @obj.send(attribute)

    # The codec is chosen by the type the blueprint maps to the attribute
    codec   = XapianDb::TypeCodec.codec_for(@blueprint.type_map[attribute])
    encoded = codec.encode(value)

    # Nothing is stored for an attribute whose encoded value is nil
    next if encoded.nil?

    @xapian_doc.add_value(DocumentBlueprint.value_number_for(attribute), encoded)
  end
end