class EPUB::Searcher::XHTML::Seamless

Public Class Methods

new(element) click to toggle source
Calls superclass method
# File lib/epub/searcher/xhtml.rb, line 62
# Sets up the searcher for +element+ and starts with no cached indices.
#
# @param element the element handed on to the superclass initializer
def initialize(element)
  super(element)
  # nil means the indices have not been built yet; search_text builds them
  # on first use.
  @indices = nil
end

Public Instance Methods

build_indices(element) click to toggle source
# File lib/epub/searcher/xhtml.rb, line 74
# Flattens the text reachable from +element+ into one string and records,
# for every contributing node, the steps that lead to it.
#
# Text and CDATA children contribute their content, +img+ children
# contribute their non-empty +alt+ attribute, and every other element child
# is processed recursively. Other node kinds are ignored.
#
# @param element node responding to +children+
# @return [Array(Hash, String)] a pair of
#   - a Hash mapping the offset in the flattened string at which a node's
#     text starts to the list of steps locating that node, and
#   - the flattened string itself
def build_indices(element)
  offset_map = {}
  text = ''
  # Number of element children seen so far; also used as the index of the
  # text chunk that follows them.
  element_count = 0

  element.children.each do |node|
    unless node.element?
      next unless node.text? || node.cdata?

      offset_map[text.length] = [[:text, element_count]]
      text << node.content
      next
    end

    step = [:element, element_count, {:name => node.name, :id => node.attribute_with_prefix('id')}]
    element_count += 1

    if node.name == 'img'
      alt_text = node.attribute_with_prefix('alt')
      # Images without usable alternative text add nothing searchable.
      unless alt_text.nil? || alt_text.empty?
        offset_map[text.length] = [step]
        text << alt_text
      end
      next
    end

    # TODO: Consider block level elements
    base = text.length
    nested_map, nested_text = build_indices(node)
    # TODO: Pass base and step to build_indices and remove this block
    nested_map.each_pair do |nested_offset, nested_steps|
      # Re-base the nested offsets and prefix the step of the enclosing element.
      offset_map[base + nested_offset] = [step] + nested_steps
    end
    text << nested_text
  end

  [offset_map, text]
end
search_text(word) click to toggle source
# File lib/epub/searcher/xhtml.rb, line 67
# Searches the element's text for +word+.
#
# The indices and flattened content are built from @element on the first
# call and reused on later calls.
#
# @param word the text to look for
# @return whatever visit returns for the cached indices and content
def search_text(word)
  @indices, @content = build_indices(@element) unless @indices
  visit(@indices, @content, word)
end

Private Instance Methods

find_offset(offsets, index, for_end_position=false) click to toggle source

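Find the largest offset less than or equal to index (strictly less than index when for_end_position is true). @param offsets [Array<Integer>] keys of indices, assumed to be sorted in ascending order @param index [Integer] position of search word in content string @param for_end_position [Boolean] whether index is the end position of a match @todo: more efficient algorithm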

# File lib/epub/searcher/xhtml.rb, line 143
# Finds the largest offset that is less than or equal to +index+, or strictly
# less than +index+ when +for_end_position+ is true.
#
# @param offsets [Array<Integer>] keys of indices; must be sorted in
#   ascending order because the lookup is a binary search
# @param index [Integer] position in the content string
# @param for_end_position [Boolean] true when +index+ is the end of a match,
#   so an offset equal to +index+ does not qualify
# @return [Integer, nil] the matching offset, or nil when no offset qualifies
#   (for example when +offsets+ is empty)
def find_offset(offsets, index, for_end_position=false)
  count = offsets.length
  # Locate the first offset that is already past the wanted position; the
  # answer is the offset immediately before it.
  boundary = (0...count).bsearch { |i|
    for_end_position ? offsets[i] >= index : offsets[i] > index
  }
  # No offset is past the position, so the last offset is the answer.
  boundary ||= count
  return nil if boundary.zero?

  offsets[boundary - 1]
end
to_result_steps(steps) click to toggle source
# File lib/epub/searcher/xhtml.rb, line 154
# Converts raw step descriptions into Result::Step objects.
#
# @param steps [Array<Array>] each entry holds the arguments for one
#   Result::Step, in order
# @return [Array<Result::Step>] one step object per entry, in the same order
def to_result_steps(steps)
  steps.map do |step_args|
    Result::Step.new(*step_args)
  end
end
visit(indices, content, word) click to toggle source
# File lib/epub/searcher/xhtml.rb, line 111
# Finds every occurrence of +word+ in +content+ (overlapping occurrences
# included) and builds one Result per occurrence.
#
# @param indices [Hash] maps an offset in +content+ to the raw steps of the
#   node whose text starts at that offset
# @param content [String] the flattened text to search
# @param word [String] the text to look for
# @return [Array<Result>] one result per occurrence; empty when +word+ is
#   empty or does not occur
def visit(indices, content, word)
  results = []
  # An empty word matches at every position, and its end position at index 0
  # has no preceding offset to resolve against, so report nothing instead of
  # failing inside the offset lookup.
  return results if word.empty?

  offsets = indices.keys
  word_length = word.length
  i = 0
  while i = content.index(word, i)
    offset = find_offset(offsets, i)
    start_steps = to_result_steps(indices[offset])
    last_step = start_steps.last
    if last_step.info[:name] == 'img'
      # The match starts inside an image's alt text: the result carries only
      # the steps to the image, with no start or end steps.
      parent_steps = start_steps
      start_steps = end_steps = nil
    else
      start_char_step = Result::Step.new(:character, i - offset)
      # The end position is exclusive, so it is resolved against the last
      # offset strictly before it.
      end_position = i + word_length
      end_offset = find_offset(offsets, end_position, true)
      end_steps = to_result_steps(indices[end_offset])
      end_char_step = Result::Step.new(:character, end_position - end_offset)
      # NOTE(review): presumably splits the two paths into their shared
      # leading steps and the remainders - confirm against Result.
      parent_steps, start_steps, end_steps = Result.aggregate_step_intersection(start_steps, end_steps)
      start_steps << start_char_step
      end_steps << end_char_step
    end
    results << Result.new(parent_steps, start_steps, end_steps)
    # Advance by one character only, so overlapping occurrences are found.
    i += 1
  end

  results
end