class BentleyMcIlroy::BlockFingerprintTable

Look-up table with a find method which finds an appropriate block and then modifies the match to extend it to more characters.

Public Class Methods

new(block_sequenced_text) click to toggle source
# File lib/bentley_mcilroy.rb, line 39
def initialize(block_sequenced_text)
  @blocked_text = block_sequenced_text
  @hash = {}

  @blocked_text.blocks.each do |block|
    (@hash[block.hash] ||= []) << block
  end
end

Public Instance Methods

find_for_compress(fingerprint, block_size, target, position) click to toggle source
# File lib/bentley_mcilroy.rb, line 48
def find_for_compress(fingerprint, block_size, target, position)
  source = @blocked_text.text
  find(fingerprint, block_size, source, target, position)
end
find_for_diff(fingerprint, block_size, target) click to toggle source
# File lib/bentley_mcilroy.rb, line 53
def find_for_diff(fingerprint, block_size, target)
  source = @blocked_text.text
  find(fingerprint, block_size, source, target)
end

Private Instance Methods

find(fingerprint, block_size, source, target, position = nil) click to toggle source
# File lib/bentley_mcilroy.rb, line 60
def find(fingerprint, block_size, source, target, position = nil)
  blocks = @hash[fingerprint]
  return nil unless blocks
  
  blocks.each do |block|
    next unless block.text == target[0, block_size]
    
    # in compression, since we don't have true source and target strings as
    # separate things, we have to ensure that we don't use a fingerprinted
    # block which appears _after_ the current position, otherwise
    #
    # a<x, 0> with x > 0
    #
    # might happen, or similar. since blocks are ordered left to right in the
    # string, we can just return nil, because we know there's not going to be
    # a valid block for compression.
    if position && block.position >= position
      return nil
    end
    
    # we know that block matches, so cut it from the beginning,
    # so we can then see how much of the rest also matches
    source_match = source[block.position + block_size..-1]
    target_match = target[block_size..-1]
    
    # in a backwards extension, we can see how many of the characters before
    # +position+ (up the previous block we covered, which is +limit+) match
    # characters block.position (up to b-1) characters. In other words, we can
    # find the maximum i such that
    #
    # original_text[position-k, 1] == original_text[block.position-k, 1]
    #
    # for all k in {1, 2, ..., i}, where i <= b-1

    # it may be that the block we've matched on reaches to the end of the
    # string, in which case, bail
    if source_match.empty? || target_match.empty?
      return block
    end

    end_index = find_end_index(source_match, target_match)
    match = produce_match(end_index, block, source)
    return match
  end

  nil
end
find_end_index(source, target) click to toggle source
# File lib/bentley_mcilroy.rb, line 108
def find_end_index(source, target)
  end_index = 0
  any_match = false
  while end_index < source.length && end_index < target.length && source[end_index, 1] == target[end_index, 1]
    any_match = true
    end_index += 1
  end
  # undo the final increment, since that's where it failed the equality check
  end_index -= 1
  
  any_match ? end_index : nil
end
produce_match(end_index, block, source) click to toggle source
# File lib/bentley_mcilroy.rb, line 121
def produce_match(end_index, block, source)
  text = block.text
  if end_index # we have more to grab in the string
    text += source[0..end_index]
  end
  Block.new(text, block.position)
end