class BentleyMcIlroy::BlockFingerprintTable
Look-up table with a find
method which finds an appropriate block and then modifies the match to extend it to more characters.
Public Class Methods
new(block_sequenced_text)
click to toggle source
# File lib/bentley_mcilroy.rb, line 39 def initialize(block_sequenced_text) @blocked_text = block_sequenced_text @hash = {} @blocked_text.blocks.each do |block| (@hash[block.hash] ||= []) << block end end
Public Instance Methods
find_for_compress(fingerprint, block_size, target, position)
click to toggle source
# File lib/bentley_mcilroy.rb, line 48 def find_for_compress(fingerprint, block_size, target, position) source = @blocked_text.text find(fingerprint, block_size, source, target, position) end
find_for_diff(fingerprint, block_size, target)
click to toggle source
# File lib/bentley_mcilroy.rb, line 53 def find_for_diff(fingerprint, block_size, target) source = @blocked_text.text find(fingerprint, block_size, source, target) end
Private Instance Methods
find(fingerprint, block_size, source, target, position = nil)
click to toggle source
# File lib/bentley_mcilroy.rb, line 60 def find(fingerprint, block_size, source, target, position = nil) blocks = @hash[fingerprint] return nil unless blocks blocks.each do |block| next unless block.text == target[0, block_size] # in compression, since we don't have true source and target strings as # separate things, we have to ensure that we don't use a fingerprinted # block which appears _after_ the current position, otherwise # # a<x, 0> with x > 0 # # might happen, or similar. since blocks are ordered left to right in the # string, we can just return nil, because we know there's not going to be # a valid block for compression. if position && block.position >= position return nil end # we know that block matches, so cut it from the beginning, # so we can then see how much of the rest also matches source_match = source[block.position + block_size..-1] target_match = target[block_size..-1] # in a backwards extension, we can see how many of the characters before # +position+ (up the previous block we covered, which is +limit+) match # characters block.position (up to b-1) characters. In other words, we can # find the maximum i such that # # original_text[position-k, 1] == original_text[block.position-k, 1] # # for all k in {1, 2, ..., i}, where i <= b-1 # it may be that the block we've matched on reaches to the end of the # string, in which case, bail if source_match.empty? || target_match.empty? return block end end_index = find_end_index(source_match, target_match) match = produce_match(end_index, block, source) return match end nil end
find_end_index(source, target)
click to toggle source
# File lib/bentley_mcilroy.rb, line 108 def find_end_index(source, target) end_index = 0 any_match = false while end_index < source.length && end_index < target.length && source[end_index, 1] == target[end_index, 1] any_match = true end_index += 1 end # undo the final increment, since that's where it failed the equality check end_index -= 1 any_match ? end_index : nil end
produce_match(end_index, block, source)
click to toggle source
# File lib/bentley_mcilroy.rb, line 121 def produce_match(end_index, block, source) text = block.text if end_index # we have more to grab in the string text += source[0..end_index] end Block.new(text, block.position) end