class BentleyMcIlroy::Codec

Public Class Methods

compress(text, block_size) click to toggle source
# File lib/bentley_mcilroy.rb, line 156
# Compresses +text+ against itself.
#
# This is a thin entry point: it hands +text+ to __compress_encode__ with no
# target (nil), which is what makes that method scan +text+ itself rather
# than a second string.
#
# text       - the string to compress.
# block_size - the block length passed through to __compress_encode__.
#
# Returns whatever __compress_encode__ returns for these arguments.
def self.compress(text, block_size)
  no_target = nil
  __compress_encode__(text, no_target, block_size)
end
decode(source, delta) click to toggle source
# File lib/bentley_mcilroy.rb, line 145
# Rebuilds a string from +source+ and a +delta+ sequence.
#
# Each element of +delta+ is handled in order:
# * an Array is read as an [index, length] pair and the slice
#   source[index, length] is appended to the output;
# * anything else is appended to the output as-is.
#
# source - the string that [index, length] pairs refer into.
# delta  - an enumerable of Arrays and literal pieces.
#
# Returns the assembled String (empty when +delta+ is empty).
def self.decode(source, delta)
  delta.each_with_object("") do |token, rebuilt|
    piece = token
    if token.is_a?(Array)
      offset, span = token
      piece = source[offset, span]
    end
    rebuilt << piece
  end
end
decompress(sequence) click to toggle source
# File lib/bentley_mcilroy.rb, line 131
# Expands a compressed +sequence+ back into a single string.
#
# Each element of +sequence+ is handled in order:
# * an Array is read as an [index, length] back-reference into the output
#   built so far; +length+ characters are copied starting at +index+;
# * anything else is appended to the output as-is.
#
# The back-reference is copied one character at a time, reading from the
# output while it grows, so a reference may overlap the text it is itself
# producing (e.g. ["a", [0, 5]] yields "aaaaaa").
#
# sequence - an enumerable of Arrays and literal pieces.
#
# Returns the expanded String (empty when +sequence+ is empty).
def self.decompress(sequence)
  expanded = ""
  sequence.each do |token|
    unless token.is_a?(Array)
      expanded << token
      next
    end

    start, count = token
    # Keep the two-argument slice: it reads from the growing output one
    # character per step, which is what permits overlapping references.
    count.times { |step| expanded << expanded[start + step, 1] }
  end
  expanded
end
encode(source, target, block_size) click to toggle source
# File lib/bentley_mcilroy.rb, line 160
# Encodes +target+ relative to +source+.
#
# This is a thin entry point: all three arguments are passed straight to
# __compress_encode__, which does the work. Because a target is supplied,
# that method scans +target+ and looks for matches in +source+.
#
# source     - the reference string.
# target     - the string to describe in terms of +source+.
# block_size - the block length passed through to __compress_encode__.
#
# Returns whatever __compress_encode__ returns for these arguments.
def self.encode(source, target, block_size)
  delta = __compress_encode__(source, target, block_size)
  delta
end

Private Class Methods

__compress_encode__(source, target, block_size) click to toggle source
# File lib/bentley_mcilroy.rb, line 166
# Shared implementation behind compress and encode.
#
# A block table is always built from +source+. The text that is scanned is
# +source+ itself when +target+ is nil (compress mode) and +target+ otherwise
# (diff mode). The scan walks the text one position at a time, hashing the
# block_size-long window at the current position and asking the table for a
# match; matched stretches are emitted as [position, length] pairs and
# everything in between is emitted as literal strings.
#
# source     - the string the block table is built from.
# target     - the string to encode against +source+, or nil to compress
#              +source+ against itself.
# block_size - window length used for hashing and for the table.
#
# Returns an Array whose elements are literal Strings and
# [match.position, match.text.length] pairs, in scan order. Returns [] when
# +source+ == +target+.
#
# NOTE(review): BlockSequencedText, BlockFingerprintTable and RollingHash are
# defined outside this method; comments about them below describe only how
# they are called here.
#
# NOTE(review): with the early return below, encoding two identical non-empty
# strings yields [], and decode(source, []) in this class returns "" rather
# than +source+ -- confirm whether callers are expected to special-case [].
def self.__compress_encode__(source, target, block_size)
  return [] if source == target
  
  block_sequenced_text = BlockSequencedText.new(source, block_size)
  table = BlockFingerprintTable.new(block_sequenced_text)
  output = []
  # literal characters seen since the last match, not yet flushed to output
  buffer = ""
  # nil means "no valid rolling state": the next window must be hashed from
  # scratch instead of rolled forward
  current_hash = nil
  hasher = RollingHash.new
  
  mode = (target ? :diff : :compress)
  
  if mode == :compress
    # it's the source we're compressing, there is no target
    text = source
  else
    # it's the target we're compressing against the source
    text = target
  end

  position = 0
  while position < text.length

    if text.length - position < block_size
      # if there isn't a block-sized substring in the remaining text, stop.
      # note that we could add the buffer to the output here, but if block_size
      # is 1, text.length - position < 1 can't be true, so the final character
      # would go missing. so appending to the buffer goes below, outside the
      # while loop.
      break
    end

    # if we've recently found a block of text which matches and added that to
    # the output, current_hash will be reset to nil, so get the new hash. note
    # that we can't just use next_hash, because we might have skipped several
    # characters in one go, which breaks the rolling aspect of the hash
    if !current_hash
      current_hash = hasher.hash(text[position, block_size])
    else
      # position-1 is the previous position, + block_size to get the last
      # character of the current block
      current_hash = hasher.next_hash(text[position-1 + block_size, 1])
    end

    # in diff mode text is target, so both branches pass the unscanned tail
    # of the text; compress mode additionally passes the current position
    match = target ? table.find_for_diff(current_hash, block_size, target[position..-1]) :
                     table.find_for_compress(current_hash, block_size, text[position..-1], position)

    if match
      # flush pending literals first so output stays in scan order
      if !buffer.empty?
        output << buffer
        buffer = ""
      end

      output << [match.position, match.text.length]
      # jump past everything the match covered
      position += match.text.length
      current_hash = nil
      # get a new hasher, because we've skipped over by match.text.length
      # characters, so the rolling hash's next_hash won't work
      hasher = RollingHash.new
    else
      # no match here: keep this character as a literal and slide the window
      # forward by one
      buffer << text[position, 1]
      position += 1
    end
  end

  # whatever is still buffered plus the unscanned tail (shorter than
  # block_size, or empty) becomes the final literal
  remainder = buffer + text[position..-1]
  output << remainder if !remainder.empty?
  output
end