class PackfileReader::PackfileEntry

Constants

ZLIB_HEADERS

ZLIB RFC: tools.ietf.org/html/rfc1950

Attributes

id[R]
size[R]
type[R]

Public Class Methods

new(type, size, id) click to toggle source
# File lib/packfile_reader/packfile_entry.rb, line 112
# Builds an entry record describing a single object found in a packfile.
#
# type - symbol (or string tagged with [CORRUPTED]) describing the object kind
# size - uncompressed data size in bytes
# id   - computed SHA1 hex digest identifying the object
def initialize(type, size, id)
  @type, @size, @id = type, size, id
end
next_entry(packfile_io, objects_to_find=:any, log_verbose=false) { |compressed_data, uncompressed_data, object_id| ... } click to toggle source

Accepts a block that will receive the compressed data, the uncompressed data, and the computed object id

# File lib/packfile_reader/packfile_entry.rb, line 20
# Scans the packfile stream for the next entry, optionally restricting the
# search to a given collection of object ids.
#
# packfile_io     - an IO positioned at the start of an entry header
# objects_to_find - :any to accept every object, or a collection of 40-char
#                   lowercase SHA1 hex strings to search for
# log_verbose     - when true, progress messages are printed to $stderr
#
# Accepts a block that will receive the compressed data, the uncompressed
# data and the computed object id.
#
# Returns a PackfileEntry, or nil when the stream is exhausted.
# Raises RuntimeError when objects_to_find contains an invalid sha1.
def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false)
  # \A/\z anchor the whole string; ^/$ match per-line in Ruby and would
  # accept ids with embedded newlines (e.g. "<40 hex chars>\njunk").
  raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? {|id| /\A[0-9a-f]{40}\z/.match? id }

  loop do
    return nil if packfile_io.eof?

    hunk = PackfileReader::Hunk.new_with_type(packfile_io)

    type = hunk.type
    size = hunk.size
    offset = hunk.offset_size

    # Clean the current line before printing the message
    $stderr.puts "\u001b[0K>>>> Processing new entry [#{type}]" if log_verbose
    while hunk.continuation?
      hunk = PackfileReader::Hunk.new_without_type(packfile_io)
      size = (hunk.size << offset) | size # Data size is a combination of all hunk sizes
      offset += hunk.offset_size
    end

    compressed_data, uncompressed_data = find_data(packfile_io, log_verbose)
    object_id = compute_id(type, size, uncompressed_data)

    type = "#{type} [CORRUPTED] " if uncompressed_data.nil?

    if objects_to_find == :any || objects_to_find.member?(object_id)
      yield compressed_data, uncompressed_data, object_id if block_given?
      return PackfileEntry.new(type, size, object_id)
    end
  end
end

Private Class Methods

compute_id(type, size, uncompressed_data) click to toggle source
# File lib/packfile_reader/packfile_entry.rb, line 96
# Derives the git object id for an entry by SHA1-hashing a git object header
# followed by the payload, the same scheme `git hash-object` uses.
#
# type              - entry type symbol (:OBJ_COMMIT, :OBJ_TREE, :OBJ_BLOB, :OBJ_TAG)
# size              - uncompressed payload size in bytes
# uncompressed_data - the inflated object payload
#
# Returns the 40-char SHA1 hex digest, or '000' for delta/unknown types.
def self.compute_id(type, size, uncompressed_data)
  header_names = { OBJ_COMMIT: 'commit', OBJ_TREE: 'tree', OBJ_BLOB: 'blob', OBJ_TAG: 'tag' }
  header_type = header_names.fetch(type, '')

  return '000' if header_type.empty?

  Digest::SHA1.hexdigest("#{header_type} #{size}\0#{uncompressed_data}")
end
find_data(packfile_io, log_verbose) click to toggle source
# File lib/packfile_reader/packfile_entry.rb, line 53
# Reads the zlib-compressed data for the current entry and inflates it.
#
# Since we don't have the index file that accompanies pack files, we don't
# know where the compressed data ends. We use brute force: read one byte at
# a time and try to inflate; when that succeeds, we know we got it all.
#
# packfile_io - an IO positioned at (or just before) a zlib stream
# log_verbose - when true, retry progress is printed to $stderr
#
# Returns [compressed_data, uncompressed_data]; uncompressed_data is nil
# when the stream is corrupted or truncated.
def self.find_data(packfile_io, log_verbose)
  data_header = find_zlib_data_header(packfile_io)

  compressed_data = data_header
  # to_s guards against nil when the stream ends right after the header
  compressed_data += packfile_io.read(1).to_s

  bytes_read = 1
  begin
    uncompressed_data = Zlib.inflate(compressed_data)
  rescue Zlib::BufError
    next_byte = packfile_io.read(1)
    if next_byte.nil?
      # A truncated packfile runs out of bytes before the zlib stream
      # completes; report it as corrupted instead of crashing on nil.
      uncompressed_data = nil
    else
      compressed_data += next_byte
      bytes_read += 1
      $stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
      retry
    end
  rescue Zlib::DataError
    uncompressed_data = nil
  end

  [compressed_data, uncompressed_data]
end
find_zlib_data_header(packfile_io) click to toggle source
# File lib/packfile_reader/packfile_entry.rb, line 78
# Advances the stream until a two-byte zlib stream header is found.
#
# If the entry type is OBJ, TREE or COMMIT, the data is a plain zlib stream.
# Delta entries prepend extra data before it: ref-delta stores a 20-byte
# hash of the base object, ofs-delta stores an offset within the same
# packfile identifying the base object. We don't care about the delta
# metadata, so we skip bytes until a known zlib header appears.
#
# Returns the two header bytes as read from the stream.
def self.find_zlib_data_header(packfile_io)
  candidate = packfile_io.read(2) # zlib headers are two bytes long

  until ZLIB_HEADERS.member?(candidate.unpack('n')[0])
    # Slide the two-byte window forward one byte at a time: rewind 1, read 2.
    packfile_io.seek(packfile_io.pos - 1)
    candidate = packfile_io.read(2)
  end

  candidate
end