class PackfileReader::PackfileEntry
Constants
- ZLIB_HEADERS
ZLIB RFC: tools.ietf.org/html/rfc1950
Attributes
id[R]
size[R]
type[R]
Public Class Methods
new(type, size, id)
click to toggle source
# Builds an entry record for one object found in the packfile.
#
# type - object type symbol (may carry a " [CORRUPTED] " suffix string)
# size - uncompressed size of the object data, in bytes
# id   - computed sha1 hex digest of the object
def initialize(type, size, id)
  @type, @size, @id = type, size, id
end
next_entry(packfile_io, objects_to_find=:any, log_verbose=false) { |compressed_data, uncompressed_data, object_id| ... }
click to toggle source
Accepts a block that will receive the compressed data, uncompressed data and the computed object id
# Scans +packfile_io+ for the next entry, returning a PackfileEntry or nil
# once the stream is exhausted.
#
# packfile_io     - IO positioned at the start of an entry header
# objects_to_find - :any, or a collection of 40-char lowercase-hex sha1
#                   strings; entries whose id is not in the collection
#                   are skipped
# log_verbose     - when true, progress is reported on $stderr
#
# Accepts a block that will receive the compressed data, uncompressed data
# and the computed object id.
#
# Raises RuntimeError when objects_to_find contains an invalid sha1.
def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false)
  # \A/\z anchor the whole string; the previous ^/$ anchors matched
  # per-line, so "<40 hex chars>\n<garbage>" was wrongly accepted.
  raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? { |id| /\A[0-9a-f]{40}\z/.match? id }
  loop do
    return nil if packfile_io.eof?
    hunk = PackfileReader::Hunk.new_with_type(packfile_io)
    type = hunk.type
    size = hunk.size
    offset = hunk.offset_size

    # \u001b[0K cleans the current terminal line before printing the message
    $stderr.puts "\u001b[0K>>>> Processing new entry [#{type}]" if log_verbose

    # Data size is a combination of all hunk sizes: each continuation
    # hunk supplies higher-order bits of the total size.
    while hunk.continuation?
      hunk = PackfileReader::Hunk.new_without_type(packfile_io)
      size = (hunk.size << offset) | size
      offset += hunk.offset_size
    end

    compressed_data, uncompressed_data = find_data(packfile_io, log_verbose)
    object_id = compute_id(type, size, uncompressed_data)
    # nil uncompressed data means the zlib stream could not be inflated
    type = "#{type} [CORRUPTED] " if uncompressed_data.nil?

    if objects_to_find == :any || objects_to_find.member?(object_id)
      yield compressed_data, uncompressed_data, object_id if block_given?
      return PackfileEntry.new(type, size, object_id)
    end
  end
end
Private Class Methods
compute_id(type, size, uncompressed_data)
click to toggle source
# Computes the git object id: the SHA1 of "<type name> <size>\0<payload>".
#
# Returns the 40-char hex digest, or the placeholder '000' for types that
# have no standalone object header (e.g. the delta types).
def self.compute_id(type, size, uncompressed_data)
  names = { OBJ_COMMIT: 'commit', OBJ_TREE: 'tree', OBJ_BLOB: 'blob', OBJ_TAG: 'tag' }
  object_name = names.fetch(type, '')
  return '000' if object_name.empty?
  Digest::SHA1.hexdigest("#{object_name} #{size}\0#{uncompressed_data}")
end
find_data(packfile_io, log_verbose)
click to toggle source
# Inflates the zlib stream starting at the current packfile position.
#
# Since we don't have the index file that accompanies pack files we need
# to use brute force to find where the compressed data ends: we go byte
# by byte and try to inflate the data; when that succeeds, we know we
# got it all.
#
# Returns [compressed_data, uncompressed_data]; uncompressed_data is nil
# when the stream is corrupted or truncated.
def self.find_data(packfile_io, log_verbose)
  data_header = find_zlib_data_header(packfile_io)
  compressed_data = data_header
  uncompressed_data = nil
  bytes_read = 0
  loop do
    byte = packfile_io.read(1)
    # EOF before the stream inflated cleanly: truncated packfile.
    # (Previously this appended nil and crashed with a TypeError.)
    break if byte.nil?
    compressed_data += byte
    bytes_read += 1
    begin
      uncompressed_data = Zlib.inflate(compressed_data)
      break
    rescue Zlib::BufError
      # Stream incomplete so far: keep accumulating bytes.
      # Clean the current line before printing the progress message.
      $stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
    rescue Zlib::DataError
      # Stream is corrupted; report the bytes gathered with nil payload.
      break
    end
  end
  [compressed_data, uncompressed_data]
end
find_zlib_data_header(packfile_io)
click to toggle source
# Advances +packfile_io+ until a recognized 2-byte zlib header is found
# and returns those two bytes.
#
# If type is OBJ, TREE or COMMIT, data is a zlib stream; ref-delta uses a
# 20-byte hash of the base object at the beginning of data, and ofs-delta
# stores an offset within the same packfile to identify the base object.
# We really don't care about the delta objects, so we skip ahead until a
# compressed-data header appears.
def self.find_zlib_data_header(packfile_io)
  header = packfile_io.read(2) # 2 bytes to find the zlib header
  until ZLIB_HEADERS.member?(header.unpack1('n'))
    # Rewind one byte so the scan walks the stream 2 bytes at a time,
    # overlapping by one.
    packfile_io.seek(packfile_io.pos - 1)
    header = packfile_io.read(2)
  end
  header
end