class Ole::Storage
This class is the primary way the user interacts with an OLE storage file.
TODO
- the custom header cruft for Header and Dirent needs some love.
- i have a number of classes doing load/save combos: Header, AllocationTable, Dirent, and, in a manner of speaking, but arguably different, Storage itself. they have differing api's which would be nice to rethink. AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
Constants
- VERSION
Attributes
Low level internals, you probably shouldn’t need to mess with these
The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable
Low level internals, you probably shouldn’t need to mess with these
The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable
options used at creation time
The top of the ole tree structure
Low level internals, you probably shouldn’t need to mess with these
Low level internals, you probably shouldn’t need to mess with these
The underlying io object to/from which the ole object is serialized, whether we should close it, and whether it is writeable
Public Class Methods
arg should be either a filename, or an IO object, and needs to be seekable. mode is optional, and should be a regular mode string.
# File lib/ole/storage/base.rb, line 40
#
# Creates a storage object on top of +arg+.
#
# arg::    a filename (String) or an already opened, seekable io object.
# mode::   optional mode string; only allowed together with a filename
#          (defaults to 'rb'). A Hash given here is treated as +params+.
# params:: options hash, merged over <tt>{:update_timestamps => true}</tt>.
#
# Raises ArgumentError if a mode string is given together with an io object.
# If the io already has data it is loaded, otherwise a fresh empty document
# is set up via #clear.
def initialize arg, mode=nil, params={}
  params, mode = mode, nil if Hash === mode
  params = {:update_timestamps => true}.merge(params)
  @params = params

  # get the io object
  @close_parent, @io = if String === arg
    mode ||= 'rb'
    # File.open, not Kernel#open: the latter treats a leading "|" in the
    # name as a command to spawn, which must never happen for a filename.
    [true, File.open(arg, mode)]
  else
    raise ArgumentError, 'unable to specify mode string with io object' if mode
    [false, arg]
  end

  # force encoding, to avoid picking up source encoding with StringIO or files in text mode
  @io.set_encoding Encoding::ASCII_8BIT if @io.respond_to?(:set_encoding)

  # do we have this file opened for writing? use mode when provided,
  # otherwise try no-op methods which will raise if read-only
  @writeable = begin
    if mode
      IOMode.new(mode).writeable?
    else
      # works on mri 1.8 & jruby
      @io.flush
      begin
        # works on mri 1.9 & rubinius, throws EBADF on windows
        @io.write_nonblock('') if @io.respond_to?(:write_nonblock)
      rescue Errno::EBADF
        # for windows
        @io.syswrite('')
      end
      true
    end
  rescue IOError
    false
  end

  # silence undefined warning in clear
  @sb_file = nil

  # if the io object has data, we should load it, otherwise start afresh
  # this should be based on the mode string rather.
  begin
    @io.size > 0 ? load : clear
  rescue StandardError
    # the caller never receives this object when construction fails, so a
    # file we opened ourselves would otherwise stay open until GC.
    @io.close if @close_parent
    raise
  end
end
Somewhat similar to File.open, the open class method allows a block form where the Ole::Storage object is automatically closed on completion of the block.
# File lib/ole/storage/base.rb, line 84
#
# Builds a storage from the given arguments (forwarded unchanged to +new+).
# Without a block the storage is returned to the caller, who must close it.
# With a block the storage is yielded, closed once the block finishes (even
# if it raises), and the block's value is returned.
def self.open arg, mode=nil, params={}
  storage = new(arg, mode, params)
  return storage unless block_given?

  begin
    yield storage
  ensure
    storage.close
  end
end
Public Instance Methods
# File lib/ole/storage/base.rb, line 333
#
# Returns the allocation table to use for a stream of +size+ bytes: the big
# table (@bbat) once the size reaches the header threshold, otherwise the
# small table (@sbat).
def bat_for_size size
  # note >=, not > previously.
  return @bbat if size >= @header.threshold

  @sbat
end
# File lib/ole/storage/base.rb, line 295
#
# Resets this object to the equivalent of having loaded an empty ole
# document: new header, new big and small allocation tables, a lone root
# dirent, a new small-block file, and the underlying io truncated to zero
# length. Logs a warning first when the io is not writeable.
def clear
  unless @writeable
    Log.warn('creating new ole storage object on non-writable io')
  end

  @header = Header.new
  @bbat = AllocationTable::Big.new(self)

  # a lone root entry, registered as dirent number 0
  @root = Dirent.new(self, :type => :root, :name => 'Root Entry')
  @dirents = [@root]
  @root.idx = 0

  # replace any previous small-block file with an empty one
  @sb_file && @sb_file.close
  @sb_file = RangesIOResizeable.new(@bbat, :first_block => AllocationTable::EOC)
  @sbat = AllocationTable::Small.new(self)

  # throw everything else the hell away
  @io.truncate(0)
end
# File lib/ole/storage/base.rb, line 164
#
# Closes the storage: closes the small-block file, writes out the metadata
# via #flush when the io is writeable, and finally closes the underlying io
# if this object opened it itself (@close_parent).
#
# The io is closed in an +ensure+ so that a failing #flush (or a failing
# small-block close) does not leak the file handle; the original error still
# propagates to the caller.
def close
  @sb_file.close
  flush if @writeable
ensure
  @io.close if @close_parent
end
# File lib/ole/storage/file_system.rb, line 40
#
# Returns the DirClass helper bound to this storage. It is built on first
# use and the same instance is handed back on later calls.
def dir
  @dir ||= DirClass.new(self)
end
Tries to get a dirent for path. Returns nil if it doesn't exist (TODO: change this behaviour).
# File lib/ole/storage/file_system.rb, line 46
#
# Resolves +path+ to a dirent by expanding it with file.expand_path and then
# walking down from @root one '/'-separated component at a time (empty
# components are skipped).
#
# Returns the dirent reached, or nil when a component cannot be found or
# when the walk would have to descend below a dirent that is a file.
def dirent_from_path path
  segments = file.expand_path(path).split('/').reject { |segment| segment.empty? }

  segments.inject(@root) do |current, segment|
    # cannot look inside a file
    return nil if current.file?

    child = current / segment
    return nil unless child

    child
  end
end
# File lib/ole/storage/file_system.rb, line 36
#
# Returns the FileClass helper bound to this storage. It is built on first
# use and the same instance is handed back on later calls.
def file
  @file ||= FileClass.new(self)
end
the flush method is the main “save” method. all file contents are always written directly to the file by the RangesIO
objects, all this method does is write out all the file meta data - dirents, allocation tables, file header etc.
maybe add an option to zero the padding, and any remaining avail blocks in the allocation table.
TODO: long and overly complex. simplify and test better. eg, perhaps move serialization of bbat to AllocationTable::Big.
# File lib/ole/storage/base.rb, line 180
#
# Writes the storage metadata out to @io, in this order:
# 1. the flattened dirent tree (padded to a whole big block),
# 2. the small allocation table (sbat),
# 3. the big allocation table (bbat), plus extra "mbat" blocks when the list
#    of bbat blocks does not fit in the 109 slots that follow the header,
# 4. the header itself, followed by the first 109 bbat block numbers.
# The header fields dirent_start, sbat_start, num_sbat, num_bat, mbat_start
# and num_mbat are updated along the way, and @io is flushed at the end.
def flush
  # update root dirent, and flatten dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # serialize the dirents using the bbat
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
    io.write @dirents.map { |dirent| dirent.to_s }.join
    # zero-fill up to the next multiple of the big block size
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
  end

  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
    io.write @sbat.to_s
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end

  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
  end

  # currently we use a loop. this could be better, but basically,
  # the act of writing out the bat, itself requires blocks which get
  # recorded in the bat.
  #
  # i'm sure that there'd be some simpler closed form solution to this. solve
  # recursive func:
  #
  #   num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
  #   bbat_len = initial_bbat_len + num_mbat_blocks
  #   mbat_len = ceil(bbat_len * 4 / block_size)
  #
  # the actual bbat allocation table is itself stored throughout the file, and that chain
  # is stored in the initial blocks, and the mbat blocks.
  num_mbat_blocks = 0
  io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
  # contiguous chunk at the end.
  # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
  # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
  # be fixed easily, add an io truncate
  @bbat.truncate!
  @io.truncate @bbat.block_size * (@bbat.length + 1)
  # iterate until neither the mbat block count nor the bat size changes any more
  while true
    # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
    # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
    # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
    # mbat must remain contiguous.
    bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
    # now storing the excess mbat blocks also increases the size of the bbat:
    new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
    if new_num_mbat_blocks != num_mbat_blocks
      # need more space for the mbat.
      num_mbat_blocks = new_num_mbat_blocks
    elsif io.size != bbat_data_len
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data_len
    else
      break
    end
  end

  # now extract the info we want:
  ranges = io.ranges
  bbat_chain = @bbat.chain io.first_block
  io.close
  bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = (0...num_mbat_blocks).map do
    block = @bbat.free_block
    @bbat[block] = AllocationTable::META_BAT
    block
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC

  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }

  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = num_mbat_blocks
  if num_mbat_blocks != 0
    # write out the mbat blocks now. first of all, where are they going to be?
    mbat_data = bbat_chain[109..-1]
    # expand the mbat_data to include the linked list forward pointers.
    # each slice leaves one slot free, which receives the number of the next mbat block.
    mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
      zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end

  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end
# File lib/ole/storage/base.rb, line 338 def inspect "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>" end
load document from file.
TODO: implement various allocationtable checks, maybe as an AllocationTable#fsck function :)
- reterminate any chain not ending in EOC. compare file size with actually allocated blocks per file.
- pass through all chain heads looking for collisions, and making sure nothing points to them (ie they are really heads). in both sbat and mbat.
- we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks in the bat for them.
- maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size (eg what is used for truncate in flush), then maybe add some sort of message about that. it will be automatically thrown away at close time.
# File lib/ole/storage/base.rb, line 107 def load # we always read 512 for the header block. if the block size ends up being different, # what happens to the 109 fat entries. are there more/less entries? @io.rewind header_block = @io.read 512 @header = Header.new header_block # create an empty bbat. @bbat = AllocationTable::Big.new self bbat_chain = header_block[Header::SIZE..-1].unpack 'V*' mbat_block = @header.mbat_start @header.num_mbat.times do blocks = @bbat.read([mbat_block]).unpack 'V*' mbat_block = blocks.pop bbat_chain += blocks end # am i using num_bat in the right way? @bbat.load @bbat.read(bbat_chain[0, @header.num_bat]) # get block chain for directories, read it, then split it into chunks and load the # directory entries. semantics changed - used to cut at first dir where dir.type == 0 @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE). map { |str| Dirent.new self, str } # now reorder from flat into a tree # links are stored in some kind of balanced binary tree # check that everything is visited at least, and at most once # similarly with the blocks of the file. # was thinking of moving this to Dirent.to_tree instead. class << @dirents def to_tree idx=0 return [] if idx == Dirent::EOT d = self[idx] to_tree(d.child).each { |child| d << child } raise FormatError, "directory #{d.inspect} used twice" if d.idx d.idx = idx to_tree(d.prev) + [d] + to_tree(d.next) end end @root = @dirents.to_tree.first @dirents.reject! { |d| d.type_id == 0 } # silence this warning by default, its not really important (issue #5). # fairly common one appears to be "R" (from office OS X?) which smells # like some kind of UTF16 snafu, but scottwillson also has had some kanji... #Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry' unused = @dirents.reject(&:idx).length Log.warn "#{unused} unused directories" if unused > 0 # FIXME i don't currently use @header.num_sbat which i should # hmm. nor do i write it. 
it means what exactly again? # which mode to use here? @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size @sbat = AllocationTable::Small.new self @sbat.load @bbat.read(@header.sbat_start) end
# File lib/ole/storage/meta_data.rb, line 145
#
# Returns the MetaData helper bound to this storage. It is built on first
# use and the same instance is handed back on later calls.
def meta_data
  @meta_data ||= MetaData.new self
end
could be useful with mis-behaving ole documents. or to just clean them up.
# File lib/ole/storage/base.rb, line 311
#
# Rewrites the document by copying it through a temporary backing store and
# back again (see #repack_using_io).
#
# temp:: +:file+ (default) to use a Tempfile, or +:mem+ to use an in-memory
#        StringIO.
#
# Returns whatever #repack_using_io returns.
# Raises ArgumentError for any other +temp+ value.
def repack temp=:file
  case temp
  when :file
    # Tempfile.open with a block only closes the file; it stays on disk
    # until the finalizer runs. close! closes and unlinks it right away.
    scratch = Tempfile.new 'ole-repack'
    begin
      scratch.binmode
      repack_using_io scratch
    ensure
      scratch.close!
    end
  when :mem
    StringIO.open(''.dup, &method(:repack_using_io))
  else
    raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end
# File lib/ole/storage/base.rb, line 323
#
# Repacks through +temp_io+: copies the whole of @io into it, resets this
# storage with #clear, then opens the copy as a second storage (with the
# same @params) and copies its root dirent back onto this one's root.
def repack_using_io temp_io
  @io.rewind
  IO.copy(@io, temp_io)
  clear
  Storage.open(temp_io, nil, @params) do |source_ole|
    #source_ole.root.type = :dir
    Dirent.copy(source_ole.root, root)
  end
end