class Warc::Stream

Constants

DEFAULT_OPTS

Attributes

parser[R]

Public Class Methods

new(fh,options={},&block) click to toggle source
# File lib/warc/stream.rb, line 28
# Sets up a stream on top of an open handle or a named series of files.
#
# * a ::File is used directly and @name becomes its basename;
# * a String becomes @name and the first file of the series is opened
#   through next_file_handle, using the given block (or a default that
#   appends a zero-padded six digit index) to build each path;
# * any other object is stored unchanged as the file handle.
#
# @param fh [::File, String, Object] handle or base name, see above
# @param options [Hash] values merged on top of DEFAULT_OPTS
# @yield [name, index] optional naming strategy, only used for a String +fh+
def initialize(fh,options={},&block)
  @options = DEFAULT_OPTS.merge options
  @index = 0
  fh = case fh
  when ::File
    @name = ::File.basename(fh)
    fh
  when String
    @name = fh
    @naming_proc = block || lambda {|name,index| "#{name}.#{sprintf('%06d',index)}"}
    next_file_handle
  else
    # Without this branch the case expression evaluates to nil, which
    # silently discarded handles such as StringIO or Tempfile instances.
    fh
  end
  @file_handle=fh
  @parser = ::Warc::Parser.new
end

Public Instance Methods

close() click to toggle source
# File lib/warc/stream.rb, line 67
# Closes the underlying file handle.
#
# @return the result of calling +close+ on the handle
def close
  handle = @file_handle
  handle.close
end
each(offset=0) { |rec| ... } click to toggle source
# File lib/warc/stream.rb, line 44
# Iterates over the records read from +offset+ onwards.
#
# Every record gets its +offset+ set to the handle position at which it
# was read, then is handed to the block. Iteration stops as soon as
# read_record returns a falsy value.
#
# @param offset [Integer] absolute position to seek to before reading
# @yield [rec] each record returned by read_record
# @return [Enumerator] when no block is given
# @return [nil] when iteration with a block has finished
def each(offset=0,&block)
  # Calling without a block used to hit a bare `yield` and raise
  # LocalJumpError on the first record; hand back an Enumerator instead.
  return enum_for(:each, offset) unless block_given?

  @file_handle.seek(offset,::IO::SEEK_SET)
  loop do
    # Remember where the record starts before reading moves the position.
    position = @file_handle.tell
    rec = self.read_record
    break unless rec

    rec.offset = position
    block.call(rec)
  end
end
read_record() click to toggle source
# File lib/warc/stream.rb, line 71
# Placeholder for reading a single record; this implementation always raises.
#
# NOTE(review): presumably meant to be overridden by concrete stream
# classes - confirm against the rest of the library.
#
# @raise [StandardError] always, with a message naming the receiver's class
def read_record
  # Same exception class as before, but with a message that identifies
  # which class is missing the implementation.
  raise StandardError, "#{self.class}#read_record is not implemented"
end
record(offset=0) click to toggle source
# File lib/warc/stream.rb, line 62
# Fetches the single record located at +offset+.
#
# @param offset [Integer] absolute position to seek to before reading
# @return whatever read_record returns
def record(offset=0)
  @file_handle.tap { |handle| handle.seek(offset, ::IO::SEEK_SET) }
  read_record
end
size() click to toggle source
# File lib/warc/stream.rb, line 83
# Reports the size of the underlying file as given by its stat information.
#
# @return the +size+ of the handle's +stat+ result
def size
  file_stat = @file_handle.stat
  file_stat.size
end
write_record(record) click to toggle source
# File lib/warc/stream.rb, line 75
# Positions the stream for appending +record+ and stores the resulting
# position in +record.offset+. No data is written by this method itself.
#
# When the current end of file plus the record's content length would
# exceed @options[:max_filesize], next_file_handle is called first so the
# record lands in the next file of the series.
#
# @param record [#header, #offset=] record whose header responds to
#   +content_length+
# @return the position assigned to +record.offset+
def write_record(record)
  # Go to end of file
  @file_handle.seek(0,::IO::SEEK_END)
  expected_size = record.header.content_length + @file_handle.tell
  if expected_size > @options[:max_filesize]
    next_file_handle
    # The rotated file is opened in 'a+' mode: its position starts at 0
    # even when the file already has content, while writes go to the end.
    # Seek again so the recorded offset matches where data will be appended.
    @file_handle.seek(0,::IO::SEEK_END)
  end
  record.offset = @file_handle.tell
end

Private Instance Methods

next_file_handle() click to toggle source
# File lib/warc/stream.rb, line 89
# Closes the current handle (if any) and opens the next file of the series.
#
# The path is built by calling the naming proc with @name and the
# incremented @index, followed by @ext when one has been set.
#
# @return [::File] the newly opened handle, opened in 'a+' mode
def next_file_handle
  @file_handle.close if @file_handle
  @index += 1
  path = @naming_proc.call(@name,@index)
  # Interpolation tolerates a nil @ext, where String#+ would raise TypeError.
  @file_handle = ::File.new("#{path}#{@ext}",'a+')
end