class Warc::Record::Header

Constants

NAMED_FIELDS

Set of field names defined in the spec

REQUIRED_FIELDS

Attributes

record[R]

WARC field names are case-insensitive, e.g. header["content-length"] == header["Content-Length"]

Public Class Methods

new(record,h={}) click to toggle source
Calls superclass method
# File lib/warc/record/header.rb, line 38
def initialize(record,h={})
  @record=record
  super(h)
end

Public Instance Methods

block_digest() click to toggle source
# File lib/warc/record/header.rb, line 63
def block_digest
  self["warc-block-digest"] ||= compute_digest(self.record.content)
end
compute_digest(content) click to toggle source
# File lib/warc/record/header.rb, line 67
def compute_digest(content)
  "sha256:" + (Digest::SHA256.hexdigest(content))
end
content_length() click to toggle source
# File lib/warc/record/header.rb, line 43
def content_length
(self["content-length"] ||= self.record.content.length rescue 0).to_i
end
date() click to toggle source
# File lib/warc/record/header.rb, line 47
def date
  Time.parse(self["warc-date"]).iso8601 ||= Time.now.iso8601
end
date=(d) click to toggle source
# File lib/warc/record/header.rb, line 51
def date=(d)
  self["warc-date"] = Time.parse(d).iso8601
end
record_id() click to toggle source
# File lib/warc/record/header.rb, line 59
def record_id
  self["warc-record-id"] ||= sprintf("<urn:uuid:%s>",UUID.generate)
end
to_s() click to toggle source
# File lib/warc/record/header.rb, line 75
def to_s
  crfl="\r\n"
  str = String.new
  str << "WARC-Type: #{self.type}" + crfl
  str << "WARC-Record-ID: #{self.record_id}" + crfl
  str << "WARC-Date: #{self.date}" + crfl
  str << "Content-Length: #{self.content_length}" + crfl
  each do |k,v|
    str << "#{k}: #{v}#{crfl}" unless REQUIRED_FIELDS.map(&:downcase).include?(k)
  end
  return str
end
type() click to toggle source
# File lib/warc/record/header.rb, line 55
def type
  self["warc-type"]
end
uri() click to toggle source
# File lib/warc/record/header.rb, line 71
def uri
  self["warc-target-uri"]
end