module MongoOplogBackup::Oplog
Constants
- FILENAME_RE
Public Class Methods
each_document(filename) { |from_bson| ... }
click to toggle source
# Iterates over every BSON document in +filename+, yielding each as a
# BSON::Document. Transparently handles gzip-compressed files by sniffing
# the two-byte gzip magic number via gzip_fingerprint.
def self.each_document(filename)
  reader = proc do |io|
    until io.eof?
      # FIXME: Since bson4, from_bson takes a ByteArray instead of a StringIO
      yield BSON::Document.from_bson(io)
    end
  end

  if gzip_fingerprint(filename)
    Zlib::GzipReader.open(filename, &reader)
  else
    File.open(filename, 'rb', &reader)
  end
end
find_oplogs(dir)
click to toggle source
# Returns the oplog backup files in +dir+ whose names match FILENAME_RE,
# sorted ascending by the first timestamp encoded in each filename.
def self.find_oplogs(dir)
  candidates = Dir.glob(File.join(dir, 'oplog-*.bson*'))
  matching = candidates.select { |name| name =~ FILENAME_RE }
  # sort_by computes each filename's key once (the comparison itself is
  # unchanged: BSON::Timestamp#<=> on the :first timestamp).
  matching.sort_by { |name| timestamps_from_filename(name)[:first] }
end
gzip_fingerprint(filename)
click to toggle source
# Returns true when the file at +filename+ begins with the two-byte gzip
# magic number (0x1f 0x8b), false otherwise.
#
# Reads at most two bytes. Files shorter than two bytes (including empty
# files, where File.read returns nil) are reported as not gzip, instead of
# raising NoMethodError as the previous implementation did.
def self.gzip_fingerprint filename
  bytes = File.read(filename, 2, 0)
  return false if bytes.nil? || bytes.bytesize < 2
  # getbyte avoids any string-encoding concerns in the comparison.
  bytes.getbyte(0) == 0x1f && bytes.getbyte(1) == 0x8b
end
merge(target, source_files, options={})
click to toggle source
# Merges multiple oplog files into a single +target+ file.
#
# target       - path of the merged oplog to write
# source_files - oplog filenames, expected sorted by first timestamp
# options      - :limit (currently unused — see TODO),
#                :force (skip the exactly-one-entry overlap check),
#                :gzip  (write the target gzip-compressed)
#
# Consecutive source files must overlap by exactly one entry (the last
# entry of one file is the first entry of the next) unless :force is set.
# Raises on out-of-order timestamps, on filename/content timestamp
# mismatches, and on overlap violations.
def self.merge(target, source_files, options={})
  limit = options[:limit] # TODO: use
  # NOTE(review): in the extracted source this assignment had been swallowed
  # into the TODO comment above, leaving `force` undefined below — restored.
  force = options[:force]
  compress = !!options[:gzip]

  process_output = Proc.new do |output|
    last_timestamp = nil
    first = true

    source_files.each do |filename|
      timestamps = timestamps_from_filename(filename)
      if timestamps
        expected_first = timestamps[:first]
        expected_last = timestamps[:last]
      else
        expected_first = nil
        expected_last = nil
      end

      # Optimize:
      # We can assume that the timestamps are in order.
      # This means we only need to find the first non-overlapping point,
      # and the rest we can pass through directly.

      MongoOplogBackup.log.debug "Reading #{filename}"
      last_file_timestamp = nil
      skipped = 0
      wrote = 0
      first_file_timestamp = nil

      Oplog.each_document(filename) do |doc|
        timestamp = doc['ts']
        first_file_timestamp = timestamp if first_file_timestamp.nil?
        # gzip stores the mtime in the header, so we set it explicitly for
        # consistency between runs. (File#mtime returns a nonzero Time, so
        # the mtime= writer is only ever invoked on a GzipWriter.)
        output.mtime = first_file_timestamp.seconds if output.mtime.to_i == 0
        if !last_timestamp.nil? && timestamp <= last_timestamp
          # Overlap with a previously-written entry: skip it.
          skipped += 1
        elsif !last_file_timestamp.nil? && timestamp <= last_file_timestamp
          raise "Timestamps out of order in #{filename}"
        else
          output.write(doc.to_bson)
          wrote += 1
          last_timestamp = timestamp
        end
        last_file_timestamp = timestamp
      end

      if expected_first && first_file_timestamp != expected_first
        raise "#{expected_first} was not the first timestamp in #{filename}"
      end

      if expected_last && last_file_timestamp != expected_last
        raise "#{expected_last} was not the last timestamp in #{filename}"
      end

      MongoOplogBackup.log.info "Wrote #{wrote} and skipped #{skipped} oplog entries from #{filename}"

      raise "Overlap must be exactly 1" unless first || skipped == 1 || force
      first = false
    end
  end

  if (compress)
    Zlib::GzipWriter.open(target, &process_output)
  else
    File.open(target, 'wb', &process_output)
  end
end
merge_backup(dir)
click to toggle source
# Merges every oplog file found in +dir+ into dir/dump/oplog.bson.
# The merged output is gzip-compressed if any source file was.
def self.merge_backup(dir)
  oplogs = find_oplogs(dir)
  use_gzip = oplogs.any? { |name| name.end_with?('.gz') }
  dump_dir = File.join(dir, 'dump')
  FileUtils.mkdir_p(dump_dir)
  # Mongorestore expects this filename, without a gzip suffix.
  merge(File.join(dump_dir, 'oplog.bson'), oplogs, {gzip: use_gzip})
end
oplog_timestamps(filename)
click to toggle source
# Returns an Array of the 'ts' (timestamp) field of every document in the
# given oplog file, in file order.
def self.oplog_timestamps(filename)
  [].tap do |timestamps|
    # This can be optimized by only decoding the timestamp
    # (first field), instead of decoding the entire document.
    each_document(filename) { |doc| timestamps << doc['ts'] }
  end
end
timestamps_from_filename(filename)
click to toggle source
# Parses the first/last timestamps encoded in an oplog backup filename.
#
# Returns {first: BSON::Timestamp, last: BSON::Timestamp} built from
# capture groups 1-4 of FILENAME_RE (seconds:increment pairs), or nil when
# the filename does not match.
def self.timestamps_from_filename filename
  match = FILENAME_RE.match(filename)
  return nil unless match
  seconds1, increment1, seconds2, increment2 = (1..4).map { |i| match[i].to_i }
  {
    first: BSON::Timestamp.new(seconds1, increment1),
    last: BSON::Timestamp.new(seconds2, increment2)
  }
end