class Textfile

Attributes

debug[RW]
logger[RW]
path[RW]
sorted[RW]
tmpdir[RW]

Public Class Methods

new(path, options = {}) click to toggle source

options

  • :bufsiz - Passed to GNU sort to optimize performance.

  • :debug - Suppress deletion of temp files.

  • :lang - Collation sequence.

  • :logger - Logs shell commands and resulting output (default: STDOUT).

# File lib/textfile.rb, line 13
# Builds a Textfile wrapper around the file at +path+.
#
# path    - Location of the text file that the shell commands operate on.
# options - Hash of optional settings (default: {}):
#           :bufsiz - Buffer size handed to GNU sort via --buffer-size.
#           :debug  - When truthy, temp files are not deleted.
#           :lang   - Collation locale exported as LC_COLLATE
#                     (default: 'en_US.UTF-8').
#           :logger - Receives shell commands and their output
#                     (default: Logger.new(STDOUT)).
def initialize(path, options = {})
  # Must be stored as @bufsiz, the name #sort reads. The previous spelling
  # (@bufize) meant the :bufsiz option was silently ignored.
  @bufsiz = options[:bufsiz]
  @debug = options[:debug]
  @lang = options[:lang] || 'en_US.UTF-8'
  @logger = options[:logger] || Logger.new(STDOUT)
  @path = path
end

Public Instance Methods

clear() click to toggle source

Removes all records.

# File lib/textfile.rb, line 22
# Empties the file by redirecting /dev/null into it, removing all records.
#
# Returns the result of #sh.
def clear
  truncate_cmd = "cat /dev/null > #{@path}"
  sh(truncate_cmd)
end
intersection(textfile) click to toggle source

Removes records not present in other textfile.

# File lib/textfile.rb, line 27
# Removes records that are not also present in +textfile+.
#
# textfile - The other Textfile to compare against.
#
# Returns the result of #comm.
def intersection(textfile)
  comm_flags = '-12'
  comm(textfile, comm_flags)
end
merge(*textfiles) click to toggle source

Merges the contents of other textfiles and returns self.

# File lib/textfile.rb, line 32
# Appends the contents of the given textfiles to this file, then sorts and
# de-duplicates the result.
#
# textfiles - Zero or more objects responding to #path.
#
# Returns the result of #sort.
def merge(*textfiles)
  unless textfiles.empty?
    # With no operands `cat` would read from stdin, so only shell out when
    # there is something to append.
    sources = textfiles.map(&:path).join(' ')
    sh "cat #{sources} >> #{@path}"
    # The appended records are unsorted and may be duplicates; drop the cached
    # flag so the following #sort does real work instead of returning early.
    @sorted = false
  end
  sort
end
subtract(textfile) click to toggle source

Removes records present in other textfile.

# File lib/textfile.rb, line 38
# Removes records that are present in +textfile+.
#
# textfile - The other Textfile whose records are taken out of this one.
#
# Returns the result of #comm.
def subtract(textfile)
  # --nocheck-order, see https://bugzilla.redhat.com/show_bug.cgi?id=1001775
  comm_flags = '--nocheck-order -23'
  comm(textfile, comm_flags)
end

Protected Instance Methods

comm(textfile, options) click to toggle source
# File lib/textfile.rb, line 50
# Runs the platform's comm command against a temporary copy of this file and
# +textfile+, writing the output back over this file.
#
# textfile - The other Textfile; it is sorted first and its #path is used.
# options  - String of flags placed directly after the comm command.
#
# Returns the result of #with_tempcopy.
def comm(textfile, options)
  # Sort both inputs before comparing them.
  sort
  textfile.sort
  with_tempcopy do |snapshot|
    # Read from the snapshot because the redirect overwrites @path.
    sh format('%s %s %s %s > %s', comm_cmd, options, snapshot, textfile.path, @path)
  end
end
comm_cmd() click to toggle source

OS X comm can't handle lines > 2K bytes. See apple.stackexchange.com/questions/69223/how-to-replace-mac-os-x-utilities-with-gnu-core-utilities

# File lib/textfile.rb, line 46
# Name of the comm executable for the current platform: 'gcomm' when
# RUBY_PLATFORM mentions darwin, 'comm' otherwise.
def comm_cmd
  return 'gcomm' if RUBY_PLATFORM.match?(/darwin/)

  'comm'
end
sh(cmd) click to toggle source
# File lib/textfile.rb, line 58
# Runs +cmd+ in a subshell, logging the command and its standard output.
#
# When @lang is set, the command is prefixed with an LC_COLLATE export. A
# non-zero (or signalled) exit is reported through logger.error instead of
# being silently dropped. No exception is raised, so callers that relied on
# the best-effort behavior are unaffected.
#
# cmd - String shell command.
#
# Returns self.
def sh(cmd)
  cmd = "export LC_COLLATE=#{@lang}; #{cmd}" if @lang
  logger.info cmd
  output = %x[ #{cmd} ]
  # Grab the status immediately, before any later call can overwrite $?.
  status = $?
  logger.info output
  unless status.success?
    logger.error "Command failed (exit status #{status.exitstatus.inspect}): #{cmd}"
  end
  self
end
sort() click to toggle source

Sorts file and removes any duplicate records.

# File lib/textfile.rb, line 66
# Sorts the file and removes duplicate records by piping a temporary copy
# through the platform's sort and uniq commands back into the file.
#
# Does nothing when the file is already flagged as sorted; otherwise sets
# the flag once the command has run.
#
# Returns self.
def sort
  unless sorted
    # Only pass --buffer-size when a size was configured.
    buffer_flag = ("--buffer-size=#{@bufsiz}" if @bufsiz)
    with_tempcopy do |snapshot|
      sh "#{sort_cmd} #{buffer_flag} #{snapshot} | #{uniq_cmd} > #{@path}"
    end
    @sorted = true
  end
  self
end
sort_cmd() click to toggle source
# File lib/textfile.rb, line 47
# Name of the sort executable for the current platform: 'gsort' when
# RUBY_PLATFORM mentions darwin, 'sort' otherwise.
def sort_cmd
  if RUBY_PLATFORM.match?(/darwin/)
    'gsort'
  else
    'sort'
  end
end
uniq_cmd() click to toggle source
# File lib/textfile.rb, line 48
# Name of the uniq executable for the current platform: 'guniq' when
# RUBY_PLATFORM mentions darwin, 'uniq' otherwise.
def uniq_cmd
  on_darwin = RUBY_PLATFORM.match?(/darwin/)
  on_darwin ? 'guniq' : 'uniq'
end
with_tempcopy() { |path| ... } click to toggle source
# File lib/textfile.rb, line 77
# Copies the file to a temporary file (created in +tmpdir+) and yields the
# copy's path to the block, so the block can overwrite @path while still
# reading the original contents.
#
# The copy is streamed rather than read into memory. It is closed and,
# unless @debug is set, deleted even when the block raises.
#
# Yields the String path of the temporary copy.
#
# Returns self.
def with_tempcopy
  tempcopy = Tempfile.new(['temp-', '.txt'], tmpdir)
  begin
    IO.copy_stream(@path, tempcopy)
    # Close before yielding so everything is flushed to disk for the
    # external commands that read the copy.
    tempcopy.close
    yield tempcopy.path
  ensure
    tempcopy.close
    tempcopy.unlink unless @debug
  end
  self
end