class Textfile
Attributes
debug[RW]
logger[RW]
path[RW]
sorted[RW]
tmpdir[RW]
Public Class Methods
new(path, options = {})
click to toggle source
options
-
:bufsiz
- Passed to GNU sort to optimize performance. -
:debug
- Suppress deletion of temp files. -
:lang
- Collation sequence. -
:logger
- Logs shell commands and resulting output (default: STDOUT).
# File lib/textfile.rb, line 13
#
# Builds a handle for the text file at +path+.
#
# path    - Location of the file; stored as given in @path.
# options - Hash of optional settings:
#           :bufsiz - value forwarded to the sort command as --buffer-size
#           :debug  - when truthy, temporary copies are not unlinked
#           :lang   - value exported as LC_COLLATE (default: 'en_US.UTF-8')
#           :logger - receives shell commands and their output
#                     (default: Logger.new(STDOUT))
def initialize(path, options = {})
  # This used to be stored in the misspelled @bufize, so the :bufsiz option
  # never reached #sort, which reads @bufsiz.
  @bufsiz = options[:bufsiz]
  @debug  = options[:debug]
  @lang   = options[:lang] || 'en_US.UTF-8'
  @logger = options[:logger] || Logger.new(STDOUT)
  @path   = path
end
Public Instance Methods
clear()
click to toggle source
Removes all records.
# File lib/textfile.rb, line 22
#
# Truncates the file at @path by redirecting /dev/null into it through the
# shell. Returns whatever #sh returns.
def clear
  sh("cat /dev/null > #{@path}")
end
intersection(textfile)
click to toggle source
Removes records not present in other textfile.
# File lib/textfile.rb, line 27
#
# Rewrites this file via #comm with the '-12' flags, i.e. columns 1 and 2 of
# the comm output are suppressed.
#
# textfile - the other Textfile to compare against.
def intersection(textfile)
  suppress_unique_columns = '-12'
  comm(textfile, suppress_unique_columns)
end
merge(*textfiles)
click to toggle source
Merges the contents of other textfiles and returns self.
# File lib/textfile.rb, line 32
#
# Appends the contents of the given textfiles to this file, then sorts it.
#
# textfiles - zero or more objects responding to #path.
#
# Returns the result of #sort.
def merge(*textfiles)
  # With no operands `cat` would read from stdin, so skip the append
  # entirely when there is nothing to merge.
  unless textfiles.empty?
    sh "cat #{textfiles.map(&:path).join(' ')} >> #{@path}"
    # Appending invalidates any earlier sort. Without this reset #sort
    # returns early when @sorted is already true and the merged records
    # stay unsorted and undeduplicated.
    @sorted = false
  end
  sort
end
subtract(textfile)
click to toggle source
Removes records present in other textfile.
# File lib/textfile.rb, line 38
#
# Rewrites this file via #comm with the '-23' flags, i.e. columns 2 and 3 of
# the comm output are suppressed.
#
# textfile - the other Textfile to compare against.
def subtract(textfile)
  # --nocheck-order is passed because of
  # https://bugzilla.redhat.com/show_bug.cgi?id=1001775
  flags = '--nocheck-order -23'
  comm(textfile, flags)
end
Protected Instance Methods
comm(textfile, options)
click to toggle source
# File lib/textfile.rb, line 50
#
# Sorts both files, then runs the platform's comm command on a temporary
# copy of this file and the other file, redirecting the output into @path.
#
# textfile - the other Textfile; its #sort and #path are used.
# options  - String of flags placed directly after the comm command.
#
# Returns whatever #with_tempcopy returns.
def comm(textfile, options)
  sort
  textfile.sort
  with_tempcopy do |snapshot|
    # Compare against the snapshot because the redirect overwrites @path.
    sh("#{comm_cmd} #{options} #{snapshot} #{textfile.path} > #{@path}")
  end
end
comm_cmd()
click to toggle source
OS X comm can't handle lines > 2K bytes. See apple.stackexchange.com/questions/69223/how-to-replace-mac-os-x-utilities-with-gnu-core-utilities
# File lib/textfile.rb, line 46
#
# Name of the comm executable to invoke: 'gcomm' when RUBY_PLATFORM matches
# /darwin/, 'comm' otherwise.
def comm_cmd
  if RUBY_PLATFORM =~ /darwin/
    'gcomm'
  else
    'comm'
  end
end
sh(cmd)
click to toggle source
# File lib/textfile.rb, line 58
#
# Runs +cmd+ through the shell and logs both the command line and its
# captured standard output at info level.
#
# cmd - String shell command. When @lang is set, the command is prefixed
#       with an export of LC_COLLATE.
#
# A command that does not exit successfully is additionally reported at
# error level instead of passing silently. For a pipeline the status is the
# one the shell reports for the whole command line.
#
# Returns self.
def sh(cmd)
  cmd = "export LC_COLLATE=#{@lang}; #{cmd}" if @lang
  logger.info cmd
  output = %x[ #{cmd} ]
  # Read $? immediately, before any other call can spawn a process and
  # replace it.
  status = $?
  logger.info output
  logger.error "command failed (#{status}): #{cmd}" unless status.success?
  self
end
sort()
click to toggle source
Sorts file and removes any duplicate records.
# File lib/textfile.rb, line 66
#
# Sorts the file and pipes the result through the uniq command, writing the
# output back to @path. Does nothing when the file is already flagged as
# sorted. When @bufsiz is set it is passed to the sort command as
# --buffer-size.
#
# Returns self.
def sort
  unless sorted
    buffer_flag = @bufsiz ? "--buffer-size=#{@bufsiz}" : nil
    with_tempcopy do |snapshot|
      # Sort from the snapshot because the redirect overwrites @path.
      sh("#{sort_cmd} #{buffer_flag} #{snapshot} | #{uniq_cmd} > #{@path}")
    end
    @sorted = true
  end
  self
end
sort_cmd()
click to toggle source
# File lib/textfile.rb, line 47
#
# Name of the sort executable to invoke: 'gsort' when RUBY_PLATFORM matches
# /darwin/, 'sort' otherwise.
def sort_cmd
  return 'gsort' if RUBY_PLATFORM =~ /darwin/

  'sort'
end
uniq_cmd()
click to toggle source
# File lib/textfile.rb, line 48
#
# Name of the uniq executable to invoke: 'guniq' when RUBY_PLATFORM matches
# /darwin/, 'uniq' otherwise.
def uniq_cmd
  on_darwin = RUBY_PLATFORM =~ /darwin/
  on_darwin ? 'guniq' : 'uniq'
end
with_tempcopy() { |path| ... }
click to toggle source
# File lib/textfile.rb, line 77
#
# Copies the file at @path into a Tempfile created in +tmpdir+, closes the
# copy and yields its path to the block.
#
# The copy is closed and, unless @debug is set, unlinked once the block
# finishes. This also happens when the copy or the block raises, so a failed
# run no longer leaves the temp file behind.
#
# Yields the String path of the temporary copy.
#
# Returns self.
def with_tempcopy
  tempcopy = Tempfile.new(['temp-', '.txt'], tmpdir)
  begin
    # Stream the bytes across instead of loading the whole file into memory.
    IO.copy_stream(@path, tempcopy)
    tempcopy.close
    yield tempcopy.path
  ensure
    tempcopy.close unless tempcopy.closed?
    tempcopy.unlink unless @debug
  end
  self
end