class Libis::RosettaChecker::FilesToIngestCleanup
Constants
- CSV_HEADER
- FIND_SQL
- LOG_PATTERN
- MSG_CALC_FC
- MSG_CHCK_DB
- MSG_DEFLATE
- SQL_DATA
Attributes
cfg[RW]
connection[RW]
cursor[RW]
logger[RW]
report[RW]
Public Class Methods
command()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 21
# Name of the sub-command implemented by this class.
#
# @return [String] the frozen string 'files2ingest'
def self.command
  name = 'files2ingest'
  name.freeze
end
new(cfg)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 37
# Stores the configuration, then prepares logging and the database session
# (in that order).
#
# @param cfg [Object] configuration object; the other methods of this class
#   read attributes such as +quiet+, +log_file+, +dbuser+, +dbpass+ and +dburl+
#   from it
def initialize(cfg)
  @cfg = cfg
  setup_logging
  setup_db
end
options_class()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 25
# Class that describes the command line options of this command.
#
# @return [Class] FilesToIngestCleanupOptions
def self.options_class
  klass = FilesToIngestCleanupOptions
  klass
end
run()
click to toggle source
Calls superclass method
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 29
# Command entry point: lets the superclass implementation yield a
# configuration object, builds a checker for it and processes ARGV.
#
# The checker is always finalized afterwards, even when processing raises,
# so that the resources it holds are released; previously #finalize was
# never called from here.
def self.run
  super do |cfg|
    checker = new(cfg)
    begin
      checker.run(ARGV)
    ensure
      checker.finalize
    end
  end
end
short_desc()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 17
# One-line, human readable description of the command.
#
# @return [String] frozen description text
def self.short_desc
  description = 'Report on files that are/are not ingested'
  description.freeze
end
Public Instance Methods
finalize()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 45
# Releases the resources held by this instance: the report (when one was
# opened), the statement cursor and the database connection.
#
# The connection is logged off even when closing the report or the cursor
# raises. Cursor and connection are reset to nil afterwards so that calling
# this method a second time does not touch already released handles.
#
# @return [nil]
def finalize
  # The report is opened lazily while reporting and was never closed before.
  report&.close
  cursor&.close
  self.cursor = nil
  nil
ensure
  connection&.logoff
  self.connection = nil
end
run(argv)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 50
# Processes every target found in +argv+, consuming the array while doing so.
#
# After each target, when arguments remain, the options are parsed again from
# the remaining arguments and logging is set up again before continuing with
# the next target.
#
# @param argv [Array<String>] targets (and possibly options in between)
# @raise [ArgumentError] when +argv+ is empty
# @return [nil]
def run(argv)
  if argv.size <= 0
    raise ArgumentError, 'Need to specify at least a directory/file to parse'
  end

  while (target = argv.shift)
    process_dir(target)
    unless argv.empty?
      self.class.parse_options(argv)
      setup_logging
    end
  end
end
Protected Instance Methods
check_file(info)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 252
# Looks up a file in the database by size and MD5 checksum, keeps the rows
# whose DNX data also contains the file's SHA1 fixity value, and reports the
# outcome.
#
# Every matching row is reported as its own record: a copy of +info+
# extended with the SQL_DATA columns of the row, a +:name_match+ flag and
# +:found+ set to the total number of matches. Without any match +info+
# itself is reported with +:found+ equal to 0.
#
# Changes compared to the previous implementation:
# * The file name fragments are escaped before they are combined into the
#   name pattern, so names containing regexp meta characters (e.g. '(' or
#   '+') no longer raise RegexpError or match by accident.
# * Row data is written to the per-match copy only. Before, it was written
#   into +info+ itself, so a hash reused for the next file carried the
#   previous file's identifiers into an "unmatched" report line.
#
# @param info [Hash] file description; reads +:size+, +:md5+, +:sha1+ and
#   +:file+, and sets +:found+ to 0
# @return [Integer] number of matching rows
def check_file(info)
  logger.info MSG_CHCK_DB
  cursor.bind_param(':filesize', info[:size].to_i)
  cursor.bind_param(':checksum', info[:md5].to_s)
  cursor.exec

  info[:found] = 0
  # Built once instead of once per fetched row.
  fixity_pattern = /<key id="fixityType">SHA1<\/key><key id="fixityValue">#{Regexp.escape(info[:sha1].to_s)}<\/key>/
  name_pattern = nil
  matches = []

  while (row = cursor.fetch_hash)
    next unless row['DNX'].read =~ fixity_pattern

    match = info.dup
    SQL_DATA.each { |column| match[column.to_sym] = row[column.upcase] }
    logger.info " found match: #{match[:ie_id]}/#{match[:rep_id]}/#{match[:fl_id]}"

    # Separators in the local name may differ from the stored name, so the
    # fragments between them only need to appear in the same order.
    name_pattern ||= Regexp.new(
      info[:file].split(/[ #._-]/).map { |part| Regexp.escape(part) }.join('.*')
    )
    if match[:original_name] =~ name_pattern
      logger.info " name matches: #{match[:original_name]}"
      match[:name_match] = true
    else
      match[:name_match] = false
    end

    matches << match
  end

  if matches.empty?
    to_report info
  else
    matches.each do |match|
      match[:found] = matches.count
      to_report match
    end
  end

  matches.size
end
checksum_file(reader, info)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 238 def checksum_file(reader, info) info[:size] = 0 md5 = Digest::MD5.new sha1 = Digest::SHA1.new while (data = reader.read 2048000) do info[:size] += data.length md5 << data sha1 << data end reader.close info[:md5] = md5.hexdigest info[:sha1] = sha1.hexdigest end
process_dir(dir)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 127
# Processes one command line target. Three forms are accepted:
#
# * a directory: every regular entry is handed to #process_file; sub
#   directories are descended into only when the +recursive+ option is set
# * '@' followed by the path of a list file: every line of that file is
#   handed to #process_file, with the list file's directory as search dir
# * a plain file: handed to #process_file directly
#
# Changes compared to the previous implementation:
# * The list file is read with File.foreach, so its handle is closed again.
# * Blank lines in a list file are skipped. An empty name used to resolve to
#   the list file's own directory (File.join(dir, '')), which then caused
#   that whole directory to be processed.
# * The "does not exist" check inside the directory branch was unreachable
#   (the branch is only entered for an existing directory) and is gone.
#
# @param dir [String] directory, '@listfile' or file path
# @raise [ArgumentError] when +dir+ is none of the accepted forms
# @return [Object, nil] nil when the target could not be read; callers in
#   this class ignore the value
def process_dir(dir)
  if File.directory?(dir)
    unless File.readable?(dir)
      logger.error "Directory '#{dir}' cannot be read"
      return nil
    end

    logger.info "Processing dir '#{dir}'"
    Dir.entries(dir).each do |entry|
      next if %w[. ..].include?(entry)

      path = File.join(dir, entry)
      if File.directory?(path)
        process_dir(path) if @cfg.recursive
      else
        process_file(path)
      end
    end
  elsif dir[0] == '@'
    list_file = to_file(dir[1..-1])
    return nil unless list_file

    logger.info "Processing input file '#{list_file}'"
    base_dir = File.dirname(list_file)
    File.foreach(list_file) do |line|
      name = line.chomp
      next if name.strip.empty?

      process_file(name, base_dir)
    end
  elsif File.file?(dir)
    process_file(dir)
  else
    raise ArgumentError, "Argument '#{dir}' should refer to an existing and readable file or directory"
  end
end
process_file(_file, *search_dir)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 179
# Resolves +_file+ (optionally relative to +search_dir+), computes its size
# and checksums and checks them against the database.
#
# * directories are delegated to #process_dir
# * '.bz2' files are read through the bzip2 reader, i.e. the decompressed
#   content is checked (parent type 'F')
# * '.zip' files are checked as a whole first; only when that yields no
#   match is the archive opened and every non-directory entry checked
#   individually (parent type 'Z')
# * every other file is checked as is (parent type 'D')
#
# Changes compared to the previous implementation:
# * +check_file info > 0+ was parsed as +check_file(info > 0)+, which raised
#   and was swallowed by the rescue below: every zip file ended up logged as
#   "Could not access file" and was never checked or unpacked.
# * +rescue Exception+ also swallowed Interrupt and SystemExit; only
#   StandardError is rescued now.
#
# @param _file [String] file name or path
# @param search_dir [Array<String>] directories to look in when +_file+ is
#   not found directly
# @return [Object, nil] not meaningful; callers in this class ignore it
def process_file(_file, *search_dir)
  file = to_file(_file, *search_dir)
  return unless file

  if File.directory?(file)
    process_dir(file)
    return
  end

  info = { parent_type: 'D', parent: File.dirname(file), file: File.basename(file) }
  logger.info "- #{file}"

  case File.extname(file)
  when '.bz2'
    logger.info MSG_DEFLATE
    info[:parent_type] = 'F'
    info[:parent] = file
    info[:file] = File.basename(file, '.bz2')
    logger.info MSG_CALC_FC
    checksum_file(Bzip2::FFI::Reader.open(file), info)
    check_file(info)
  when '.zip'
    begin
      logger.info MSG_CALC_FC
      checksum_file(File.open(file), info)
      unless check_file(info) > 0
        logger.info ' - Unpacking'.freeze
        info[:parent_type] = 'Z'
        info[:parent] = file
        Zip::File.open(file) do |zip|
          zip.each do |entry|
            next if entry.directory?

            info[:file] = entry.name
            logger.info "- #{file}/#{entry.name}"
            logger.info MSG_CALC_FC
            checksum_file(entry.get_input_stream, info)
            check_file(info)
          end
        end
      end
    rescue Zip::ZipError
      logger.error "Could not unpack file '#{file}'"
    rescue StandardError
      logger.error "Could not access file '#{file}'"
    end
  else
    begin
      logger.info MSG_CALC_FC
      checksum_file(File.open(file), info)
      check_file(info)
    rescue StandardError
      logger.error "Could not access file '#{file}'"
    end
  end
end
setup_db()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 121
# Opens the database connection with the credentials from the configuration
# (+dbuser+, +dbpass+, +dburl+) and prepares the FIND_SQL statement on it.
# The resulting cursor is configured to prefetch 10 rows.
#
# @return [Integer] 10 (the value of the last assignment)
def setup_db
  credentials = [@cfg.dbuser, @cfg.dbpass, @cfg.dburl]
  @connection = OCI8.new(*credentials)
  statement = @connection.parse(FIND_SQL)
  @cursor = statement
  @cursor.prefetch_rows = 10
end
setup_logging()
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 108
# (Re)configures the logger of this command.
#
# The root level is set to :info and the logger named after the command
# gets the stdout appender as its only appender; stdout shows warnings and
# up in quiet mode, info and up otherwise. A blank +log_file+ setting is
# reset to nil; when a log file is configured, a non-truncating file
# appender using LOG_PATTERN is added.
def setup_logging
  Logging.logger.root.level = :info

  @logger = Logging.logger[self.class.command]
  @logger.appenders = [Logging.appenders.stdout]
  Logging.appenders.stdout.level = @cfg.quiet ? :warn : :info

  # Treat an empty or whitespace-only file name as "no log file".
  @cfg.log_file = nil if @cfg.log_file&.chomp&.strip&.empty?
  return unless @cfg.log_file

  @logger.add_appenders(
    Logging.appenders.file(
      @cfg.log_file,
      truncate: false,
      layout: Logging.layouts.pattern(pattern: LOG_PATTERN)
    )
  )
end
to_file(file, *search_dirs)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 161
# Resolves +file+ to an existing, readable path.
#
# The name is tried as given first and then inside each of +search_dirs+, in
# order. The first candidate that exists decides the outcome: it is returned
# when readable, otherwise an error is logged and nil is returned. When no
# candidate exists an error is logged as well.
#
# @param file [String] file name or path
# @param search_dirs [Array<String>] directories to try when +file+ does not
#   exist as given
# @return [String, nil] the resolved path, or nil when nothing usable exists
def to_file(file, *search_dirs)
  # Candidates are produced lazily so no path is built before it is needed.
  candidates = Enumerator.new do |out|
    out << file
    search_dirs.each { |dir| out << File.join(dir, file) }
  end
  located = candidates.find { |candidate| File.exist?(candidate) }

  if located.nil?
    logger.error "File '#{file}' does not exist"
    nil
  elsif File.readable?(located)
    located
  else
    logger.error "File '#{located}' cannot be read"
    nil
  end
end
to_report(info = nil)
click to toggle source
# File lib/libis/rosetta_checker/files_to_ingest_cleanup.rb, line 292
# Appends one line to the CSV report, when reporting is enabled.
#
# On first use the report file is opened for writing and CSV_HEADER is
# written as its first line. When +info+ is given, one row is appended with
# the values of +info+ for the CSV_HEADER columns (looked up by symbol).
#
# @param info [Hash, nil] record to write; nil only makes sure the report
#   exists and has its header
# @return [Object, nil] nil when reporting is disabled or +info+ is nil
def to_report(info = nil)
  return unless @cfg.report

  if !@report
    @report = CSV.open(@cfg.report_file, 'wb')
    @report << CSV_HEADER
  end
  return unless info

  row = CSV_HEADER.map { |column| info[column.to_sym] }
  @report << row
end