class Datafile::ZipDataset

Public Class Methods

new( dataset )
# File lib/datafile/workers/zip/dataset.rb, line 11
## Wraps a dataset so its zip archive can be located, downloaded and read.
##   dataset - kept as-is in @dataset; the other methods of this class
##             call name, opts and setup on it
def initialize( dataset )
  @dataset = dataset
end

Public Instance Methods

download()
# File lib/datafile/workers/zip/dataset.rb, line 34
## Downloads the dataset's zip archive:
##   logs the dataset name, the remote source and the local target,
##   then hands both locations over to download_blob.
## Returns whatever download_blob returns.
def download
  source = remote_zip_url
  target = local_zip_path

  logger.info "download dataset '#{@dataset.name}'"
  logger.info "   from '#{source}'"
  logger.info "   to '#{target}'..."

  download_blob( source, target )
end
dump()
# File lib/datafile/workers/zip/dataset.rb, line 43
## Prints a debugging summary of the dataset to stdout:
##   name and opts (as json), the local zip name and path,
##   the zip's size in bytes (or a "not found" marker if the file is missing),
##   and the remote zip url.
def dump
  zip_path = local_zip_path

  ## note: opts get printed via to_json (opts.inspect would be an alternative)
  puts "dataset '#{@dataset.name}' opts=#{@dataset.opts.to_json}"
  puts "  local '#{local_zip_name}' (#{zip_path})"

  ## also check if the zip exists
  status = File.exist?( zip_path ) ? "    size: #{File.size(zip_path)} bytes" : "    (file not found)"
  puts status

  puts "  remote '#{remote_zip_url}'"
end
local_zip_name()
# File lib/datafile/workers/zip/dataset.rb, line 19
## Flattened dataset name used as the local zip's base name.
##   every / in the dataset name becomes --I-- so that the result
##   contains no folders, e.g. a/b => a--I--b
##   note: the .zip extension is NOT included
def local_zip_name
  flat_name = @dataset.name.gsub( %r{/}, '--I--' )
  flat_name
end
local_zip_path()
# File lib/datafile/workers/zip/dataset.rb, line 29
## Full local path of the zip: <local_zip_root>/<local_zip_name>.zip
def local_zip_path
  root = local_zip_root
  name = local_zip_name
  "#{root}/#{name}.zip"
end
local_zip_root()
# File lib/datafile/workers/zip/dataset.rb, line 25
## Folder that holds the local zips; a relative path (./tmp).
def local_zip_root
  './tmp'
end
read()
# File lib/datafile/workers/zip/dataset.rb, line 55
## Reads the downloaded zip into the database.
##   only football datasets (FootballDataset) are handled for now:
##   the local zip gets opened as a SportDb::ZipPackage and read;
##   for any other dataset only a TODO/FIX message gets logged.
def read
  ## everything but football is not supported yet - log and bail out
  unless @dataset.is_a?( FootballDataset )
    return logger.info( "TODO/FIX: read dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'; sorry" )
  end

  logger.info( "read football dataset (zip) '#{@dataset.name}', '#{@dataset.setup}'" )

  ## note: the (optional) setup arg gets passed on as the season (filter) arg for now
  SportDb::ZipPackage.new( local_zip_path ).read( season: @dataset.setup )
end
remote_zip_url()
# File lib/datafile/workers/zip/dataset.rb, line 15
## Remote zip url: the github archive (master.zip) for the dataset name.
def remote_zip_url
  repo = @dataset.name
  "https://github.com/#{repo}/archive/master.zip"
end

Private Instance Methods

download_blob( url, dest )

download tasks for zips

# File lib/datafile/workers/zip/dataset.rb, line 70
## Downloads url into the local file dest.
##   url  - remote location; passed on to Fetcher::Worker#copy
##   dest - local target file path; its parent folder gets created if missing
## Logs the transfer first and the resulting file size (in bytes) afterwards.
def download_blob( url, dest )
  logger.info "downloading #{url} to #{dest}..."

  ## make sure dest path exists
  ##  note: use File.exist? - the File.exists? alias was deprecated
  ##        and got removed in ruby 3.2 (raises NoMethodError there)
  dest_dir = File.dirname( dest )
  FileUtils.mkdir_p( dest_dir )  unless File.exist?( dest_dir )

  worker = Fetcher::Worker.new
  worker.copy( url, dest )

  ## print some file stats
  logger.debug "  size: #{File.size(dest)} bytes"
end