class Relaton3gpp::DataFetcher

Constants

CURRENT

Public Class Methods

fetch(output: "data", format: "yaml") click to toggle source

Initialize fetcher and run fetch

@param [String] output directory to save files, default: "data" @param [String] format format of output files (xml, yaml, bibxml), default: yaml

# File lib/relaton_3gpp/data_fetcher.rb, line 28
# Create the output directory, run a full fetch and report timing on stdout.
#
# @param [String] output directory to save files, default: "data"
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
def self.fetch(output: "data", format: "yaml")
  # FileUtils is otherwise only loaded by #initialize, which runs *after*
  # the mkdir_p call below, so load it here before first use.
  require "fileutils"

  t1 = Time.now
  puts "Started at: #{t1}"
  # mkdir_p is a no-op when the directory already exists.
  FileUtils.mkdir_p output
  new(output, format).fetch
  t2 = Time.now
  puts "Stopped at: #{t2}"
  puts "Done in: #{(t2 - t1).round} sec."
end
new(output, format) click to toggle source

Data fetcher initializer

@param [String] output directory to save files @param [String] format format of output files (xml, yaml, bibxml)

# File lib/relaton_3gpp/data_fetcher.rb, line 10
# Data fetcher initializer
#
# @param [String] output directory to save files
# @param [String] format format of output files (xml, yaml, bibxml)
def initialize(output, format)
  # Libraries are loaded when a fetcher is instantiated, not at file load.
  %w[fileutils net/ftp zip mdb].each { |lib| require lib }

  @files = []
  # File extension is the format with a leading "bib" removed (bibxml -> xml).
  @ext = format.sub(/^bib/, "")
  @format = format
  @output = output
end

Public Instance Methods

fetch() click to toggle source

Parse documents

# File lib/relaton_3gpp/data_fetcher.rb, line 41
# Parse documents
#
# Obtains the archive via #get_file (nothing is done when it returns nil),
# extracts the Access database into the working directory, passes every row
# of the "2001-04-25_schedule" table to #fetch_doc and finally writes
# @current to the CURRENT file as YAML.
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  archive = get_file
  return unless archive

  mdb_name = "status_smg_3GPP.mdb"
  Zip::File.open(archive) do |zip|
    entry = zip.glob(mdb_name).first
    File.open(mdb_name, "wb") { |out| out.write entry.get_input_stream.read }
  end

  dbs = Mdb.open mdb_name
  # Lookup tables handed to #fetch_doc, in its parameter order:
  # specs, specrels, releases, tstatus.
  tables = ["Specs_GSM+3G", "Specs_GSM+3G_release-info", "Releases", "temp-status"].map do |table|
    dbs[table]
  end
  dbs["2001-04-25_schedule"].each { |row| fetch_doc row, *tables }
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
end
fetch_doc(row, specs, specrels, releases, tstatus) click to toggle source

Fetch document

@param [Hash] row row from mdb @param [Array<Hash>] specs specs @param [Array<Hash>] specrels specrels @param [Array<Hash>] releases releases @param [Array<Hash>] tstatus tstatus

@return [Relaton3gpp::BibliographicItem, nil] bibliographic item

# File lib/relaton_3gpp/data_fetcher.rb, line 102
# Fetch document
#
# Parses a row with Parser.parse and passes the result to #save_doc. Any
# StandardError is reported with `warn` (message, publication ID built from
# the row, first six backtrace lines) instead of being propagated.
#
# @param [Hash] row row from mdb
# @param [Array<Hash>] specs specs
# @param [Array<Hash>] specrels specrels
# @param [Array<Hash>] releases releases
# @param [Array<Hash>] tstatus tstatus
def fetch_doc(row, specs, specrels, releases, tstatus)
  save_doc Parser.parse(row, specs, specrels, releases, tstatus)
rescue StandardError => e
  warn "Error: #{e.message}"
  version = %i[MAJOR_VERSION_NB TECHNICAL_VERSION_NB EDITORIAL_VERSION_NB].map { |key| row[key] }.join(".")
  warn "PubID: #{row[:spec]}:#{row[:release]}/#{version}"
  warn e.backtrace.first(6).join("\n")
end
file_name(bib) click to toggle source

Generate file name

@param [Relaton3gpp::BibliographicItem] bib bibliographic item

@return [String] file name

# File lib/relaton_3gpp/data_fetcher.rb, line 141
# Generate file name
#
# Whitespace, commas, colons and slashes in the document number become
# underscores, runs of underscores are collapsed, and the result is
# upper-cased and placed in the output directory with the format extension.
#
# @param [#docnumber] bib bibliographic item
#
# @return [String] file name
def file_name(bib)
  sanitized = bib.docnumber.gsub(/[\s,:\/]/, "_")
  basename = "#{sanitized.squeeze('_').upcase}.#{@ext}"
  File.join(@output, basename)
end
get_file() click to toggle source

Get file from FTP

@return [String] file name

# File lib/relaton_3gpp/data_fetcher.rb, line 67
# Get file from FTP
#
# Loads the previously recorded state from CURRENT (if that file exists),
# inspects the first "*.zip" entry in the Spec_Status directory and downloads
# it into the working directory unless its name and timestamp match the
# recorded state. The FTP connection is closed after every attempt,
# including early returns and retried timeouts.
#
# @return [String, nil] downloaded file name, or nil when the archive is
#   unchanged or no zip archive is listed
#
# @raise [Net::ReadTimeout] when the fifth consecutive attempt times out
def get_file # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  # Declared up front so the values assigned inside the block stay visible.
  file = dt = nil
  attempts = 0
  begin
    # Block form guarantees the connection is closed when the block exits.
    Net::FTP.open("www.3gpp.org") do |ftp|
      ftp.resume = true
      ftp.login
      ftp.chdir "/Information/Databases/Spec_Status/"
      entry = ftp.list("*.zip").first
      unless entry
        warn "No zip archive found in /Information/Databases/Spec_Status/"
        return
      end

      # Listing line is split as: date, time, (ignored field), file name.
      d, t, _, file = entry.split
      dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
      return if file == @current["file"] && dt == DateTime.parse(@current["date"])

      ftp.getbinaryfile file
    end
  rescue Net::ReadTimeout
    attempts += 1
    retry if attempts < 5
    raise
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  file
end
save_doc(bib) click to toggle source

Save document to file

@param [Relaton3gpp::BibliographicItem, nil] bib bibliographic item

# File lib/relaton_3gpp/data_fetcher.rb, line 117
# Save document to file
#
# Serializes the item according to @format ("xml", "yaml", or any other
# value, which is dispatched to the matching `to_<format>` method) and writes
# it to the path given by #file_name. A path already written during this run
# triggers a warning; the file is overwritten regardless.
#
# @param [#docnumber, nil] bib bibliographic item; nothing is done for nil
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    if @format == "xml" then bib.to_xml(bibdata: true)
    elsif @format == "yaml" then bib.to_hash.to_yaml
    else bib.send("to_#{@format}")
    end

  path = file_name(bib)
  seen = @files.include?(path)
  @files << path unless seen
  warn "File #{path} already exists. Document: #{bib.docnumber}" if seen

  File.write path, content, encoding: "UTF-8"
end