class SwissMatch::Location::DataFiles

SwissMatch::Location::DataFiles

Deals with retrieving and updating the files provided by the Swiss postal service, and loading the data from them.

@todo

The current handling of the URLs is not clean. I don't know yet how the URLs will
change over iterations.

Constants

AllCantons

The data of all cantons @private

LanguageCodes

Used to convert numerical language codes to symbols

Attributes

cantons[R]

@return [SwissMatch::Cantons] The loaded swiss cantons

communities[R]

@return [SwissMatch::Communities] The loaded swiss communities

data_directory[RW]

The directory in which the post mat[ch] files reside

date[R]

@return [Date]

The date from when the data from the swiss post master data file
starts to be valid
districts[R]

@return [SwissMatch::Districts] The loaded swiss districts

errors[R]

@return [Array<LoadError>] Errors that occurred while loading the data

random_code[R]

@return [Integer]

The random code from the swiss post master data file
zip_codes[R]

@return [SwissMatch::ZipCodes] The loaded swiss zip codes

Public Class Methods

empty() click to toggle source
# File lib/swissmatch/location/datafiles.rb, line 69
# Builds a new instance and puts it into the empty state via #load_empty!.
#
# @return [SwissMatch::Location::DataFiles]
#   The freshly created instance, after #load_empty! has been called on it.
def self.empty
  new.tap { |instance| instance.load_empty! }
end
new(data_directory=nil) click to toggle source

@param [nil, String] data_directory

The directory in which the post mat[ch] files reside
# File lib/swissmatch/location/datafiles.rb, line 105
# Sets up an unloaded instance with an empty error list.
#
# The data directory is chosen from the first truthy value of:
# the +data_directory+ argument, the SWISSMATCH_DATA environment variable,
# and finally the expanded path of ~/.swissmatch.
#
# @param [nil, String] data_directory
#   The directory in which the post mat[ch] files reside
def initialize(data_directory=nil)
  reset_errors!
  @loaded         = false
  @data_directory = data_directory ||
                    ENV['SWISSMATCH_DATA'] ||
                    File.expand_path('~/.swissmatch')
end

Public Instance Methods

latest_binary_file() click to toggle source
# File lib/swissmatch/location/datafiles.rb, line 124
# Finds the newest binary data file in the data directory.
#
# All files matching "locations_*.binary" directly inside @data_directory are
# considered, and the one with the lexicographically greatest path is chosen.
# Taking the maximum (instead of the last glob result) makes the result
# independent of the order in which the file system lists the entries and
# does not require an Enumerable#last extension.
#
# NOTE(review): this presumes the file names embed a sortable date stamp so
# that the greatest name is also the most recent file - confirm against the
# code that writes these files.
#
# @return [String, nil]
#   The path of the selected file, or nil if no file matches.
def latest_binary_file
  Dir.glob("#{@data_directory}/locations_*.binary").max
end
load!(file=nil) click to toggle source

Loads the data into this DataFiles instance

@return [self]

Returns self.
# File lib/swissmatch/location/datafiles.rb, line 144
# Loads the data of a binary locations file into this DataFiles instance.
#
# The call is a no-op when the instance is already loaded (either by a
# previous successful #load! or by #load_empty!).
#
# File layout as read by this method (all integers big-endian, unsigned):
# * 18 byte header: date (32 bit, julian day number), random code (32 bit),
#   then five 16 bit record counts (zip1, zip2, com1, com2, district)
# * 16 bytes: byte sizes of the four column sections (four 32 bit values)
# * section of 8 bit columns, section of 16 bit columns, section of 32 bit
#   columns, and a UTF-8 text section whose fields are separated by "\x1f"
# Within each section the columns are stored one after another, each column
# holding one value per record.
#
# @param [nil, String] file
#   The path of the binary file to load. Defaults to #latest_binary_file.
#
# @return [self]
#
# @raise [LoadError]
#   If the file can not be found, is not readable, or is too short to
#   contain the header.
def load!(file=nil)
  return self if @loaded

  file ||= latest_binary_file

  unless file && File.readable?(file)
    # The message names the presumed origin of the data directory.
    origin = ENV['SWISSMATCH_DATA'] ? 'SWISSMATCH_DATA' : '~/.swissmatch'
    raise LoadError.new("File #{file.inspect} not found or not readable (used #{origin}, data_directory=#{@data_directory}) - see https://github.com/apeiros/swissmatch-location#installation", nil)
  end

  header_size = 34 # 18 bytes of date/code/counts + 16 bytes of section sizes
  data        = File.binread(file)
  if data.bytesize < header_size
    raise LoadError.new("File #{file.inspect} is truncated (#{data.bytesize} bytes, header needs #{header_size})", nil)
  end

  date, random_code, zip1_count, zip2_count, com1_count, com2_count, district_count = *data[0,18].unpack("NNn*")
  int1_size, int2_size, int4_size, text_size = *data[18,16].unpack("N*")

  offset    = header_size
  int1_cols = data[offset, int1_size].unpack("C*")
  int2_cols = data[offset+=int1_size, int2_size].unpack("n*")
  int4_cols = data[offset+=int2_size, int4_size].unpack("N*")
  # Limit -1 keeps trailing empty fields; without it an empty value at the
  # end of the text section would shorten the last column and break the
  # transposition below.
  text_cols = data[offset+=int4_size, text_size].force_encoding(Encoding::UTF_8).split("\x1f", -1)

  # 8 bit columns
  offset                    = 0
  zip1_type                 = int1_cols[offset, zip1_count]
  zip1_addon                = int1_cols[offset += zip1_count, zip1_count]
  zip1_language             = int1_cols[offset += zip1_count, zip1_count]
  zip1_language_alternative = int1_cols[offset += zip1_count, zip1_count]
  zip2_region               = int1_cols[offset += zip1_count, zip2_count]
  zip2_type                 = int1_cols[offset += zip2_count, zip2_count]
  zip2_lang                 = int1_cols[offset += zip2_count, zip2_count]
  com2_PLZZ                 = int1_cols[offset += zip2_count, com2_count]

  # 16 bit columns
  offset                        = 0
  zip1_onrp                     = int2_cols[offset, zip1_count]
  zip1_code                     = int2_cols[offset += zip1_count, zip1_count]
  zip1_delivery_by              = int2_cols[offset += zip1_count, zip1_count]
  zip1_largest_community_number = int2_cols[offset += zip1_count, zip1_count]
  zip2_onrp                     = int2_cols[offset += zip1_count, zip2_count]
  com1_bfsnr                    = int2_cols[offset += zip2_count, com1_count]
  com1_agglomeration            = int2_cols[offset += com1_count, com1_count]
  com2_GDENR                    = int2_cols[offset += com1_count, com2_count]
  com2_PLZ4                     = int2_cols[offset += com2_count, com2_count]
  district_GDEBZNR              = int2_cols[offset += com2_count, district_count]

  # The 32 bit section consists of a single column
  zip1_valid_from = int4_cols

  # text columns
  offset           = 0
  zip1_name_short  = text_cols[offset, zip1_count]
  zip1_name        = text_cols[offset += zip1_count, zip1_count]
  zip1_canton      = text_cols[offset += zip1_count, zip1_count]
  zip2_short       = text_cols[offset += zip1_count, zip2_count]
  zip2_name        = text_cols[offset += zip2_count, zip2_count]
  com1_name        = text_cols[offset += zip2_count, com1_count]
  com1_canton      = text_cols[offset += com1_count, com1_count]
  district_GDEKT   = text_cols[offset += com1_count, district_count]
  district_GDEBZNA = text_cols[offset += district_count, district_count]

  # Turn the column-wise data into one array per record
  zip1     = [
    zip1_onrp, zip1_type, zip1_canton, zip1_code, zip1_addon,
    zip1_delivery_by, zip1_language, zip1_language_alternative,
    zip1_name_short, zip1_name, zip1_largest_community_number,
    zip1_valid_from
  ].transpose
  zip2     = [zip2_onrp, zip2_region, zip2_type, zip2_lang, zip2_short, zip2_name].transpose
  com1     = [com1_bfsnr, com1_name, com1_canton, com1_agglomeration].transpose
  com2     = [com2_PLZ4, com2_PLZZ, com2_GDENR].transpose
  district = [district_GDEKT, district_GDEBZNR, district_GDEBZNA].transpose

  # Order matters: communities need @cantons, zip codes need @communities
  @date        = Date.jd(date)
  @random_code = random_code
  @cantons     = AllCantons
  @districts   = load_districts(district)
  @communities = load_communities(com1)
  @zip_codes   = load_zipcodes(zip1, zip2, com2)
  @loaded      = true

  self
end
load_communities(data) click to toggle source

@return [SwissMatch::Communities]

An instance of SwissMatch::Communities containing all communities defined by the
files known to this DataFiles instance.
# File lib/swissmatch/location/datafiles.rb, line 232
# Builds the communities from the per-record community data.
#
# Each record is [bfsnr, name, canton license tag, agglomeration number].
# The agglomeration number decides what is passed to Community.new as the
# agglomeration:
# * equal to the record's own bfsnr: the symbol :self
# * zero: nil
# * anything else: the already built Community with that number
#
# Records of the third kind are resolved in a second pass, after all records
# of the first two kinds have been built.
#
# @param [Array<Array>] data
#   One [bfsnr, name, canton, agglomeration] array per community.
#
# @return [SwissMatch::Communities]
#   An instance of SwissMatch::Communities containing all communities defined by the
#   files known to this DataFiles instance.
#
# @raise [RuntimeError]
#   If a record references an agglomeration community that has not been built.
def load_communities(data)
  pending  = []
  complete = {}

  data.each do |bfsnr, name, canton, agglomeration|
    canton = @cantons.by_license_tag(canton)
    if agglomeration == bfsnr
      complete[bfsnr] = Community.new(bfsnr, name, canton, :self)
    elsif agglomeration.zero?
      complete[bfsnr] = Community.new(bfsnr, name, canton, nil)
    else
      # canton is already resolved at this point
      pending << [bfsnr, name, canton, agglomeration]
    end
  end

  pending.each do |bfsnr, name, canton, agglomeration|
    community = complete[agglomeration]
    # The looked-up community is what must exist; the agglomeration number
    # itself is always present here and can not signal a dangling reference.
    raise "Incomplete community referenced by #{bfsnr}: #{agglomeration}" unless community
    complete[bfsnr] = Community.new(bfsnr, name, canton, community)
  end

  Communities.new(complete.values)
end
load_districts(data) click to toggle source
# File lib/swissmatch/location/datafiles.rb, line 223
# Builds the districts from the per-record district data.
#
# Every record's fields are passed on to District.new, followed by a new,
# empty SwissMatch::Communities instance (one per district).
#
# @param [Array<Array>] data
#   One array of District.new arguments per district.
#
# @return [SwissMatch::Districts]
def load_districts(data)
  districts = data.map do |fields|
    District.new(*fields, SwissMatch::Communities.new([]))
  end

  Districts.new(districts)
end
load_empty!() click to toggle source
# File lib/swissmatch/location/datafiles.rb, line 128
# Puts this instance into a loaded state that contains no districts,
# communities or zip codes. Cantons are still set to AllCantons, the date is
# Date.new(0) and the random code is 0.
#
# Does nothing if the instance is already loaded.
#
# NOTE(review): the return value is nil when already loaded and the empty
# zip code collection otherwise; callers should presumably not rely on it.
def load_empty!
  unless @loaded
    @loaded      = true
    @random_code = 0
    @date        = Date.new(0)
    @cantons     = AllCantons
    @communities = Communities.new([])
    @districts   = Districts.new([])
    @zip_codes   = ZipCodes.new([])
  end
end
load_zipcodes(zip1_data, zip2_data, com2_data) click to toggle source

TODO: load all files, not just the most recent

TODO: calculate valid_until dates

@return [SwissMatch::ZipCodes]

An instance of SwissMatch::ZipCodes containing all zip codes defined by the
files known to this DataFiles instance.
# File lib/swissmatch/location/datafiles.rb, line 260
# Builds the zip codes from the three record sets of the binary file.
#
# Works in three steps:
# 1. For every zip1 record an array of ZipCode.new arguments is prepared and
#    remembered by its onrp.
# 2. The zip2 records append additional names to those argument arrays
#    (type 2 into slots 5 and 7, type 3 into slots 8 and 9, other types are
#    ignored).
# 3. The argument arrays are turned into ZipCode instances: first those that
#    deliver themselves, then all others, so that a delivery_by reference
#    can be replaced by the referenced ZipCode. A reference to a zip code
#    that has not been built yet is recorded in @errors and replaced by nil.
#
# NOTE(review): the slot comments inherited from the original code call
# slot 8 "short names" and slot 9 "names", while step 2 appends the full
# name to slot 8 and the short name to slot 9 - confirm against
# ZipCode#initialize which of the two is right.
#
# @param [Array<Array>] zip1_data
#   Records of [onrp, type, canton, code, addon, delivery_by, language code,
#   alternative language code, short name, name, largest community number,
#   valid_from (julian day number)].
# @param [Array<Array>] zip2_data
#   Records of [onrp, running number, type, language code, short name, name].
# @param [Array<Array>] com2_data
#   Records whose last element is a community number and whose preceding
#   elements form the [code, addon] key of the zip code.
#
# @return [SwissMatch::ZipCodes]
#   An instance of SwissMatch::ZipCodes containing all zip codes defined by the
#   files known to this DataFiles instance.
#
# @raise [RuntimeError]
#   If a record is delivered by an onrp for which no zip1 record exists.
def load_zipcodes(zip1_data, zip2_data, com2_data)
  # [code, addon] => community numbers
  numbers_by_zip = Hash.new { |hash, key| hash[key] = [] }
  com2_data.each do |*zip_key, community_number|
    numbers_by_zip[zip_key] << community_number
  end

  rows_by_onrp     = {} # onrp => argument array, later replaced by the ZipCode
  self_served_rows = []
  remaining_rows   = []

  zip1_data.each do |onrp, type, canton, code, addon, delivery_by, lang, lang_alt, name_short, name, largest_community_number, valid_from|
    delivery_by =
      case delivery_by
      when 0    then nil
      when onrp then :self
      else           delivery_by
      end
    language             = LanguageCodes[lang]
    language_alternative = LanguageCodes[lang_alt]
    name_short           = Name.new(name_short, language)
    name                 = Name.new(name, language)

    # compact, because some communities already no longer exist, so by_community_numbers can
    # contain nils which must be removed
    community_numbers = (numbers_by_zip[[code, addon]] | [largest_community_number]).sort
    found_communities = @communities.by_community_numbers(*community_numbers).compact
    communities       = Communities.new(found_communities)

    row = [
      onrp,                              # 0: ordering_number
      type,                              # 1: type
      code,                              # 2
      addon,                             # 3
      name,                              # 4: name (official)
      [name],                            # 5: names (official + alternative)
      name_short,                        # 6: name_short (official)
      [name_short],                      # 7: names_short (official + alternative)
      [],                                # 8: PLZ2 type 3 short names (additional region names)
      [],                                # 9: PLZ2 type 3 names (additional region names)
      cantons.by_license_tag(canton),    # 10: canton
      language,                          # 11
      language_alternative,              # 12
      false,                             # 13: sortfile_member TODO: remove
      delivery_by,                       # 14: delivery_by
      communities.by_community_number(largest_community_number),  # 15: community_number
      communities,                       # 16
      Date.jd(valid_from)                # 17: valid_from
    ]

    rows_by_onrp[onrp] = row
    (:self == delivery_by ? self_served_rows : remaining_rows) << row
  end

  zip2_data.each do |onrp, rn, type, lang, short, name|
    language = LanguageCodes[lang]
    row      = rows_by_onrp[onrp]
    if type == 2
      row[5] << Name.new(name, language, rn)
      row[7] << Name.new(short, language, rn)
    elsif type == 3
      row[8] << Name.new(name, language, rn)
      row[9] << Name.new(short, language, rn)
    end
  end

  self_served_rows.each { |row| rows_by_onrp[row[0]] = ZipCode.new(*row) }

  remaining_rows.each do |row|
    if row[14]
      deliverer = rows_by_onrp[row[14]]
      raise "Delivery not found:\n#{row.inspect}" unless deliverer

      if deliverer.kind_of?(Array)
        # Still an argument array, i.e. the referenced zip code has not been built yet
        @errors << LoadError.new("Invalid reference: onrp #{row.at(0)} delivery by #{row.at(14)}", row)
        row[14] = nil
      else
        row[14] = deliverer
      end
    end
    rows_by_onrp[row[0]] = ZipCode.new(*row)
  end

  ZipCodes.new(rows_by_onrp.values)
end
reset_errors!() click to toggle source

Resets the list of errors that were encountered during load.

@return [self]

# File lib/swissmatch/location/datafiles.rb, line 119
# Replaces the list of load errors with a new, empty array.
#
# @return [self]
def reset_errors!
  tap { @errors = [] }
end