class SwissMatch::Location::Converter

SwissMatch::Location::Converter

Converts the files supplied by post.ch and bfs.admin.ch into a single binary file that is faster to load.

Format:

Byte 0...4: PostMatch master file date, in Date.jd format
Byte 4...8: PostMatch master file random code
Byte 8...18: zip1_count, zip2_count, community1_count, community2_count, district_count; packed with n*
Byte 18...34: bytesizes of int1_columns, int2_columns, int4_columns and text_columns
Byte 34...-1: int1_columns + int2_columns + int4_columns + text_columns

int1_columns: packed with C* the columns
* zip1_type
* zip1_addon
* zip1_language
* zip1_language_alternative
* zip2_region
* zip2_type
* zip2_lang
* com2_PLZZ

int2_columns: packed with n* the columns
* zip1_onrp
* zip1_code
* zip1_delivery_by
* zip1_largest_community_number
* zip2_onrp
* com1_bfsnr
* com1_agglomeration
* com2_GDENR
* com2_PLZ4
* district_GDEBZNR

int4_columns: packed with N* the columns
* zip1_valid_from

text_columns: joined with \x1f
* zip1_name_short
* zip1_name
* zip1_canton
* zip2_short
* zip2_name
* com1_name
* com1_canton
* district_GDEKT
* district_GDEBZNA

Public Class Methods

new(match_path, districts_path=nil, communities_path=nil) click to toggle source
# File lib/swissmatch/location/converter.rb, line 53
# Remembers the locations of the three source files used by #convert.
#
# @param match_path [String] path of the master file read by #convert
# @param districts_path [String, nil] path of the districts CSV; when it is
#   nil (or false) the result of gem_districts_path is used instead
# @param communities_path [String, nil] path of the communities CSV; when it
#   is nil (or false) the result of gem_communities_path is used instead
def initialize(match_path, districts_path=nil, communities_path=nil)
  @match_path = match_path

  # Fall back to the data files located via the gem_*_path helpers.
  @districts_path = districts_path
  @districts_path = gem_districts_path unless @districts_path

  @communities_path = communities_path
  @communities_path = gem_communities_path unless @communities_path

  # Nothing has been converted yet; #convert fills this in.
  @data = nil
end

Public Instance Methods

convert() click to toggle source
# File lib/swissmatch/location/converter.rb, line 79
# Reads the master file, the districts CSV and the communities CSV, and
# assembles the packed binary payload in @data.
#
# All three files are read as Windows-1252 and transcoded to UTF-8. The master
# file is split into its sections by the first line starting with "01", "02",
# "03" and "04" respectively; everything before "01" is the header record.
#
# Layout of @data, in order:
# * master file date (Julian day number) and random code, packed with "NN"
# * zip1, zip2, community1, community2 and district record counts, packed with "n*"
# * byte sizes of the int1, int2, int4 and text column blobs, packed with "N*"
# * the int1 ("C*"), int2 ("n*"), int4 ("N*") and text ("\x1f"-joined) blobs
#
# @return [self]
# @raise [ArgumentError] if the header record or one of the "01", "02", "03"
#   sections cannot be found, or if a date field is not of the form YYYYMMDD
def convert
  read_utf8 = lambda do |path|
    File.read(path, encoding: Encoding::Windows_1252).encode(Encoding::UTF_8)
  end

  # Shared by the header date and the zip1 valid-from column.
  to_jd = lambda do |yyyymmdd|
    parts = yyyymmdd.to_s.match(/^(\d{4})(\d\d)(\d\d)$/)
    raise ArgumentError, "Invalid date #{yyyymmdd.inspect}, expected YYYYMMDD" unless parts

    Date.civil(*parts.captures.map(&:to_i)).jd
  end

  match_data       = read_utf8.call(@match_path)
  districts_data   = read_utf8.call(@districts_path)
  communities_data = read_utf8.call(@communities_path)

  # Fail with a descriptive error instead of passing nil on as an offset.
  find_section = lambda do |record_type, offset|
    position = match_data.index(/^#{record_type}/, offset)
    raise ArgumentError, "No record of type #{record_type} found in #{@match_path}" unless position

    position
  end

  r_base        = generate_expression(3, ';', '\r\n')
  r_zip_1       = generate_expression(16, ';', '\r\n')
  r_zip_2       = generate_expression(7, ';', '\r\n')
  r_community1  = generate_expression(5, ';', '\r\n')
  r_community2  = generate_expression(10, ',', '(?:\n|\z)')
  r_district    = generate_expression(3, ',', '\n')

  start_zip1 = find_section.call('01', 0)
  start_zip2 = find_section.call('02', start_zip1)
  start_com  = find_section.call('03', start_zip2)
  # Without a following "04" section the community records run to the end.
  end_com    = match_data.index(/^04/, start_com) || match_data.length

  base = match_data[0...start_zip1].scan(r_base).first
  raise ArgumentError, "No header record found in #{@match_path}" unless base

  zip1      = match_data[start_zip1...start_zip2].scan(r_zip_1)
  zip2      = match_data[start_zip2...start_com].scan(r_zip_2)
  com1      = match_data[start_com...end_com].scan(r_community1)
  com2      = communities_data.scan(r_community2)
  districts = districts_data.scan(r_district)

  # Turn the row-wise records into column-wise arrays.
  zip1_columns = zip1.transpose
  zip2_columns = zip2.transpose
  com1_columns = com1.transpose
  com2_columns = com2.transpose
  dist_columns = districts.transpose

  int1_columns = (
    zip1_columns.values_at(3, 5, 10, 11).flatten +
    zip2_columns.values_at(2, 3, 4).flatten +
    com2_columns[8]
  ).map(&:to_i).pack("C*")

  int2_columns = (
    zip1_columns.values_at(1, 4, 12, 2).flatten +
    zip2_columns[1] +
    com1_columns.values_at(1, 4).flatten +
    com2_columns[4] +
    com2_columns[7] +
    dist_columns[1]
  ).map(&:to_i).pack("n*")

  int4_columns = zip1_columns[13].map(&to_jd).pack("N*")

  text_columns = (
    zip1_columns.values_at(7, 8, 9).flatten +
    zip2_columns[5] +
    zip2_columns[6] +
    com1_columns[2] +
    com1_columns[3] +
    dist_columns[0] +
    dist_columns[2]
  ).join("\x1f").force_encoding(Encoding::BINARY)

  @data =
    [to_jd.call(base[1]), base[2].to_i].pack("NN") +
    [zip1.size, zip2.size, com1.size, com2.size, districts.size].pack("n*") +
    [int1_columns.bytesize, int2_columns.bytesize, int4_columns.bytesize, text_columns.bytesize].pack("N*") +
    int1_columns +
    int2_columns +
    int4_columns +
    text_columns

  self
end
gem_communities_path() click to toggle source
# File lib/swissmatch/location/converter.rb, line 71
# Picks the communities CSV in the gem data directory whose path sorts last.
#
# @return [String, nil] the matching path, or nil when no file matches
def gem_communities_path
  pattern    = "#{gem_data_path}/communities_*.csv"
  candidates = Dir.glob(pattern)

  candidates.max
end
gem_data_path() click to toggle source
# File lib/swissmatch/location/converter.rb, line 60
def gem_data_path
  data_directory = File.expand_path('../../../../data/swissmatch-location', __FILE__)
  data_directory = Gem.datadir 'swissmatch-location' if defined?(Gem) && !File.directory?(data_directory)

  data_directory
end
gem_districts_path() click to toggle source
# File lib/swissmatch/location/converter.rb, line 67
# Picks the districts CSV in the gem data directory whose path sorts last.
#
# @return [String, nil] the matching path, or nil when no file matches
def gem_districts_path
  pattern    = "#{gem_data_path}/districts_*.csv"
  candidates = Dir.glob(pattern)

  candidates.max
end
generate_expression(size, separator, terminator) click to toggle source
# File lib/swissmatch/location/converter.rb, line 75
# Builds the regular expression used to scan one record of a delimited file.
#
# The expression is anchored at the start of a line and has one capture group
# per field; a field is any run of characters other than the separator.
# Both separator and terminator are interpolated into the pattern as-is, i.e.
# they are regex fragments and are NOT escaped.
#
# @param size [Integer] number of fields (capture groups) per record
# @param separator [String] field separator, e.g. ';' or ','
# @param terminator [String] regex source matching the end of a record
# @return [Regexp]
def generate_expression(size, separator, terminator)
  # The separator is used directly; no eval is needed to obtain the string
  # (eval also broke on separators containing a single quote).
  fields = Array.new(size) { "([^#{separator}]*)" }

  /^#{fields.join(separator)}#{terminator}/
end
write(path) click to toggle source
# File lib/swissmatch/location/converter.rb, line 150
# Writes the payload built by #convert to the given file.
#
# The file is written in binary mode so the packed bytes are stored verbatim
# (a text-mode handle may translate newline bytes on some platforms).
# NOTE(review): when #convert has not been called @data is nil and an empty
# file is written — confirm whether that should be an error.
#
# @param path [String] destination file; created or truncated
# @return [Integer] number of bytes written
def write(path)
  File.binwrite(path, @data)
end