class PiplCollector

Public Class Methods

new(input_dir, output_dir, output_append_dir, id_field, ignore_files, api_key, field_mapping, geocoder_api_key) click to toggle source
# File lib/piplcollector.rb, line 6
# Stores the collector's configuration and indexes the results already saved
# in +output_dir+.
#
# @param input_dir [String] path fragment that is swapped for +output_append_dir+
#   when output paths are computed
# @param output_dir [String] directory holding one saved JSON file per id
# @param output_append_dir [String] replacement for +input_dir+ in output paths
# @param id_field [String] key under which each data item stores its id
# @param ignore_files [String] file names containing this text are skipped by #run
# @param api_key [String] passed through to PiplRequest
# @param field_mapping [Object] passed through to PiplRequest
# @param geocoder_api_key [String] passed through to PiplRequest
def initialize(input_dir, output_dir, output_append_dir, id_field, ignore_files, api_key, field_mapping, geocoder_api_key)
  # Credentials and request configuration
  @api_key = api_key
  @geocoder_api_key = geocoder_api_key
  @field_mapping = field_mapping

  # How input items and files are interpreted
  @id_field = id_field
  @ignore_files = ignore_files

  # Locations on disk
  @input_dir = input_dir
  @output_append_dir = output_append_dir
  @output_dir = output_dir

  # Must come last: load_output_files reads @output_dir
  @already_collected = load_output_files
end

Public Instance Methods

create_write_dirs(dir) click to toggle source

Creates the directories along the given path if they don’t already exist

# File lib/piplcollector.rb, line 90
# Creates every missing directory along +dir+, one path segment at a time.
#
# A path that starts with "/" is built from the filesystem root; any other
# path is built relative to the current working directory, so the directories
# end up where a later File.write to the same relative path expects them.
# Empty segments (from leading, trailing or doubled slashes) are skipped.
#
# @param dir [String] slash-separated directory path
# @return [Array<String>] the non-empty segments of +dir+
# @raise [SystemCallError] if Dir.mkdir fails for a segment
def create_write_dirs(dir)
  segments = dir.split("/").reject(&:empty?)

  # nil means "nothing built yet", so a relative path gets no leading slash;
  # "" makes the first segment of an absolute path come out as "/segment".
  current = dir.start_with?("/") ? "" : nil

  segments.each do |segment|
    current = current ? "#{current}/#{segment}" : segment
    Dir.mkdir(current) unless File.directory?(current)
  end
end
gen_filename_from_id(data_item) click to toggle source

Generates a file-safe name from the id field

# File lib/piplcollector.rb, line 39
# Builds a file-safe name from the item's id by stripping every ":", "/"
# and "." character.
#
# @param data_item [Hash] item whose id is stored under @id_field
# @return [String] the id with those three characters removed
def gen_filename_from_id(data_item)
  raw_id = data_item[@id_field]
  raw_id.delete(":/.")
end
get_already_collected_person(data_item) click to toggle source

Gets content for already collected person

# File lib/piplcollector.rb, line 66
# Loads the previously saved result for an item from the output directory.
#
# @param data_item [Hash] item whose id determines the saved file's name
# @return [Object] the parsed contents of "<output_dir>/<file-safe id>.json"
def get_already_collected_person(data_item)
  saved_path = "#{@output_dir}/#{gen_filename_from_id(data_item)}.json"
  JSON.parse(File.read(saved_path))
end
get_person(data_item) click to toggle source

Get info on person from pipl

# File lib/piplcollector.rb, line 53
# Requests data for one item from Pipl, saves the raw response and returns
# it parsed.
#
# @param data_item [Hash] item to look up
# @return [Object, nil] the parsed response, or nil when the request
#   produced no output
def get_person(data_item)
  # One-second pause before every request — presumably rate limiting; confirm
  sleep(1)

  request = PiplRequest.new(@api_key, @field_mapping, @geocoder_api_key)
  response = request.get_data(data_item)
  return unless response

  # Persist the raw response before parsing it for the caller
  save_output_file(response, data_item)
  JSON.parse(response)
end
get_write_dir(dir, file) click to toggle source

Builds the output path for a file by replacing the input directory in its path with the output directory

# File lib/piplcollector.rb, line 101
# Builds the path a processed file is written to: every occurrence of
# @input_dir in +dir+ is replaced with @output_append_dir, then +file+ is
# appended after a "/".
#
# @param dir [String] directory the source file lives in
# @param file [String] file name
# @return [String] the output path
def get_write_dir(dir, file)
  target_dir = dir.gsub(@input_dir, @output_append_dir)
  "#{target_dir}/#{file}"
end
load_output_files() click to toggle source

Load the output files into already_collected

# File lib/piplcollector.rb, line 19
# Lists the entries of @output_dir, each with every ".json" removed from
# its name. The "." and ".." entries are left out.
#
# @return [Array<String>] one name per entry in the output directory
def load_output_files
  Dir.foreach(@output_dir)
     .reject { |entry| entry == '.' || entry == '..' }
     .map { |entry| entry.gsub(".json", "") }
end
process(file) click to toggle source

Adds Pipl data to each item of a JSON file and returns the result as formatted JSON

# File lib/piplcollector.rb, line 72
# Reads a JSON file and attaches Pipl data to every item that has an id.
#
# Items already collected get their saved result; the others are fetched via
# get_person. Items without an id pass through unchanged.
#
# @param file [String] path of the JSON file to read
# @return [String] the items, pretty-printed as JSON
def process(file)
  records = JSON.parse(File.read(file))

  enriched = records.map do |record|
    already_saved = was_collected?(record)
    if record[@id_field]
      # Stored under the symbol key :pipl, as distinct from any string key
      record[:pipl] = already_saved ? get_already_collected_person(record) : get_person(record)
    end
    record
  end

  JSON.pretty_generate(enriched)
end
run(dir) click to toggle source

Recursively processes every JSON file under a directory that is not ignored and has no output yet

# File lib/piplcollector.rb, line 107
# Walks +dir+ recursively and processes each eligible file.
#
# A file is eligible when its name contains ".json", does not contain
# @ignore_files, and its output path does not exist yet. The result of
# #process is written to that output path.
#
# @param dir [String] directory to walk
# @return [nil]
def run(dir)
  Dir.foreach(dir) do |entry|
    next if entry == '.' || entry == '..'

    source_path = "#{dir}/#{entry}"
    if File.directory?(source_path)
      run(source_path)
      next
    end

    next unless entry.include?(".json") && !entry.include?(@ignore_files)
    # An existing output file means this input was handled on an earlier run
    next if File.exist?(get_write_dir(dir, entry))

    enriched = process(source_path)
    create_write_dirs(dir.gsub(@input_dir, @output_append_dir))
    File.write(get_write_dir(dir, entry), enriched)
  end
end
save_output_file(output_item, data_item) click to toggle source

Save output file

# File lib/piplcollector.rb, line 32
# Writes one result to "<output_dir>/<file-safe id>.json" and records the id
# as collected.
#
# @param output_item [String] content to write
# @param data_item [Hash] item whose id names the file
# @return [Array<String>] the updated list of collected ids
def save_output_file(output_item, data_item)
  name = gen_filename_from_id(data_item)
  target = "#{@output_dir}/#{name}.json"
  File.write(target, output_item)
  @already_collected << name
end
was_collected?(data_item) click to toggle source

Checks if it is already collected

# File lib/piplcollector.rb, line 44
# Tells whether an item needs no fetching.
#
# @param data_item [Hash] item to check
# @return [Boolean] true when the item has no id at all, or when its
#   file-safe id is in the collected list
def was_collected?(data_item)
  # Items without an id are reported as collected
  return true unless data_item[@id_field]

  @already_collected.include?(gen_filename_from_id(data_item))
end