class Result2csv::Converter

Public Class Methods

bucket() click to toggle source
# File lib/result2csv/converter.rb, line 58
# Returns the "datafiniti-voltron-results" bucket handle.
#
# A new AWS::S3 client is constructed on every call; nothing is cached.
def self.bucket
  AWS::S3.new.buckets["datafiniti-voltron-results"]
end
convert_to_csv(url, user_token, result_id) click to toggle source
# File lib/result2csv/converter.rb, line 112
# Downloads the JSON result document at +url+ and returns it rendered as CSV.
#
# The body is parsed as JSON. When parsing fails, two textual repairs are
# tried in order before giving up:
#   1. every "]," is replaced with ","
#   2. a leading "," at the start of a line is removed
#
# @param url [String] location passed to RestClient.get
# @param user_token forwarded to create_matrix for progress reporting
# @param result_id forwarded to create_matrix for progress reporting
# @return whatever +to_csv+ returns for the matrix built by create_matrix
# @raise [JSON::ParserError] when the body cannot be parsed even after both repairs
def self.convert_to_csv(url, user_token, result_id)
  require 'json'

  cached_result = RestClient.get(url)
  result_file =
    begin
      JSON.parse(cached_result)
    rescue JSON::ParserError
      begin
        JSON.parse(cached_result.gsub(/\],/, ','))
      rescue JSON::ParserError
        JSON.parse(cached_result.gsub(/^,/, ''))
      end
    end

  # NOTE(review): the matrix is an Array of row Arrays; confirm that the
  # to_csv in scope renders one line per row for nested arrays.
  create_matrix(result_file, user_token, result_id).to_csv
end
create_matrix(result_file, user_token, result_id) click to toggle source
# File lib/result2csv/converter.rb, line 132
# Builds the row matrix for a parsed result set: the header row first,
# followed by one row per record.
#
# Progress is reported after every record as an integer percentage
# ("converting: N%"), where N is the 1-based record position over the total.
#
# @param result_file collection of records (responds to +size+ and +each_with_index+)
# @param user_token forwarded to report_csv_conversion_progress
# @param result_id forwarded to report_csv_conversion_progress
# @return [Array] header row followed by the value rows
def self.create_matrix(result_file, user_token, result_id)
  headers = parse_csv_headers(result_file)
  total_size = result_file.size

  rows = result_file.each_with_index.map do |object, index|
    row = results_values_to_row(object, headers.size)
    # Float division, then truncation, matches the percentage shown to users.
    percent = (((index + 1).to_f / total_size.to_f) * 100).to_i
    report_csv_conversion_progress(user_token, result_id, "converting: #{percent}%")
    row
  end

  [headers] + rows
end
does_not_have_csv?(result_url) click to toggle source
# File lib/result2csv/converter.rb, line 49
# Negation of has_csv?.
#
# @param result_url either the URL itself or a Hash carrying it under :result_url
# @return [Boolean] true when has_csv? is falsy for the URL
def self.does_not_have_csv?(result_url)
  url = result_url.is_a?(Hash) ? result_url[:result_url] : result_url
  !has_csv?(url)
end
file(url) click to toggle source
# File lib/result2csv/converter.rb, line 54
# Fetches +url+ with RestClient and returns the body parsed as JSON.
#
# @param url [String] location to GET
# @return the parsed JSON document
def self.file(url)
  response = RestClient.get(url)
  JSON.parse(response)
end
get_results_url(options) click to toggle source
# File lib/result2csv/converter.rb, line 21
# Looks up the results for a crawl and returns the parsed response body.
#
# @param options [Hash] must provide :crawl_name and :user_token
# @return the parsed JSON body when the status is below 400, otherwise ''
def self.get_results_url(options)
  result = retrieve(crawl_name: options[:crawl_name], user_token: options[:user_token])
  return '' unless result[:status] < 400

  JSON.parse(result[:body])
end
has_csv?(url) click to toggle source
# File lib/result2csv/converter.rb, line 42
# Reports whether the CSV counterpart of the result at +url+ exists in the
# "datafiniti-voltron-results" bucket.
#
# The CSV key is the result's object key up to its first ".", with
# "_csv.csv" appended. Any StandardError raised during the existence check
# is treated as "does not exist".
#
# @param url [String] URL of the original result object
# @return [Boolean]
def self.has_csv?(url)
  client = AWS::S3.new
  csv_key = "#{s3_object_key(url).split('.').first}_csv.csv"

  begin
    client.buckets["datafiniti-voltron-results"].objects[csv_key].exists?
  rescue StandardError
    false
  end
end
parse_csv_headers(result_file) click to toggle source
# File lib/result2csv/converter.rb, line 93
# Derives the CSV header row from the keys of the first record.
#
# Only the first record is inspected; keys that appear solely in later
# records are not included.
#
# @param result_file collection of Hash-like records (responds to +first+)
# @return [Array] the first record's keys, or [] when there are no records
def self.parse_csv_headers(result_file)
  first_record = result_file.first
  # An empty result set has no first record; return no headers instead of
  # failing on nil.keys.
  return [] if first_record.nil?

  first_record.keys.to_a
end
parser() click to toggle source
# File lib/result2csv/converter.rb, line 99
# Returns a new Yajl::Parser instance; a fresh parser is built on every call.
def self.parser
  Yajl::Parser.new
end
report_csv_conversion_progress(user_token, result_id, message) click to toggle source
# File lib/result2csv/converter.rb, line 147
# Writes a progress message to standard output, prefixed with a carriage
# return so each message overwrites the previous one on the same line.
#
# +user_token+ and +result_id+ are currently unused; they are only referenced
# by the disabled realtime publish call kept below.
def self.report_csv_conversion_progress(user_token, result_id, message)
  # RealtimeMessage.publish(user_token, 'conversion-status', {:progress => message, :id => result_id})
  $stdout.print("\r#{message}")
end
results_values_to_row(object, columns) click to toggle source
# File lib/result2csv/converter.rb, line 103
# Converts one record into a CSV row by passing each of its values, in
# order, through truncate_to_max_cell_size.
#
# NOTE(review): +columns+ is accepted but never used, so rows are neither
# padded nor aligned to the header row — confirm that records always share
# the first record's keys and ordering.
#
# @param object Hash-like record (responds to +values+)
# @param columns unused
# @return [Array] one cell per value
def self.results_values_to_row(object, columns)
  object.values.map { |value| truncate_to_max_cell_size(value) }
end
retrieve(options) click to toggle source
# File lib/result2csv/converter.rb, line 4
# Requests the "url" field of a crawl's results from the 80legs API,
# filtered by a date 604800 seconds (seven days) before now.
#
# The request is made with RestClient's block form, so the status code and
# body are captured as-is for every response instead of being raised on.
#
# @param options [Hash] must provide :user_token and :crawl_name
# @return [Hash] { body: <response body>, status: <response code> }
def self.retrieve(options)
  one_week_ago = Time.at(Time.now.gmtime.to_i - 604800)
  date_string = '{"' + one_week_ago.strftime("%F") + '":""}'

  template = Addressable::Template.new("https://#{options[:user_token]}:@api.80legs.com/v2/results/#{options[:crawl_name]}/{?query*}")
  request_url = template.expand({
                                  "query" => {
                                    "fields" => ["url"],
                                    "dates" => date_string
                                  }
                                }).to_s

  # Capture the response in locals instead of instance variables on the
  # receiver, which would be shared by every caller of this method.
  status = nil
  body = nil
  RestClient.get(request_url) do |response, _request|
    status = response.code
    body = response.body
  end

  { body: body, status: status }
end
s3_csv_file(url) click to toggle source
# File lib/result2csv/converter.rb, line 37
# Returns a GET URL, as a String, for the CSV counterpart of the result at +url+.
#
# The CSV key is the result's object key up to its first ".", with
# "_csv.csv" appended. The URL is requested with an "attachment" content
# disposition and an "application/csv" content type.
#
# @param url [String] URL of the original result object
# @return [String]
def self.s3_csv_file(url)
  base_name = s3_object_key(url).split('.').first
  csv_object = s3_object("#{base_name}_csv.csv")
  signed_url = csv_object.url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    :response_content_disposition => "attachment",
    :response_content_type => "application/csv"
  )
  signed_url.to_s
end
s3_object(key) click to toggle source
# File lib/result2csv/converter.rb, line 32
# Returns the object stored under +key+ in the "datafiniti-voltron-results"
# bucket, using a newly constructed AWS::S3 client.
#
# @param key [String] object key within the bucket
def self.s3_object(key)
  results_bucket = AWS::S3.new.buckets["datafiniti-voltron-results"]
  results_bucket.objects[key]
end
s3_object_key(url) click to toggle source
# File lib/result2csv/converter.rb, line 27
# Derives an object key from a result URL: the first two segments of the
# URL path joined with "/".
#
# @param url [String] URL to parse with Addressable::URI
# @return [String]
def self.s3_object_key(url)
  path_segments = Addressable::URI.parse(url).path.split('/')
  # Index 0 is the empty string before the leading "/", so start at 1.
  path_segments.slice(1, 2).join('/')
end
s3_url(url, content_type="application/json") click to toggle source
# File lib/result2csv/converter.rb, line 63
# Returns a GET URL, as a String, for the result object referenced by +url+
# in the "datafiniti-voltron-results" bucket.
#
# The object key is the first two segments of the URL path. The URL is
# requested with an "attachment" content disposition and the given
# response content type.
#
# @param url [String] URL of the result object
# @param content_type [String] value passed as :response_content_type
# @return [String]
def self.s3_url(url, content_type="application/json")
  uri = Addressable::URI.parse(url)
  object_key = uri.path.split('/')[1,2].join('/')
  obj = AWS::S3.new.buckets["datafiniti-voltron-results"].objects[object_key]
  # The option key must be :response_content_type; the previous spelling
  # (:resonse_content_type) meant content_type was never applied.
  obj.url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    :response_content_disposition => "attachment",
    :response_content_type => content_type
  ).to_s
end
test_csv(result_url) click to toggle source
# File lib/result2csv/converter.rb, line 159
# Manual smoke test: converts the result at +result_url+ to CSV, uploads it
# under the "_csv.csv" key, and prints the resulting download URL.
#
# NOTE(review): every call goes through the +Result+ constant rather than
# this class's own methods of the same names — confirm that +Result+ is
# defined wherever this is run and that the indirection is intended.
def self.test_csv(result_url)
  csv_key = "#{Result.s3_object_key(result_url).split('.').first}_csv.csv"
  # 1, 1 are placeholder user_token / result_id values.
  csv_body = Result.convert_to_csv(result_url, 1, 1)
  Result.write_csv_to_s3(csv_key, csv_body)
  puts Result.s3_csv_file(result_url).to_s
end
toggle_downloaded_state(user, id, state) click to toggle source
# File lib/result2csv/converter.rb, line 152
# Sets the "downloaded" flag of result +id+ through the 80legs API.
#
# Sends a PUT with a JSON body of {downloaded: state}, authenticating with
# the user's token, and returns the HTTP status code of the response.
#
# @param user object responding to +token+
# @param id identifier of the result
# @param state value for the "downloaded" attribute
# @return the response code
def self.toggle_downloaded_state(user, id, state)
  # return EightyLegsApi.conn(token: user.token).put("/results/#{id}", {downloaded: state}.to_json, :content_type => :json)
  endpoint = "https://#{user.token}:@api.80legs.com/v2/results/#{id}"
  payload = { downloaded: state }.to_json
  RestClient.put(endpoint, payload, :content_type => :json) { |response| return response.code }
end
truncate_to_max_cell_size(string) click to toggle source
# File lib/result2csv/converter.rb, line 81
# Normalizes a value into CSV-safe cell text capped at 32767 characters.
#
# The value is first parsed as JSON and the parsed result's +to_s+ is used;
# when that fails for any reason the value's own +to_s+ is used instead.
# The text is then cut to 32767 characters, double quotes are removed and
# commas are replaced with semicolons.
#
# @param string the raw cell value (any object; nil yields "")
# @return [String]
def self.truncate_to_max_cell_size(string)
  return "" if string.nil?

  text =
    begin
      JSON.parse(string).to_s
    rescue StandardError
      string.to_s
    end

  # Apply the cap on both paths; previously values that parsed as JSON
  # skipped truncation entirely.
  text[0, 32767].gsub('"', "").gsub(",", ";")
end
write_csv_to_file(object_name, csv) click to toggle source
# File lib/result2csv/converter.rb, line 75
# Writes +csv+ to a file in the current working directory, named after the
# last "/"-separated segment of +object_name+. An existing file is overwritten.
#
# @param object_name [String] object key or path; only its final segment is used
# @param csv content to write
# @return [Integer] number of bytes written
def self.write_csv_to_file(object_name, csv)
  file_name = "#{object_name.split('/').last}"
  File.write(file_name, csv)
end
write_csv_to_s3(object_name, csv) click to toggle source
# File lib/result2csv/converter.rb, line 71
# Writes +csv+ to the results bucket under the key +object_name+.
#
# @param object_name key of the destination object (interpolated into a String)
# @param csv content to write
# @return whatever the object's +write+ returns
def self.write_csv_to_s3(object_name, csv)
  destination = bucket.objects["#{object_name}"]
  destination.write(csv)
end