class Result2csv::Converter
Public Class Methods
bucket()
click to toggle source
# File lib/result2csv/converter.rb, line 58
# Returns the "datafiniti-voltron-results" entry from a freshly built
# AWS::S3 client's bucket collection.
#
# @return [Object] whatever AWS::S3#buckets[] yields for that bucket name
def self.bucket
  AWS::S3.new.buckets["datafiniti-voltron-results"]
end
convert_to_csv(url, user_token, result_id)
click to toggle source
# File lib/result2csv/converter.rb, line 112
# Downloads a JSON result document and converts it to CSV.
#
# The body fetched from +url+ is parsed as JSON. When it does not parse
# as-is, two textual repairs are tried in order (see parse_with_repairs);
# if neither helps, the last parse error propagates to the caller.
#
# @param url [String] location handed to RestClient.get
# @param user_token [Object] forwarded to create_matrix
# @param result_id [Object] forwarded to create_matrix
# @return [Object] the value of #to_csv on the matrix built by create_matrix
def self.convert_to_csv(url, user_token, result_id)
  require 'json'
  # NOTE(review): this freezes the whole JSON module on every call, which
  # prevents any later reconfiguration of JSON elsewhere in the process.
  # Kept because callers may rely on it; confirm whether it is intentional.
  JSON.freeze

  # report_csv_conversion_progress(user_token, result_id, "downloading")
  cached_result = RestClient.get(url)
  result_file = parse_with_repairs(cached_result)

  # report_csv_conversion_progress(user_token, result_id, "starting")
  create_matrix(result_file, user_token, result_id).to_csv
end

# Internal helper for convert_to_csv: parses +raw+ as JSON, retrying with
# progressively different textual repairs of the ORIGINAL text.
#
# Attempts, in order:
# 1. the text unchanged;
# 2. every "]," replaced by ",";
# 3. a "," at the start of any line removed.
#
# @param raw [String] response body to parse
# @return [Object] the parsed JSON value from the first attempt that succeeds
# @raise [StandardError] the error from the final attempt when all three fail
def self.parse_with_repairs(raw)
  attempts = [
    lambda { |text| text },
    lambda { |text| text.gsub(/\],/, ',') },
    lambda { |text| text.gsub(/^,/, '') }
  ]
  final_index = attempts.size - 1

  attempts.each_with_index do |repair, index|
    begin
      return JSON.parse(repair.call(raw))
    rescue StandardError
      # Only the last failure is surfaced; earlier ones just trigger the
      # next repair.
      raise if index == final_index
    end
  end
end
create_matrix(result_file, user_token, result_id)
click to toggle source
# File lib/result2csv/converter.rb, line 132
# Builds the row matrix for a parsed result set: the header row from
# parse_csv_headers first, then one row per element of +result_file+.
# After each row a "converting: N%" message is sent through
# report_csv_conversion_progress.
#
# @param result_file [#each, #size] parsed result records
# @param user_token [Object] forwarded to report_csv_conversion_progress
# @param result_id [Object] forwarded to report_csv_conversion_progress
# @return [Array<Array>] header row followed by the data rows
def self.create_matrix(result_file, user_token, result_id)
  headers = parse_csv_headers(result_file)
  total = result_file.size.to_f
  rows = [headers]

  result_file.each.with_index(1) do |object, position|
    rows << results_values_to_row(object, headers.size)
    percent = ((position.to_f / total) * 100).to_i
    # Progress is reported for every row (a "progress % 10 == 0" throttle
    # was left commented out in the original source).
    report_csv_conversion_progress(user_token, result_id, "converting: #{percent}%")
  end

  rows
end
does_not_have_csv?(result_url)
click to toggle source
# File lib/result2csv/converter.rb, line 49
# Negation of has_csv?.
#
# @param result_url [String, Hash] either the URL itself or a Hash whose
#   :result_url entry is used as the URL
# @return [Boolean] true when has_csv? reports false for the URL
def self.does_not_have_csv?(result_url)
  target = result_url.is_a?(Hash) ? result_url[:result_url] : result_url
  !has_csv?(target)
end
file(url)
click to toggle source
# File lib/result2csv/converter.rb, line 54
# Fetches +url+ with RestClient.get and parses the response as JSON.
#
# @param url [String] location to download
# @return [Object] the parsed JSON value
def self.file(url)
  response_body = RestClient.get(url)
  JSON.parse(response_body)
end
get_results_url(options)
click to toggle source
# File lib/result2csv/converter.rb, line 21
# Calls retrieve for the given crawl and returns its parsed JSON body when
# the status is below 400; otherwise returns an empty string.
#
# @param options [Hash] must provide :crawl_name and :user_token
# @return [Object, String] parsed JSON body on success, '' otherwise
def self.get_results_url(options)
  result = retrieve(crawl_name: options[:crawl_name], user_token: options[:user_token])

  if result[:status] < 400
    JSON.parse(result[:body])
  else
    ''
  end
end
has_csv?(url)
click to toggle source
# File lib/result2csv/converter.rb, line 42
# Reports whether the CSV companion of a result file exists in S3.
#
# The CSV key is the result's object key (from s3_object_key) cut at its
# first "." with "_csv.csv" appended. The lookup goes through s3_object so
# the bucket is resolved in one place.
#
# This is a best-effort predicate: any StandardError raised while deriving
# the key or querying S3 yields false rather than propagating.
#
# @param url [String] URL of the result file
# @return [Boolean] result of #exists? on the CSV object, or false on error
def self.has_csv?(url)
  csv_key = "#{s3_object_key(url).split('.').first}_csv.csv"
  s3_object(csv_key).exists?
rescue StandardError
  false
end
parse_csv_headers(result_file)
click to toggle source
# File lib/result2csv/converter.rb, line 93
# Derives the CSV header row from the keys of the first record.
#
# Only the first record is inspected; keys that appear solely in later
# records are not included.
#
# @param result_file [#first] parsed result records (each responding to #keys)
# @return [Array] a new array with the first record's keys, or [] when
#   +result_file+ has no first record
def self.parse_csv_headers(result_file)
  first_record = result_file.first
  # An empty result set has no first record; return no headers instead of
  # raising NoMethodError on nil.
  return [] if first_record.nil?

  first_record.keys.to_a
end
parser()
click to toggle source
# File lib/result2csv/converter.rb, line 99
# Builds a new Yajl parser; a fresh instance is created on every call.
#
# @return [Yajl::Parser]
def self.parser
  Yajl::Parser.new
end
report_csv_conversion_progress(user_token, result_id, message)
click to toggle source
# File lib/result2csv/converter.rb, line 147
# Writes a progress message to standard output, preceded by a carriage
# return so successive messages overwrite each other on one terminal line.
#
# @param user_token [Object] currently unused (only referenced by the
#   disabled RealtimeMessage call below)
# @param result_id [Object] currently unused, as above
# @param message [#to_s] text to display
# @return [nil]
def self.report_csv_conversion_progress(user_token, result_id, message)
  # RealtimeMessage.publish(user_token, 'conversion-status', {:progress => message, :id => result_id})
  $stdout.print("\r#{message}")
end
results_values_to_row(object, columns)
click to toggle source
# File lib/result2csv/converter.rb, line 103
# Converts one record into a CSV row by passing each of its values through
# truncate_to_max_cell_size, in the order #values returns them.
#
# @param object [#values] a single result record
# @param columns [Integer] accepted for interface compatibility; not used
#   by this method
# @return [Array<String>] one cell per value
def self.results_values_to_row(object, columns)
  object.values.to_a.map { |value| truncate_to_max_cell_size(value) }
end
retrieve(options)
click to toggle source
# File lib/result2csv/converter.rb, line 4
# Queries the 80legs results endpoint for a crawl and returns the raw
# response body together with its HTTP status code.
#
# The request asks for the "url" field and passes a "dates" filter built
# from the date seven days before now.
# NOTE(review): the date is formatted in the process's local time zone
# (same as the original Time.at(...).strftime) — confirm UTC is not needed.
#
# Response data is held in local variables, so calls no longer share or
# leak state through instance variables on the receiver.
#
# @param options [Hash] must provide :user_token and :crawl_name
# @return [Hash] { body: <response body>, status: <response code> }
def self.retrieve(options)
  one_week = 7 * 24 * 60 * 60 # 604800 seconds
  since = (Time.now - one_week).strftime("%F")
  date_string = %({"#{since}":""})

  template = Addressable::Template.new(
    "https://#{options[:user_token]}:@api.80legs.com/v2/results/#{options[:crawl_name]}/{?query*}"
  )
  request_url = template.expand(
    "query" => {
      "fields" => ["url"],
      "dates" => date_string
    }
  ).to_s

  status = nil
  body = nil
  # Passing a block lets us read the code/body of any response, including
  # error statuses, instead of relying on RestClient's default handling.
  RestClient.get(request_url) do |reply, _request|
    status = reply.code
    body = reply.body
  end

  { body: body, status: status }
end
s3_csv_file(url)
click to toggle source
# File lib/result2csv/converter.rb, line 37
# Produces a GET URL (via url_for) for the CSV companion of a result file.
#
# The CSV key is the result's object key cut at its first "." with
# "_csv.csv" appended. The URL is requested with an "attachment" content
# disposition and an "application/csv" content type.
#
# @param url [String] URL of the result file
# @return [String] string form of the value returned by url_for
def self.s3_csv_file(url)
  base_key = s3_object_key(url).split('.').first
  csv_object = s3_object("#{base_key}_csv.csv")
  signed = csv_object.url_for(:get,
                              :endpoint => "s3.amazonaws.com",
                              :response_content_disposition => "attachment",
                              :response_content_type => "application/csv")
  signed.to_s
end
s3_object(key)
click to toggle source
# File lib/result2csv/converter.rb, line 32
# Looks up an object by key in the "datafiniti-voltron-results" bucket
# (resolved through the bucket class method).
#
# @param key [String] object key inside the bucket
# @return [Object] whatever the bucket's objects[] collection yields for +key+
def self.s3_object(key)
  bucket.objects[key]
end
s3_object_key(url)
click to toggle source
# File lib/result2csv/converter.rb, line 27
# Extracts an object key from a URL: the path is split on "/", and up to
# two segments starting at index 1 (i.e. after the leading empty segment)
# are re-joined with "/".
#
# @param url [String] URL parsed with Addressable::URI
# @return [String] the joined path segments
def self.s3_object_key(url)
  segments = Addressable::URI.parse(url).path.split('/')
  segments[1, 2].join('/')
end
s3_url(url, content_type="application/json")
click to toggle source
# File lib/result2csv/converter.rb, line 63
# Produces a GET URL (via url_for) for the S3 object behind a result URL.
#
# The object is located with s3_object_key / s3_object, the same helpers
# the other S3 methods of this class use. The URL is requested with an
# "attachment" content disposition and the given content type.
#
# Fixes a misspelled option key (:resonse_content_type), which meant
# +content_type+ was never passed under the :response_content_type name
# that s3_csv_file uses.
#
# @param url [String] URL of the result file
# @param content_type [String] value for the :response_content_type option
# @return [String] string form of the value returned by url_for
def self.s3_url(url, content_type = "application/json")
  signed = s3_object(s3_object_key(url)).url_for(
    :get,
    endpoint: "s3.amazonaws.com",
    response_content_disposition: "attachment",
    response_content_type: content_type
  )
  signed.to_s
end
test_csv(result_url)
click to toggle source
# File lib/result2csv/converter.rb, line 159
# Manual smoke test: converts a result to CSV, uploads it to S3 under the
# derived "_csv.csv" key and prints the resulting download URL.
#
# NOTE(review): every call goes through the constant Result rather than
# this class; Result is not defined in the visible source — confirm it
# still exists and exposes these methods.
#
# @param result_url [String] URL of the result file to convert
# @return [nil] the return value of puts
def self.test_csv(result_url)
  csv_key = "#{Result.s3_object_key(result_url).split('.').first}_csv.csv"
  csv_body = Result.convert_to_csv(result_url, 1, 1)
  Result.write_csv_to_s3(csv_key, csv_body)

  download_url = Result.s3_csv_file(result_url)
  puts download_url.to_s
end
toggle_downloaded_state(user, id, state)
click to toggle source
# File lib/result2csv/converter.rb, line 152
# Sends a PUT to the 80legs results endpoint with a JSON body of
# {"downloaded": state} and returns the response code.
#
# @param user [#token] supplies the token placed in the URL's userinfo
# @param id [Object] result identifier interpolated into the URL path
# @param state [Object] value stored under the "downloaded" key
# @return [Object] the code of the response yielded by RestClient.put
def self.toggle_downloaded_state(user, id, state)
  # return EightyLegsApi.conn(token: user.token).put("/results/#{id}", {downloaded: state}.to_json, :content_type => :json)
  endpoint = "https://#{user.token}:@api.80legs.com/v2/results/#{id}"
  payload = { downloaded: state }.to_json

  RestClient.put(endpoint, payload, :content_type => :json) do |response|
    # Returns from the method itself, straight out of the block.
    return response.code
  end
end
truncate_to_max_cell_size(string)
click to toggle source
# File lib/result2csv/converter.rb, line 81
# Normalises a value into CSV-safe cell text of bounded length.
#
# The value is first interpreted as JSON; if that succeeds the parsed
# value's #to_s is used, otherwise the value's own #to_s. The text is then
# cut to +max_size+ characters, double quotes are removed and commas are
# replaced by semicolons.
#
# Previously only the non-JSON path was truncated, so JSON-parsable input
# could produce arbitrarily long cells; both paths are now truncated.
#
# @param string [Object, nil] the raw cell value
# @param max_size [Integer] maximum number of characters kept before quote
#   and comma substitution (default 32767)
# @return [String] the cleaned text, or "" when +string+ is nil
def self.truncate_to_max_cell_size(string, max_size = 32_767)
  return "" if string.nil?

  text =
    begin
      JSON.parse(string).to_s
    rescue StandardError
      # Not JSON (or not a String at all): fall back to the plain text form.
      string.to_s
    end

  text[0, max_size].gsub('"', "").gsub(",", ";")
end
write_csv_to_file(object_name, csv)
click to toggle source
# File lib/result2csv/converter.rb, line 75
# Writes CSV text to a file in the current working directory. The file is
# named after the last "/"-separated segment of +object_name+ and is
# opened in 'w' mode, replacing any existing content.
#
# @param object_name [String] object key whose last segment is the file name
# @param csv [String] content to write
# @return [Integer] the value returned by File#write
def self.write_csv_to_file(object_name, csv)
  file_name = object_name.split('/').last.to_s
  File.open(file_name, 'w') do |file|
    file.write(csv)
  end
end
write_csv_to_s3(object_name, csv)
click to toggle source
# File lib/result2csv/converter.rb, line 71
# Uploads CSV text to the results bucket under the given key.
#
# @param object_name [#to_s] key of the object to write
# @param csv [String] content to upload
# @return [Object] whatever the S3 object's #write returns
def self.write_csv_to_s3(object_name, csv)
  target = bucket.objects[object_name.to_s]
  target.write(csv)
end