class Orefine::CSVUtil
Public Class Methods
add_column(csv, field, value)
click to toggle source
# File lib/orefine.rb, line 186 def add_column(csv, field, value) self.perform_operation(csv, %Q{ [ { "op": "core/column-addition", "engineConfig": { "facets": [], "mode": "record-based" }, "newColumnName": "#{field}", "columnInsertIndex": 0, "baseColumnName": "email_stripped", "expression": "grel:\\\"#{value}\\\"", "onError": "set-to-blank" } ] }) end
clear_all_csvs()
click to toggle source
# File lib/orefine.rb, line 7 def clear_all_csvs Refine.get_all_project_metadata["projects"] .select { |k, v| v["name"].start_with?('csv_') } .keys .map { |project_id| Refine.new("project_id" => project_id) } .map(&:delete_project) end
common_facet(flag = true)
click to toggle source
# File lib/orefine.rb, line 205 def common_facet(flag = true) { "invert" => false, "expression" => "value", "selectError" => false, "omitError" => false, "selectBlank" => false, "name" => "exists", "omitBlank" => false, "columnName" => "exists", "type" => "list", "selection" => [ { "v" => { # string vs boolean matters here... be careful "v" => flag, "l" => flag, } } ] } end
create_common_flag(project_a, project_b)
click to toggle source
# File lib/orefine.rb, line 128 def create_common_flag(project_a, project_b) if project_a.get_columns_info.map { |c| c["name"] }.include? 'exists' STDERR.puts "'exists' column already exists in csv_a, deleting" self.delete_column(project_a, "exists") end self.perform_operation(project_a, %Q{ [ { "op": "core/column-addition", "engineConfig": { "facets": [], "mode": "record-based" }, "newColumnName": "exists", "columnInsertIndex": 0, "baseColumnName": "email_stripped", "expression": "grel:cell.cross(\\\"#{project_b.project_name}\\\", \\\"email_stripped\\\").cells.length() > 0", "onError": "set-to-blank" } ] }) end
delete_column(csv, field)
click to toggle source
# File lib/orefine.rb, line 175 def delete_column(csv, field) self.perform_operation(csv, %Q{ [ { "op": "core/column-removal", "columnName": "#{field}" } ] }) end
merge_common_field(csv_a, csv_b, common_field)
click to toggle source
# File lib/orefine.rb, line 171 def merge_common_field(csv_a, csv_b, common_field) end
merge_field(project_a, project_b, field)
click to toggle source
# File lib/orefine.rb, line 152 def merge_field(project_a, project_b, field) self.perform_operation(project_a, %Q{ [ { "op": "core/column-addition", "engineConfig": { "facets": [], "mode": "record-based" }, "newColumnName": "#{field}_merged", "columnInsertIndex": 0, "baseColumnName": "email_stripped", "expression": "grel:cell.cross(\\\"#{project_b.project_name}\\\", \\\"email_stripped\\\").cells[\\\"#{field}\\\"].value[0]", "onError": "set-to-blank" } ] }) end
normalize_column_names(projects)
click to toggle source
# File lib/orefine.rb, line 15 def normalize_column_names(projects) self.normalize_email_column_name(projects) self.normalize_zip_column_name(projects) self.normalize_full_name_column_name(projects) end
normalize_email_column_content(projects)
click to toggle source
# File lib/orefine.rb, line 87 def normalize_email_column_content(projects) self.perform_operation(projects, %q{ [ { "op": "core/column-addition", "engineConfig": { "facets": [], "mode": "record-based" }, "newColumnName": "email_stripped", "columnInsertIndex": 0, "baseColumnName": "email", "expression": "grel:strip(value.toLowercase())", "onError": "set-to-blank" } ] }) end
normalize_email_column_name(projects)
click to toggle source
# File lib/orefine.rb, line 21 def normalize_email_column_name(projects) self.perform_operation(projects, %q{ [ { "op": "core/column-rename", "oldColumnName": "E-mail", "newColumnName": "email" }, { "op": "core/column-rename", "oldColumnName": "Email Address", "newColumnName": "email" }, { "op": "core/column-rename", "oldColumnName": "Email", "newColumnName": "email" }, { "op": "core/column-rename", "oldColumnName": "[email]", "newColumnName": "email" }, { "op": "core/column-rename", "oldColumnName": "email_stripped", "newColumnName": "email" }, ] }) end
normalize_full_name_column_name(projects)
click to toggle source
# File lib/orefine.rb, line 70 def normalize_full_name_column_name(projects) self.perform_operation(projects, %q{ [ { "op": "core/column-rename", "oldColumnName": "Name", "newColumnName": "full_name" }, { "op": "core/column-rename", "oldColumnName": "Full Name", "newColumnName": "full_name" } ] }) end
normalize_zip_column_name(projects)
click to toggle source
# File lib/orefine.rb, line 53 def normalize_zip_column_name(projects) self.perform_operation(projects, %q{ [ { "op": "core/column-rename", "oldColumnName": "Zip", "newColumnName": "zip" }, { "op": "core/column-rename", "oldColumnName": "[zip]", "newColumnName": "zip" } ] }) end
perform_operation(projects, operation)
click to toggle source
# File lib/orefine.rb, line 228 def perform_operation(projects, operation) projects = [projects] if !projects.is_a?(Array) projects.each { |p| p.apply_operations(operation) } end
split_full_name(projects)
click to toggle source
# File lib/orefine.rb, line 106 def split_full_name(projects) self.perform_operation(projects, %q{ [ { "op": "core/column-split", "description": "Split column Name by separator", "engineConfig": { "facets": [], "mode": "row-based" }, "columnName": "full_name", "guessCellType": false, "removeOriginalColumn": false, "mode": "separator", "separator": "(?<=[a-z]) ", "regex": true, "maxColumns": 2 } ] }) end