class Slicing::Base

Public Instance Methods

add(path, output, *headers) click to toggle source
# File lib/slicing.rb, line 40
# Copies every row of the CSV at +path+ into +output+, preceded by one extra
# row made of +headers+.
#
# path    - path of the CSV file to read.
# output  - path of the CSV file to append to (opened in "a+" mode).
# headers - values written as a single row ahead of the first input row.
#
# The output file is opened once for the whole copy instead of once per input
# row. The header row is only written when the input has at least one row.
def add(path, output, *headers)
  CSV.open(output, "a+") do |csv|
    CSV.foreach(path).with_index do |row, index|
      csv << headers if index.zero?
      csv << row
    end
  end
end
clean(path, output, name, value) click to toggle source
# File lib/slicing.rb, line 35
# Placeholder command: accepts its four arguments and does nothing with them.
# Always returns nil.
def clean(path, output, name, value); end
count(csv_file) click to toggle source
# File lib/slicing.rb, line 214
# Prints a short summary of +csv_file+: the number of rows and the number of
# columns in the first row, then the first row itself, and finally passes that
# first row to print_header.
#
# csv_file - path of the CSV file; read as ISO-8859-1 and transcoded to UTF-8.
def count(csv_file)
  rows = CSV.read(csv_file, headers: false, encoding: "ISO8859-1:utf-8")
  first_row = rows[0]
  separator = "---"

  puts "#{rows.count} rows #{first_row.count} columns"
  puts separator
  puts first_row.to_s
  puts separator
  print_header(first_row)
end
first(csv_file) click to toggle source
# File lib/slicing.rb, line 180
# Prints the leading rows of +csv_file+ to standard output, stopping once
# options[:line] rows have been printed.
#
# csv_file - path of the CSV file; read as ISO-8859-1 and transcoded to UTF-8.
#
# Iteration is ended with `break`, so the method returns to its caller instead
# of terminating the whole process with `exit`. Errors raised while printing
# are no longer silently discarded. When options[:line] never equals the row
# counter (for example when it is nil), every row is printed.
def first(csv_file)
  stop = options[:line]
  CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8").with_index do |row, counter|
    break if counter == stop

    puts row
  end
end
freq(path, column_name, output_path) click to toggle source
# File lib/slicing.rb, line 99
# Counts how often each value occurs in one column and appends the tallies to
# a CSV file as "value,count" rows.
#
# path        - base name of the input; the file read is "./<path>.csv".
# column_name - header of the column whose values are counted.
# output_path - accepted but not used; the result always goes to
#               "./<path>-counted.csv" (opened in "a+" mode).
#
# The input is read with headers: true, so the column array holds data values
# only. Every value is therefore counted; the first data row is no longer
# skipped as if it were a header.
def freq(path, column_name, output_path)
  file_to_count = "./#{path}.csv"
  output = "./#{path}-counted.csv"
  table = CSV.read(file_to_count, headers: true, encoding: "ISO8859-1:utf-8")

  tallies = score(table[column_name])
  CSV.open(output, "a+") do |csv|
    tallies.each { |value, occurrences| csv << [value, occurrences] }
  end
end
head(csv_file) click to toggle source
# File lib/slicing.rb, line 194
# Prints the first row of +csv_file+, its column count, and then passes the
# row to print_header.
#
# csv_file - path of the CSV file; read as ISO-8859-1 and transcoded to UTF-8.
#
# Only the first row is processed. The loop is left with `break`, so the method
# returns to its caller instead of terminating the whole process with `exit`.
def head(csv_file)
  CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8") do |row|
    puts row
    puts "----"
    puts "#{row.count} columns"
    puts "----"
    print_header(row)
    break
  end
end
list(path, name) click to toggle source
# File lib/slicing.rb, line 66
# Prints the distinct values found in column +name+ of the CSV at +path+,
# followed by a "--" separator and the number of distinct values.
#
# path - path of the CSV file; read with headers, ISO-8859-1 transcoded to UTF-8.
# name - header of the column to list.
def list(path, name)
  table = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  distinct = table[name].uniq

  puts distinct
  puts "--"
  puts "#{distinct.count} items"
end
mask(path, column_name, output_path) click to toggle source
# File lib/slicing.rb, line 118
# Copies the CSV at +path+ to +output_path+ with the values of one column
# replaced by the result of masking (an MD5 hex digest; nil stays nil).
#
# path        - path of the CSV file; read as ISO-8859-1 and transcoded to UTF-8.
# column_name - header of the column whose values are masked.
# output_path - path of the CSV file to append to (opened in 'a+' mode).
#
# The header row is copied unchanged. Rows are copied as-is when +column_name+
# is not one of the headers. CSV options are passed as keywords because
# CSV.read no longer accepts a positional options hash on Ruby 3.
def mask(path, column_name, output_path)
  original = CSV.read(path, headers: true, return_headers: true, encoding: "ISO8859-1:utf-8")
  CSV.open(output_path, 'a+') do |csv|
    original.each do |row|
      # Only data rows are masked; the header row must keep the column name.
      if !row.header_row? && row.header?(column_name)
        row[column_name] = masking(row[column_name])
      end
      csv << row
    end
  end
end
reduce(path, output, start) click to toggle source
# File lib/slicing.rb, line 75
# Copies the leading rows of the CSV at +path+ into +output+: a row is written
# while its zero-based index is smaller than +start+.
#
# path   - path of the CSV file to read.
# output - path of the CSV file to append to (opened in "a+" mode).
# start  - number of leading rows to copy; converted with to_i.
#
# The original block parameters were swapped, so the writer was appended to
# the row array and nothing reached the output file. Rows are now written to
# the CSV writer, the output is opened once, and reading stops as soon as the
# limit is reached.
def reduce(path, output, start)
  limit = start.to_i
  CSV.open(output, "a+") do |csv|
    CSV.foreach(path).with_index do |row, index|
      break if index >= limit

      csv << row
    end
  end
end
retain(path, output, *names) click to toggle source
# File lib/slicing.rb, line 128
# Writes a copy of the CSV at +path+ to +output+ that keeps only the columns
# named in +names+, in the order the names are given.
#
# path   - path of the CSV file to read; its first row supplies the column names.
# output - path of the CSV file to append to (opened in "a+" mode).
# names  - column names to keep.
#
# If a name is not found in the first row, a message and the first row are
# printed and the process exits before anything is written.
def retain(path, output, *names)
  header = CSV.foreach(path).first || ""

  positions = names.map do |column|
    position = header.index(column)
    next position unless position.nil?

    puts "#{column} is not a column name."
    puts "--"
    puts header
    exit
  end

  CSV.open(output, "a+") do |writer|
    CSV.foreach(path) do |fields|
      writer << positions.map { |position| fields[position] }
    end
  end
end
rm(path, column_name, output) click to toggle source
# File lib/slicing.rb, line 163
# Writes a copy of the CSV at +path+ to +output+ with +column_name+ removed.
#
# path        - path of the CSV file to read.
# column_name - value passed to `delete` on the parsed data (the header of the
#               column to drop when headers are enabled).
# output      - path of the CSV file to append to (opened in 'a+' mode).
#
# Reads options[:headers], options[:rowsep] and options[:utf]. The row
# separator is only passed to CSV when options[:rowsep] is set. CSV options
# are passed as keywords because CSV.read no longer accepts a positional
# options hash on Ruby 3.
def rm(path, column_name, output)
  read_options = {
    headers: options[:headers],
    return_headers: options[:headers],
    encoding: options[:utf]
  }
  read_options[:row_sep] = options[:rowsep] unless options[:rowsep].nil?

  original = CSV.read(path, **read_options)
  original.delete(column_name)
  CSV.open(output, 'a+') do |csv|
    original.each { |row| csv << row }
  end
end
sample(path, output_path, size) click to toggle source
# File lib/slicing.rb, line 88
# Appends a random selection of data rows from the CSV at +path+ to
# +output_path+.
#
# path        - path of the CSV file; read with headers, ISO-8859-1 transcoded
#               to UTF-8.
# output_path - path of the CSV file to append to (opened in "a+" mode).
# size        - number of rows to pick; converted with to_i so a string from
#               the command line works.
#
# CSV::Table does not provide `sample`, so the rows are first collected into
# an Array. Only data rows are written; the header row is not copied.
def sample(path, output_path, size)
  table = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  picked = table.each.to_a.sample(size.to_i)
  CSV.open(output_path, "a+") do |csv|
    picked.each { |row| csv << row }
  end
end
show(path, output, start) click to toggle source
# File lib/slicing.rb, line 54
# Prints a single row of the CSV at +path+: the row whose one-based position
# equals +start+ (converted with to_i). Nothing is printed when no row has
# that position.
#
# path   - path of the CSV file to read.
# output - not used by this method.
# start  - one-based position of the row to print.
def show(path, output, start)
  remaining = start.to_i
  CSV.foreach(path) do |fields|
    remaining -= 1
    next unless remaining.zero?

    puts fields
    break
  end
end
subset(csv_file, output) click to toggle source
# File lib/slicing.rb, line 225
# Copies the leading rows of +csv_file+ into +output+, stopping once
# options[:line] rows have been copied.
#
# csv_file - path of the CSV file; read as ISO-8859-1 and transcoded to UTF-8.
# output   - path of the CSV file to append to (opened in "a+" mode).
#
# The output file is opened once instead of once per row, and iteration ends
# with `break` so the method returns to its caller instead of terminating the
# whole process with `exit`. Read and write errors are no longer silently
# discarded. When options[:line] never equals the row counter (for example
# when it is nil), every row is copied.
def subset(csv_file, output)
  stop = options[:line]
  CSV.open(output, "a+") do |csv|
    CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8").with_index do |row, counter|
      break if counter == stop

      csv << row
    end
  end
end
unique(path, column_name) click to toggle source
# File lib/slicing.rb, line 206
# Prints the number of distinct values in column +column_name+ of the CSV at
# +path+.
#
# path        - path of the CSV file; read with headers, ISO-8859-1 transcoded
#               to UTF-8.
# column_name - header of the column to inspect.
#
# The header row is not returned as data, so the column name itself is not
# counted as one of the values.
def unique(path, column_name)
  data = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  puts data[column_name].uniq.count
end

Private Instance Methods

masking(value) click to toggle source
# File lib/slicing.rb, line 276
# Returns the MD5 hex digest of +value+, or nil when +value+ is nil.
def masking(value)
  return nil if value.nil?

  Digest::MD5.hexdigest(value)
end
print_header(array) click to toggle source

desc :subsetagain, ""
def subsetagain csv_file, output, value=10

path = csv_file
output_directory =  output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
stop = value
counter = 0
CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row|
  exit if counter == stop
  begin
    counter = counter + 1
    CSV.open(output_directory, "a+") do |csv|
      csv << row
    end
  rescue
  end
end

end

print_progress(current, total) click to toggle source
process_options(headers, rowsep, utf) click to toggle source
# File lib/slicing.rb, line 267
# Returns the fixed triple [true, "\r\n", "ISO8859-1:utf-8"].
#
# headers, rowsep, utf - accepted, but none of them influences the result.
def process_options(headers, rowsep, utf)
  [true, "\r\n", "ISO8859-1:utf-8"]
end
score( array ) click to toggle source
# File lib/slicing.rb, line 280
# Tallies the elements of +array+.
#
# Returns a Hash mapping each distinct element to the number of times it
# occurs; the hash answers 0 for keys it does not contain.
def score(array)
  array.each_with_object(Hash.new(0)) do |element, counts|
    counts[element] += 1
  end
end