class DataKit::CSV::FieldAnalyzer

Attributes

csv[RW]
field_pos[RW]
match_type[RW]
sampling_rate[RW]

Public Class Methods

analyze(csv, field_pos, options = {})
# File lib/data_kit/csv/field_analyzer.rb, line 39
# Convenience entry point: builds an analyzer for the given arguments and
# runs it immediately.
#
# csv, field_pos and options are passed through to `new` unchanged;
# the return value is whatever `execute` returns.
def analyze(csv, field_pos, options = {})
  analyzer = new(csv, field_pos, options)
  analyzer.execute
end
new(csv, field_pos, options = {})
# File lib/data_kit/csv/field_analyzer.rb, line 9
# Stores the CSV source and the position of the field to analyze, and
# reads two optional settings from +options+:
#
#   :match_type    - falls back to :any when missing, nil or false
#   :sampling_rate - falls back to 0.1 when missing, nil or false
def initialize(csv, field_pos, options = {})
  @csv, @field_pos = csv, field_pos

  @match_type = options[:match_type]
  @match_type ||= :any

  @sampling_rate = options[:sampling_rate]
  @sampling_rate ||= 0.1
end

Public Instance Methods

execute()
# File lib/data_kit/csv/field_analyzer.rb, line 16
# Walks every row yielded by csv.each_row and accumulates statistics for
# the field at field_pos into a FieldAnalysis.
#
# Every row bumps the analysis total; a row's field value is inserted
# (and the sample count bumped) only when a fresh random draw is
# <= sampling_rate.
#
# Returns the FieldAnalysis, or nil when each_row yields no rows.
def execute
  analysis = nil
  random = Random.new

  csv.each_row do |row|
    # Built lazily on the first row, so headers are only read once
    # iteration has actually started.
    analysis ||= FieldAnalysis.new(csv.headers[field_pos], { match_type: match_type })

    analysis.increment_total
    next unless random.rand <= sampling_rate

    analysis.increment_sample
    analysis.insert(row[field_pos])
  end

  analysis
end