class DataKit::CSV::FieldAnalyzer
Attributes
csv[RW]
field_pos[RW]
match_type[RW]
sampling_rate[RW]
Public Class Methods
analyze(csv, field_pos, options = {})
click to toggle source
# File lib/data_kit/csv/field_analyzer.rb, line 39
# Convenience entry point: builds an analyzer from the given arguments and
# returns whatever its execute call returns.
#
# csv, field_pos and options are forwarded to new unchanged.
def analyze(csv, field_pos, options = {})
  analyzer = new(csv, field_pos, options)
  analyzer.execute
end
new(csv, field_pos, options = {})
click to toggle source
# File lib/data_kit/csv/field_analyzer.rb, line 9
# Stores the CSV source and the position of the field to analyze, together
# with two optional settings read from options:
#
# :match_type::    stored as given; falls back to :any when nil or false.
# :sampling_rate:: stored as given; falls back to 0.1 when nil or false.
def initialize(csv, field_pos, options = {})
  @csv, @field_pos = csv, field_pos

  fallbacks = { :match_type => :any, :sampling_rate => 0.1 }
  @match_type, @sampling_rate = fallbacks.map { |key, fallback| options[key] || fallback }
end
Public Instance Methods
execute()
click to toggle source
# File lib/data_kit/csv/field_analyzer.rb, line 16
# Walks every row yielded by csv.each_row and builds a FieldAnalysis for the
# column at field_pos.
#
# Every row increments the analysis total. A row is additionally counted as
# a sample, and its value at field_pos inserted into the analysis, when a
# fresh random draw falls below sampling_rate.
#
# Returns the FieldAnalysis, or nil when each_row yields no rows (the
# analysis is only created once the first row arrives).
def execute
  analysis = nil
  random = Random.new

  csv.each_row do |row|
    # Created lazily on the first row so csv.headers is read at the same
    # point it always was: after iteration has started.
    analysis ||= FieldAnalysis.new(csv.headers[field_pos], { :match_type => match_type })
    analysis.increment_total

    # Random#rand yields a float in [0.0, 1.0). The strict comparison keeps a
    # sampling_rate of 0 from ever sampling (a draw of exactly 0.0 would slip
    # through `<=`), while a rate of 1.0 still samples every row.
    next unless random.rand < sampling_rate

    analysis.increment_sample
    analysis.insert(row[field_pos])
  end

  analysis
end