class Sinew::CSV
Constants
- ASCII_ONLY
Attributes
columns[R]
count[R]
csv[R]
path[R]
tally[R]
Public Class Methods
new(path)
click to toggle source
# File lib/sinew/csv.rb, line 8
#
# Builds a writer bound to +path+. Nothing is opened or written here; the
# instance only records where the file goes and resets its bookkeeping.
#
# @param path [String] destination of the csv file (later passed to ::CSV.open)
def initialize(path)
  @path = path
  @count = 0
  # Not known yet. Spelled out so every reader has a defined value from the outset.
  @columns = nil
  @tally = nil
  # nil here is what started? reports as "not started".
  @csv = nil
end
Public Instance Methods
emit(row)
click to toggle source
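Appends a row to the CSV. Each value is normalized first; blank values are written as empty cells. Returns a hash of the non-blank values, keyed by column.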
# File lib/sinew/csv.rb, line 31
#
# Appends one row to the csv. Every column's value is passed through
# normalize; values that normalize to nil are written as nil cells and are
# left out of both the tally and the returned hash.
#
# @param row [#[]] values looked up by column (indexed with each entry of columns)
# @return [Hash] column => normalized value, for the non-nil cells only
# @raise [RuntimeError] if called before start
def emit(row)
  # Fail with a clear message instead of a NoMethodError on nil columns/csv.
  raise 'not started' unless started?

  # Convert row to cells, building the printable hash (our return value) as we go.
  printable = {}
  cells = columns.map do |column|
    value = normalize(row[column])
    if value
      printable[column] = value
      tally[column] += 1
    end
    value
  end
  @count += 1

  # Write the row and flush after every emit.
  csv << cells
  csv.flush

  # Returned in case someone wants to pretty print this.
  printable
end
normalize(s)
click to toggle source
# File lib/sinew/csv.rb, line 57
#
# Reduces a value to a clean, single-line string: coerces it to a string,
# replaces tags with spaces, runs non-ASCII_ONLY text through Sterile, and
# collapses whitespace.
#
# @param s [Object] anything; objects responding to inner_html and Arrays get
#   special treatment, everything else goes through to_s
# @return [String, nil] the cleaned string, or nil when nothing is left
def normalize(s)
  # nokogiri/array/misc => string
  s =
    if s.respond_to?(:inner_html)
      s.inner_html
    elsif s.is_a?(Array)
      s.join('|')
    else
      s.to_s
    end
  return if s.empty?

  # Simple attempt to strip tags. Note that we replace tags with spaces.
  s = s.gsub(/<[^>]+>/, ' ')

  # match? answers the same question as !~ without populating $~.
  # NOTE(review): assumes ASCII_ONLY is a Regexp - confirm against its definition.
  unless s.match?(ASCII_ONLY)
    # Converts MS Word 'smart punctuation' to ASCII
    s = Sterile.plain_format(s)
    # &aacute; &amp; etc.
    s = Sterile.decode_entities(s)
    # "šţɽĩɳģ" => "string"
    s = Sterile.transliterate(s)
  end

  # squish
  s = s.strip.gsub(/\s+/, ' ')
  return if s.empty?

  s
end
start(columns)
click to toggle source
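Starts writing the CSV: opens the file at path and writes columns as the header row. Raises if the CSV has already been started.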
# File lib/sinew/csv.rb, line 15
#
# Starts writing the csv: remembers the columns, zeroes a per-column tally,
# opens the file at path for writing and writes the columns as the first row.
#
# @param columns [Array] column names, in output order
# @return [::CSV] the opened csv
# @raise [RuntimeError] if the csv has already been started
def start(columns)
  raise 'started twice' if started?

  @columns = columns
  @tally = columns.to_h { |column| [column, 0] }
  # 'wb' truncates any existing file; the header row goes out immediately.
  @csv = ::CSV.open(path, 'wb').tap { |out| out << columns }
end
started?()
click to toggle source
Returns true once this CSV has been started (its file has been opened), false otherwise.
# File lib/sinew/csv.rb, line 26
#
# Has this csv been started? True once @csv holds anything other than nil.
#
# @return [Boolean]
def started?
  !@csv.nil?
end