class Sinew::CSV

Constants

ASCII_ONLY

Attributes

columns[R]
count[R]
csv[R]
path[R]
tally[R]

Public Class Methods

new(path) click to toggle source
# File lib/sinew/csv.rb, line 8
# Remember where the CSV should be written. No file is opened here: @csv
# stays nil until start is called, and the emitted-row counter begins at zero.
#
# path - destination handed to ::CSV.open by start.
def initialize(path)
  @csv = nil
  @count = 0
  @path = path
end

Public Instance Methods

emit(row) click to toggle source

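Append a row to the CSV. Each column's value is normalized, the row is written and flushed immediately, and a hash of the non-empty values is returned.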

# File lib/sinew/csv.rb, line 31
# Append one row to the CSV and flush it straight away.
#
# row - anything indexable by column name (row[column] is read for every
#       entry of columns).
#
# Each looked-up value is passed through normalize. Columns whose normalized
# value is truthy are counted in tally and collected into the returned hash;
# the others are still written to the CSV as empty (nil) cells.
#
# Returns a Hash of column => normalized value holding only the non-empty
# cells, handy for pretty printing.
def emit(row)
  printable = {}

  # build the ordered cells for the CSV line, tallying as we go
  cells = columns.map do |column|
    cell = normalize(row[column])
    next cell unless cell

    printable[column] = cell
    tally[column] += 1
    cell
  end
  @count += 1

  # write immediately so the file stays current after every row
  csv << cells
  csv.flush

  printable
end
normalize(s) click to toggle source
# File lib/sinew/csv.rb, line 57
# Turn an arbitrary cell value into a clean single-line string.
#
# s - an object responding to inner_html (e.g. a nokogiri node), an Array
#     (joined with '|'), or anything else (converted with to_s).
#
# Tags are replaced with spaces, text that does not match ASCII_ONLY is run
# through Sterile (smart punctuation, entities, transliteration), and
# whitespace is trimmed and collapsed.
#
# Returns the cleaned String, or nil when nothing is left.
def normalize(s)
  # nokogiri/array/misc => string
  text =
    if s.respond_to?(:inner_html) then s.inner_html
    elsif s.is_a?(Array) then s.join('|')
    else s.to_s
    end
  return if text.empty?

  # crude tag stripping; a space is substituted so adjacent words don't fuse
  text = text.gsub(/<[^>]+>/, ' ')

  unless text =~ ASCII_ONLY
    # MS Word 'smart punctuation' => ASCII
    text = Sterile.plain_format(text)

    # &aacute; &amp; etc.
    text = Sterile.decode_entities(text)

    # "šţɽĩɳģ" => "string"
    text = Sterile.transliterate(text)
  end

  # squish: trim the ends and collapse whitespace runs to a single space
  squished = text.strip.gsub(/\s+/, ' ')
  squished unless squished.empty?
end
start(columns) click to toggle source

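Start writing the CSV: opens the file at path and writes the header row of column names. Raises if the CSV has already been started.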

# File lib/sinew/csv.rb, line 15
# Begin writing the CSV: record the column list, zero the per-column tally,
# open the file at path for writing and emit the header row.
#
# columns - ordered list of column names; also written as the header line.
#
# Returns the opened ::CSV writer (also stored in @csv).
# Raises RuntimeError ('started twice') if the CSV was already started.
def start(columns)
  raise 'started twice' if started?

  @columns = columns
  @tally = columns.to_h { |column| [column, 0] }

  # @csv is only assigned once the header has been written
  writer = ::CSV.open(path, 'wb')
  writer << columns
  @csv = writer
end
started?() click to toggle source

Has this CSV been started, i.e. has start already opened the output file?

# File lib/sinew/csv.rb, line 26
# True once a CSV writer has been stored in @csv; false while it is still nil.
def started?
  !@csv.nil?
end