class Quandl::Format::Dataset::Load

Constants

SYNTAX

Public Class Methods

each_in_file(path, &block) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 11
# Runs each_line over the file located at +path+.
#
# The file is opened read-only. The block form of File.open is used so the
# handle is closed as soon as each_line returns or raises, rather than
# staying open until the garbage collector finalizes it.
#
# path  - path of the file to read.
# block - forwarded unchanged to each_line.
#
# Returns whatever each_line returns.
def each_in_file(path, &block)
  File.open(path, "r") do |io|
    each_line(io, &block)
  end
end
each_line(interface, &block) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 15
# Pushes every line yielded by +interface+ through process_line, threading
# the node returned by one call into the next, then flushes the last node
# with process_tail.
#
# interface - any object whose each_line yields successive lines.
# block     - forwarded to process_line and process_tail.
#
# Returns the result of process_tail.
def each_line(interface, &block)
  current = new_node
  interface.each_line { |raw| current = process_line(raw, current, &block) }
  process_tail(current, &block)
end
file(path) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 25
# Reads the whole file at +path+ into memory and parses it with string.
#
# Returns whatever string returns for the file's contents.
def file(path)
  contents = File.read(path)
  string(contents)
end
new_node(line=0) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 45
# Builds the empty accumulator Hash used while scanning a dataset.
#
# line - line number the node starts counting from (default 0).
#
# The :offset entry is 0 for a node starting at line 0 and line - 1
# otherwise. Parsing begins in the :attributes section with empty
# attribute and data buffers.
def new_node(line=0)
  offset = if line == 0
    0
  else
    line - 1
  end

  {
    line: line,
    section: :attributes,
    data: '',
    attributes: '',
    data_line: 0,
    offset: offset
  }
end
parse_node(node) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 112
# Converts the raw text buffers of +node+ into structured values, in place.
#
# node[:attributes] is replaced by the result of parse_yaml_attributes.
# When that result is blank the data buffer is left untouched and false is
# returned; otherwise node[:data] is replaced by the array produced by
# Quandl::Data::Format.csv_to_array and the node itself is returned.
def parse_node(node)
  parsed_attributes = parse_yaml_attributes(node)
  node[:attributes] = parsed_attributes

  if parsed_attributes.blank?
    # nothing usable without attributes
    false
  else
    node[:data] = Quandl::Data::Format.csv_to_array(node[:data])
    node
  end
end
process_line(rline, node, &block) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 55
# Feeds one raw input line into the node that is currently being assembled.
#
# rline - the line as read from the input (it may still carry its newline).
# node  - Hash in the shape produced by new_node; it is mutated in place.
# block - forwarded to process_node when a completed node is flushed.
#
# Returns the node to use for the next line: the same Hash, or a fresh one
# from new_node when an attribute line arrives while in the :data section.
#
# NOTE(review): SYNTAX is defined outside this view. From its use here,
# SYNTAX[:comment] and SYNTAX[:data] are compared with the first character
# of the stripped line and SYNTAX[:attribute] is matched as a pattern.
def process_line(rline, node, &block)
  # count every line seen, including comments and blanks
  node[:line] += 1
  # work on a whitespace-trimmed copy; rline itself is kept for attributes
  line = rline.strip.rstrip
  # comments and blank lines carry no content; inside the attributes section
  # a newline is still appended (presumably so YAML error line numbers keep
  # matching the input - TODO confirm)
  if line[0] == SYNTAX[:comment] || line.blank?
    node[:attributes] += "\n" if node[:section] == :attributes
    return node
  end
  # looking at an attribute?
  if line =~ SYNTAX[:attribute]
    # an attribute while in the data section means the previous node is done
    if node[:section] == :data
      # hand the finished node to process_node, which reports the result
      # through the block
      process_node(node, &block)
      # start a new node that continues counting from the current line
      node = new_node( node[:line] )
    end
    # update the node's section
    node[:section] = :attributes
  # entering the data section?
  elsif line[0] == SYNTAX[:data]
    # remember where data starts: the line after this delimiter
    node[:data_line] = node[:line] + 1
    node[:section] = :data
    # the delimiter itself is not stored
    return node
  end
  # drop trailing commas from attribute lines
  rline = rline.gsub(/,+$/,'') if node[:section] == :attributes
  # data lines are stored trimmed plus a newline; attribute lines are stored
  # as read, so their leading whitespace and line ending are kept
  node[ node[:section] ] += ( node[:section] == :data ) ? "#{line}\n" : rline
  # return the updated node
  node
end
process_node(node, &block) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 93
# Parses a completed node, turns it into a dataset and reports the outcome
# through the block.
#
# node  - Hash accumulated by process_line.
# block - called as block.call(dataset, nil) on success, or
#         block.call(nil, error) when parsing or conversion raises.
#
# Returns true when the node was handled, false when parse_node rejected it
# or an error was reported to the block.
#
# Only StandardError and ScriptError are reported to the block. Rescuing
# Exception would also capture Interrupt, SystemExit and NoMemoryError, so
# a Ctrl-C or exit request would be converted into a per-dataset error and
# loading would carry on. ScriptError stays in the list so
# NotImplementedError is still reported.
# NOTE(review): old Psych releases reportedly derive Psych::SyntaxError from
# ::SyntaxError (a ScriptError) - TODO confirm; keeping ScriptError covers
# that case as well.
def process_node(node, &block)
  parsed = parse_node(node)
  # parse_node signals "nothing to do" with false
  return false if parsed == false

  dataset = convert_node_to_dataset(parsed)
  # do whatever the caller wants with the dataset
  block.call(dataset, nil) unless dataset.nil?
  true
rescue StandardError, ScriptError => err
  block.call(nil, err)
  false
end
process_tail(node, &block) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 49
# Flushes the node that is still pending once the input is exhausted by
# feeding two synthetic lines through process_line: '-' followed by
# 'tail: end'. Both calls receive the same +node+ object.
#
# Returns the result of the second process_line call.
def process_tail(node, &block)
  terminators = ['-', 'tail: end']
  terminators.map { |marker| process_line(marker, node, &block) }.last
end
string(input) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 29
# Parses +input+ line by line and collects everything the parser reports.
#
# input - object whose each_line yields successive lines.
#
# Returns an Array with one entry per callback made by process_line and
# process_tail. The collecting block only looks at its first argument, so a
# callback made as (nil, error) adds nil to the result.
def string(input)
  datasets = []
  # one shared collector for both the per-line pass and the tail flush
  collect = proc { |dataset| datasets << dataset }

  node = new_node
  input.each_line { |line| node = process_line(line, node, &collect) }
  # signify end
  process_tail(node, &collect)

  datasets
end

Protected Class Methods

convert_node_to_dataset(node) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 137
# Builds a Quandl::Format::Dataset from a parsed node.
#
# node - Hash whose :attributes entry is passed to the Dataset constructor
#        and whose :data entry is assigned through the data= writer.
#
# Returns the dataset.
# Any exception raised while building it is re-raised as the same exception
# class, with its message replaced by generate_dataset_error's output.
def convert_node_to_dataset(node)
  Quandl::Format::Dataset.new(node[:attributes]).tap do |built|
    built.data = node[:data]
  end
rescue Exception => err
  raise err, generate_dataset_error(node, err)
end
generate_dataset_error( node, err ) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 175
# Builds the message used when a node cannot be converted into a dataset.
#
# node - Hash with :attributes (a Hash that may hold :source_code and
#        :code), :line and :data_line.
# err  - the exception being reported.
#
# The message starts with "source_code/code " (the source_code part only
# when present), then a line hint: node[:data_line] + err.line when the
# error exposes a line, otherwise node[:line]. It ends with the error text
# and class.
#
# The error text is taken from +err+ itself rather than the global $!, so
# the message is complete even when this is called outside a rescue clause.
# Attribute values are interpolated, so a missing :code or a non-String
# :source_code cannot raise here and hide the error being reported.
#
# Returns the message String, terminated by a newline.
def generate_dataset_error( node, err )
  attributes = node[:attributes]
  message = ''
  message += "#{attributes[:source_code]}/" if attributes[:source_code].present?
  message += "#{attributes[:code]} "
  # include specific line if available
  if err.respond_to?(:line)
    message += "error at line #{node[:data_line].to_i + err.line.to_i}\n"
  else
    message += "error around line #{node[:line]}\n"
  end
  # include original error
  message += "#{err} (#{err.class})"
  message += "\n"
  message
end
generate_yaml_parse_error(node, err) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 146
# Builds the message used when a node's attribute text cannot be parsed.
#
# node - Hash with :attributes (the raw attribute text), :offset and :line.
# err  - the exception raised while parsing.
#
# Cases, checked in order:
# * message 'Unparsable input': a fixed hint about missing colons.
# * Psych::SyntaxError exposing problem: a line-numbered hint. "mapping
#   values are not allowed" and "expected ':'" get extra guidance.
# * any other Psych::SyntaxError: the error text followed by the attributes.
# * any other error exposing line: position plus the offending line.
# * anything else: the error's own message and class. Without this fallback
#   such errors produced an empty message and the cause was lost.
#
# Returns the message String, terminated by a newline.
def generate_yaml_parse_error(node, err)
  message = ""
  if err.message == 'Unparsable input'
    message = "Input data is unparsable.  Are you missing a colon (:) or a space after a colon?\n"
  elsif err.is_a?(Psych::SyntaxError) && err.respond_to?(:problem)
    if err.problem =~ /mapping values are not allowed in this context/
      message = "Syntax error *before* line #{1+node[:offset] + err.line}.\n"
      if node[:attributes] =~ /:.+:/ # a field value probably contains a colon
        message += "You might have an illegal colon (:) in one of your fields.  If so, use quotes.\n"
      elsif node[:attributes] =~ /^([^:]+)$/ # a line with no colon at all
        message += "Did you forget a colon on this line:\n"
        message += "#{$1}\n"
      end
    else
      message += "Error parsing metadata. #{err.problem.capitalize} on line #{node[:offset] + err.line}\n"
      if err.problem =~ /expected ':'/
        message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('#{SYNTAX[:data]}')?\n"
      end
    end
  elsif err.is_a?(Psych::SyntaxError)
    message = err.to_s + "\n" + node[:attributes]
  elsif err.respond_to?(:line)
    if node.has_key?(:line)
      message += "Attribute parse error at line #{ node[:line] + err.line } column #{err.column}. #{err.problem} (#{err.class})\n"
    end
    message += "Encountered error while parsing: \n  " + node[:attributes].split("\n")[err.line - 1].to_s
  else
    # unknown failure: keep the original error text instead of dropping it
    message += "#{err.message} (#{err.class})"
  end
  message += "\n"
  message
end
parse_yaml_attributes(node) click to toggle source
# File lib/quandl/format/dataset/load.rb, line 124
# Parses the attribute text of +node+ as YAML and normalizes the keys.
#
# node - Hash whose :attributes entry holds the raw attribute text.
#
# Returns a new Hash whose keys are the downcased, symbolized form of the
# parsed keys.
# Raises when the text does not parse to a Hash, or when anything else goes
# wrong. The exception is re-raised as the same class with its message
# replaced by generate_yaml_parse_error's output.
#
# NOTE(review): YAML.load can build arbitrary objects on some Psych
# versions; confirm the attribute text is trusted or switch to a safe
# loader.
def parse_yaml_attributes(node)
  loaded = YAML.load(node[:attributes])
  raise 'Unparsable input' unless loaded.is_a? Hash

  loaded.symbolize_keys!.each_with_object({}) do |(key, value), attrs|
    attrs[key.to_s.downcase.to_sym] = value
  end
rescue Exception => err
  raise err, generate_yaml_parse_error(node, err)
end