class Mikon::DataFrame

The main data structure in the Mikon gem. A DataFrame consists of labels (column names), an index (row names), and the column data.

Attributes

index[R]
labels[R]
name[R]

Public Class Methods

from_csv(path, options={}) { |csv| ... } click to toggle source

Create Mikon::DataFrame from a csv/tsv file @param [String] path path to csv @param options

:col_sep [String] string to separate by
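:headers [Array|true] column headers; defaults to true (headers are read from the first row). Passing false raises ArgumentError.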
# File lib/mikon/core/dataframe.rb, line 113
# Build a DataFrame from a csv/tsv file.
#
# The defaults below are merged with +options+ and handed to CSV. The parsed
# table is yielded to the optional block and then converted column by column.
#
# @param path [String] path to the csv file
# @param options [Hash] overrides for the CSV defaults (:col_sep, :headers,
#   :converters, :header_converters); whatever is left once those four keys
#   are removed is forwarded to the constructor
# @yield [csv] the parsed table, before it is converted
# @return [Mikon::DataFrame]
# @raise [ArgumentError] if options[:headers] is false
def self.from_csv(path, options={})
  csv_options = {
    :col_sep => ',',
    :headers => true,
    :converters => :numeric,
    :header_converters => :symbol,
  }

  options = csv_options.merge(options)
  raise ArgumentError, "options[:headers] should be set" if options[:headers] == false
  # The symbol header converter is dropped when headers are supplied explicitly.
  options.delete(:header_converters) if options[:headers].is_a?(Array)

  # CSV takes its options as keywords; the file is opened for reading by default,
  # so no explicit "r" mode is passed.
  csv = CSV.readlines(path, **options)
  yield csv if block_given?

  hash = {}
  csv.by_col.each {|label, arr| hash[label] = arr}
  csv_options.keys.each{|key| options.delete(key)}

  self.new(hash, options)
end
new(source, options={}) click to toggle source
# File lib/mikon/core/dataframe.rb, line 12
# Build a DataFrame from one of several source shapes.
#
# Accepted sources, as dispatched below:
# * an empty Array
# * an Array of Mikon::DArray (used as the columns as-is)
# * an Array of Mikon::Row (labels and index are taken from the rows)
# * an Array of Hash (one hash per row; labels come from the first hash)
# * an Array of Array (one inner array per column)
# * a Hash of label => Array
# * a Hash of label => Mikon::Series (each series is converted with #to_darr)
# An Array of Mikon::Series is recognised but still raises.
#
# @param source [Array, Hash] see the list above
# @param options [Hash]
# @option options [String] :name defaults to a random UUID
# @option options [Symbol, Array, nil] :index a column label whose column is
#   removed from the data and used as the index, or an explicit Array
# @option options [Array, nil] :labels column labels; overrides labels taken from source
# @raise [RuntimeError] if the source shape or index type is not supported, or
#   the requested index column does not exist
def initialize(source, options={})
  options = {
    name: SecureRandom.uuid(),
    index: nil,
    labels: nil
  }.merge(options)

  case
  when source.is_a?(Array)
    case
    when source.length == 0
      @data = [Mikon::DArray.new([])]
    when source.all? {|el| el.is_a?(Mikon::Series)}
      raise "NotImplementedError" + source.to_s

    when source.all? {|el| el.is_a?(Mikon::DArray)}
      @data = source

    when source.all? {|el| el.is_a?(Mikon::Row)}
      @labels = source.first.labels
      @index = source.map{|row| row.index}
      @data = source.map{|row| row.to_hash.values}.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end

    when source.all? {|el| el.is_a?(Hash)}
      @labels = source.first.keys
      @data = source.map{|hash| hash.values}.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end

    when source.all? {|el| el.is_a?(Array)}
      @data = source.map do |arr|
        Mikon::DArray.new(arr)
      end

    else raise "Non-acceptable Arguments Error"
    end

  when source.is_a?(Hash)
    case
    when source.values.all? {|val| val.is_a?(Array)}
      @labels = source.keys
      @data = source.values.map do |arr|
        Mikon::DArray.new(arr)
      end
    when source.values.all? {|val| val.is_a?(Mikon::Series)}
      # Previously this branch was empty, which left @data unset.
      @labels = source.keys
      @data = source.values.map {|series| series.to_darr}
    else raise "Non-acceptable Arguments Error"
    end

  else raise "Non-acceptable Arguments Error"
  end

  @labels = options[:labels] unless options[:labels].nil?
  @name = options[:name]

  unless (index = options[:index]).nil?
    if index.is_a?(Symbol)
      raise "labels should be set" if @labels.nil?
      pos = @labels.index(index)
      raise "There is no column named " + index.to_s if pos.nil?
      # Remove exactly one label and the column at the same position, so the
      # two arrays stay aligned even when labels or columns compare equal.
      @labels.delete_at(pos)
      # NOTE(review): the index becomes the removed column object itself, not
      # a plain Array - confirm the rest of the class copes with that.
      @index = @data.delete_at(pos)
    elsif index.is_a?(Array)
      @index = index
    else
      raise "Invalid index type"
    end
  end

  _check_if_valid
end

Public Instance Methods

[](arg) click to toggle source

Accessor for column and rows @example

df = DataFrame.new({a: [1, 2, 3], b: [2, 3, 4]})
df[0..1].to_json #-> {a: [1, 2], b: [2, 3]}
df[:a] #-> <Mikon::Series>
# File lib/mikon/core/dataframe.rb, line 140
# Look up rows by an index range, or a single column by its label.
#
# @param arg [Range, Symbol] a Range selects the rows whose index value lies
#   inside it; a Symbol selects the column with that label
# @return [Mikon::DataFrame, Object, nil] a new frame for a Range, the result
#   of #column for a Symbol, nil for anything else
def [](arg)
  case arg
  when Range
    picked = @index.select { |idx| arg.include?(idx) }
    rows = picked.map { |idx| row(idx) }
    Mikon::DataFrame.new(rows, {index: picked})
  when Symbol
    column(arg)
  end
end
_check_if_valid() click to toggle source
# File lib/mikon/core/dataframe.rb, line 85
# Bring the internal state into a consistent shape.
#
# Asks every column shorter than the longest one to expand to that length,
# generates a 0-based integer index when none is set, and converts labels to
# Symbols (generating "0", "1", ... when there are none).
#
# @raise [RuntimeError] if the index length differs from the column length
def _check_if_valid
  # With no columns at all, max returns nil; treat that as length 0.
  length = @data.map { |darr| darr.length }.max || 0
  @data.each { |darr| darr.expand(length) if darr.length < length }

  # DataFrame should have an index object
  @index = (0...length).to_a if @index.nil?
  raise "index should have the same length as arrays" if @index.length != length

  # Labels should be instances of Symbol
  if @labels.nil?
    @labels = (0...@data.length).map { |i| i.to_s.to_sym }
  elsif @labels.any? { |label| !label.is_a?(Symbol) }
    @labels = @labels.map { |label| label.to_sym }
  end
end
all?(&block) click to toggle source

Mikon::Row DSL

# File lib/mikon/core/dataframe.rb, line 246
# Row DSL predicate: the block is evaluated with each row as self, and the
# result is true only if it is truthy for every row (true when there are none).
#
# @return [Boolean]
def all?(&block)
  each_row do |row|
    next if row.instance_eval(&block)
    return false
  end
  true
end
any?(&block) click to toggle source

Mikon::Row DSL

# File lib/mikon/core/dataframe.rb, line 252
# Row DSL predicate: the block is evaluated with each row as self, and the
# result is true as soon as it is truthy for one row (false when there are none).
#
# @return [Boolean]
def any?(&block)
  each_row do |row|
    matched = row.instance_eval(&block)
    return true if matched
  end
  false
end
collect(&block)
Alias for: map
column(label) click to toggle source

Access column with its name

# File lib/mikon/core/dataframe.rb, line 152
# Fetch a single column by its label.
#
# @param label [Symbol] column name
# @return [Mikon::Series] built from the label, the column data and the index
# @raise [RuntimeError] if no column has that label
def column(label)
  pos = @labels.index(label)
  # Interpolate rather than concatenate: String#+ raises TypeError for a
  # Symbol label, which hid the intended message.
  raise "There is no column named #{label}" if pos.nil?
  Mikon::Series.new(label, @data[pos], index: @index)
end
delete(label) click to toggle source

Delete column

# File lib/mikon/core/dataframe.rb, line 351
# Remove the column with the given label (destructive).
#
# @param label [Symbol] column name
# @return the column data that was removed
# @raise [RuntimeError] if no column has that label
def delete(label)
  position = @labels.index(label)
  raise "there is no column named #{label}" unless position
  @labels.delete_at(position)
  @data.delete_at(position)
end
dup() click to toggle source
# File lib/mikon/core/dataframe.rb, line 358
# Copy the frame: every column is duplicated and the index and labels are
# carried over to the new frame.
#
# @return [Mikon::DataFrame]
def dup
  # The constructor reads :labels (not :label), so the old key dropped the
  # column names. The arrays are cloned so that mutating the labels or index
  # of one frame leaves the other untouched.
  Mikon::DataFrame.new(@data.map { |darr| darr.dup }, {index: @index.clone, labels: @labels.clone})
end
dup_only_valid() click to toggle source
# File lib/mikon/core/dataframe.rb, line 362
# Return a copy of this frame; at present this simply delegates to #dup.
#
# @return whatever #dup returns
def dup_only_valid
  dup
end
each(&block) click to toggle source

Iterate rows using Mikon::Row DSL

# File lib/mikon/core/dataframe.rb, line 225
# Run the block once per row, with the row as self (Row DSL).
#
# @return [Enumerator] when no block is given
# @return [self] when a block is given
def each(&block)
  if block_given?
    each_row { |row| row.instance_eval(&block) }
    self
  else
    to_enum(:each)
  end
end
each_row(&block) click to toggle source

Iterate row

# File lib/mikon/core/dataframe.rb, line 334
# Yield every row of the frame, one per index entry, as a Mikon::Row.
#
# @yieldparam row [Mikon::Row] built from the labels, the value at that
#   position in every column, and the index value at that position
# @return [Enumerator] when no block is given
def each_row(&block)
  return to_enum(:each_row) unless block_given?
  @index.each.with_index do |_value, pos|
    cells = @data.map { |column| column[pos] }
    yield Mikon::Row.new(@labels, cells, @index[pos])
  end
end
fillna(value=0) click to toggle source

Replace NaN with specified value (destructive) @param [Float|Fixnum] value new value to replace NaN

# File lib/mikon/core/dataframe.rb, line 345
# Call fillna(value) on every column (destructive).
#
# @param value the replacement passed to each column's fillna; defaults to 0
# @return [self]
def fillna(value=0)
  @data.each do |column|
    column.fillna(value)
  end
  self
end
filter(&block)
Alias for: select
head(num) click to toggle source

same as head of Linux

# File lib/mikon/core/dataframe.rb, line 159
# Select the rows whose index value lies in 0..(num - 1), via #[].
#
# @param num [Integer] upper bound (exclusive) of the index range
# @return whatever #[] returns for that range
def head(num)
  upper = num - 1
  self[0..upper]
end
insert_column(*args, &block) click to toggle source

Insert column using Mikon::Row DSL or raw Array @param [Symbol] label the name of the new column (taken from the Series when a Series is passed as the first argument) @param [Array|Series|DArray] column the content of the new column (omit when a block is given) @example

df = Mikon::DataFrame.new({a: [1,2,3], b: [2,3,4]})
df.insert_column(:c){a + b}.to_json #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7]}
df.insert_column(:d, [1, 2, 3]).to_json #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7], d: [1,2,3]}
df.insert_column((df[:d]*2).name(:e)) #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7], d: [1,2,3], e: [2,4,6]}
# File lib/mikon/core/dataframe.rb, line 290
# Add a column (destructive).
#
# Three call shapes are handled:
# * insert_column(label) { ... }      - the block is evaluated with each row
#   as self and its result is stored in the row under +label+
# * insert_column(label, content)     - content is a DArray, Series or Array
# * insert_column(series)             - the label is taken from the series
#
# @return [self]
# @raise [ArgumentError] if, without a block, the arguments match none of the
#   shapes above
def insert_column(*args, &block)
  if block_given?
    rows = []
    name = args[0]
    self.each_row do |row|
      val = row.instance_eval(&block)
      row[name] = val
      rows.push(row)
    end
    # Rebuild every column from the updated rows.
    @data = rows.map{|row| row.to_hash.values}.transpose.map do |arr|
      Mikon::DArray.new(arr)
    end
    @labels = rows.first.labels
  else
    first, content = args
    # Resolve both the label and the column before touching any state, so an
    # unsupported argument raises instead of pushing a nil label.
    case first
    when Symbol
      name = first
      column = case content
               when Mikon::DArray then content
               when Mikon::Series then content.to_darr
               when Array then Mikon::DArray.new(content)
               else raise ArgumentError, "column content should be a DArray, Series or Array"
               end
    when Mikon::Series
      name = first.name
      column = first.to_darr
    else
      raise ArgumentError, "first argument should be a Symbol or a Series"
    end
    @data.push(column)
    @labels.push(name)
  end
  _check_if_valid
  return self
end
length() click to toggle source

Return the number of rows (the length of the first column)

# File lib/mikon/core/dataframe.rb, line 103
# Number of rows, taken from the first column.
#
# @return [Integer] the first column's length, or 0 when there are no columns
def length
  first_column = @data.first
  # A frame with no columns used to raise NoMethodError on nil here.
  first_column.nil? ? 0 : first_column.length
end
map(&block) click to toggle source

Iterate rows using Mikon::Row DSL and return new Mikon::Series

# File lib/mikon/core/dataframe.rb, line 234
# Evaluate the block with each row as self (Row DSL) and collect the results
# into a new Mikon::Series named :new_series, indexed by a copy of this
# frame's index.
#
# @return [Enumerator] when no block is given
# @return [Mikon::Series] otherwise
def map(&block)
  return to_enum(:map) unless block_given?
  values = []
  each_row { |row| values << row.instance_eval(&block) }
  Mikon::Series.new(:new_series, values, index: @index.clone)
end
Also aliased as: collect
pivot(args={}) click to toggle source

Experimental Implementation. DO NOT USE THIS METHOD

# File lib/mikon/pivot.rb, line 5
# Experimental pivot: builds a new frame with one column per distinct value of
# args[:column] and one row per distinct value of args[:row], filled from
# args[:value]. Marked "DO NOT USE" by the original author.
#
# @param args [Hash]
# @option args [Symbol] :column label whose factors become the new columns
# @option args [Symbol] :row label whose factors become the new index
# @option args [Symbol] :value label the cell values are read from
# @option args :fill_value used where no row matches; defaults to Float::NAN
# @return [Mikon::DataFrame]
# @raise [ArgumentError] unless :column, :row and :value are all Symbols
def pivot(args={})
  defaults = {
    column: nil,
    row: nil,
    value: nil,
    fill_value: Float::NAN
  }
  args = defaults.merge(args)

  required = [:column, :row, :value]
  raise ArgumentError unless required.all? { |key| args[key].is_a?(Symbol) }

  column_factors = self[args[:column]].factors
  index = self[args[:row]].factors

  source = column_factors.reduce({}) do |memo, label|
    cells = []
    subset = self.select { |row| row[args[:column]] == label }
    index.each do |i|
      if subset.any? { |row| row[args[:row]] == i }
        # Only the first matching value is kept.
        matches = subset.select { |row| row[args[:row]] == i }[args[:value]]
        cells.push(matches.to_a[0])
      else
        cells.push(args[:fill_value])
      end
    end
    memo[label] = cells
    memo
  end

  Mikon::DataFrame.new(source, index: index)
end
plot(args={}) click to toggle source
# File lib/mikon/plot.rb, line 24
# Build a Nyaplot::Plot from this frame.
#
# @param args [Hash]
# @option args [Symbol] :type :line (default), :box or :scatter
# @option args [Symbol] :x x column label, used by :scatter
# @option args [Symbol] :y y column label, used by :scatter
# @option args [Symbol] :fill_by passed to the scatter diagram's fill_by when set
# @option args [Symbol] :color name of a Nyaplot::Colors method; Colors.qual when nil
# @return [Nyaplot::Plot]
def plot(args={})
  defaults = {
    :type => :line,
    :x => nil,
    :y => nil,
    :fill_by => nil,
    :color => nil
  }
  args = defaults.merge(args)

  chart = Nyaplot::Plot.new
  chart.x_label("")
  chart.y_label("")

  colors = if args[:color].nil?
             Nyaplot::Colors.qual.to_a
           else
             Nyaplot::Colors.send(args[:color]).to_a
           end

  case args[:type]
  when :line
    # One line per column, plotted against the index; colours are taken from
    # the end of the palette.
    @data.each.with_index do |darr, pos|
      line = chart.add(:line, @index, darr.to_a)
      line.color(colors.pop)
      line.title(@labels[pos])
    end
    chart.legend(true)

  when :box
    chart.add_with_df(self, :box, *@labels)

  when :scatter
    scatter = chart.add_with_df(self, :scatter, args[:x], args[:y])
    scatter.color(colors)
    scatter.fill_by(args[:fill_by]) unless args[:fill_by].nil?
    chart.x_label(args[:x])
    chart.y_label(args[:y])
  end

  chart
end
row(index) click to toggle source

Access row using index

# File lib/mikon/core/dataframe.rb, line 327
# Fetch the row stored under the given index value.
#
# @param index the index value to look up
# @return [Mikon::Row] built from the labels, the value at the matching
#   position in every column, and the index value itself
def row(index)
  position = @index.index(index)
  values = @data.map { |column| column[position] }
  Mikon::Row.new(@labels, values, index)
end
select(&block) click to toggle source

Select rows using Mikon::Row DSL and create new DataFrame @example

df = Mikon::DataFrame.new({a: [1,2,3], b: [2,3,4]})
df.select{a%2==0}[:a].to_a #-> [2]
# File lib/mikon/core/dataframe.rb, line 210
# Keep the rows for which the block, evaluated with the row as self (Row DSL),
# is truthy, and build a new frame from them.
#
# @return [Enumerator] when no block is given
# @return [Mikon::DataFrame] otherwise
def select(&block)
  return to_enum(:select) unless block_given?
  kept = []
  each_row do |row|
    kept << row if row.instance_eval(&block)
  end
  Mikon::DataFrame.new(kept)
end
Also aliased as: filter
sort(label, ascending=true) click to toggle source

Sort by label @param [Symbol] label column name to sort by @param [Bool] ascending default true

# File lib/mikon/core/dataframe.rb, line 273
# Sort the rows by the values of one column.
#
# @param label [Symbol] column name to sort by
# @param ascending [Boolean] default true; the sorted order is reversed when false
# @return whatever #sort_by returns
# @raise [RuntimeError] if no column has that label
def sort(label, ascending=true)
  column_pos = @labels.index(label)
  # The message previously ran the label straight into "named".
  raise "No column named #{label}" if column_pos.nil?
  order = @data[column_pos].sorted_indices
  order.reverse! unless ascending
  # Each row's sort key is the rank of its position in the sorted order. The
  # block parameter no longer shadows the column position above.
  self.sort_by.with_index { |_row, row_pos| order.index(row_pos) }
end
sort_by(ascending=true, &block) click to toggle source

Sort using Mikon::Row DSL @param [Bool] ascending default true

# File lib/mikon/core/dataframe.rb, line 260
# Sort the rows by the value the block yields for each row (Row DSL).
#
# The block is run through #map, the result is converted with to_darr, and its
# sorted_indices decide the new order of every column and of the index.
#
# @param ascending [Boolean] default true; the order is reversed when false
# @return [Enumerator] when no block is given
# @return [Mikon::DataFrame] a new frame with the same labels
def sort_by(ascending=true, &block)
  return to_enum(:sort_by) unless block_given?
  order = map(&block).to_darr.sorted_indices
  order.reverse! unless ascending
  # The sort key of the element at position pos is its rank within order.
  sorted_columns = @data.map do |column|
    column.sort_by.with_index { |_value, pos| order.index(pos) }
  end
  sorted_index = @index.sort_by.with_index { |_value, pos| order.index(pos) }
  Mikon::DataFrame.new(sorted_columns, {index: sorted_index, labels: @labels})
end
tail(num) click to toggle source

same as tail of Linux

# File lib/mikon/core/dataframe.rb, line 164
# Select the rows whose index value lies in the last num positions counted
# from #length, via #[].
#
# @param num [Integer] how many trailing positions to cover
# @return whatever #[] returns for that range
def tail(num)
  final = length - 1
  start = final - num + 1
  self[start..final]
end
to_html(threshold=50) click to toggle source

IRuby notebook automatically call this method

# File lib/mikon/core/dataframe.rb, line 179
# Render the frame as an HTML table (per the original note, IRuby notebook
# calls this automatically).
#
# Rows at positions 0..threshold are rendered, followed by a "..." row, and
# the last row is always rendered.
#
# @param threshold [Integer] last row position rendered before eliding; default 50
# @return [String]
def to_html(threshold=50)
  header_cells = @labels.map { |label| "<th>#{label}</th>" }.join
  parts = ["<html><table><tr><td></td>", header_cells, "</tr>"]
  each_row.with_index do |row, pos|
    next if pos > threshold && pos != length - 1
    cells = @labels.map { |label| "<td>#{row[label]}</td>" }.join
    parts << "<tr><th>#{@index[pos]}</th>" << cells << "</tr>"
    if pos == threshold
      parts << "<tr><th>...</th>" + "<td>...</td>" * @labels.length + "</tr>"
    end
  end
  parts << "</table>"
  parts.join
end
to_json(*args) click to toggle source

Compatible with Nyaplot::DataFrame.to_json

# File lib/mikon/core/dataframe.rb, line 170
# Serialise the frame as a JSON array holding one object per row (each row's
# to_hash).
#
# @param args generator state/options, forwarded to Array#to_json
# @return [String]
def to_json(*args)
  rows = []
  each_row { |row| rows.push(row.to_hash) }
  # Forward the generator arguments instead of dropping them, so formatting
  # options and nested generation state are honoured.
  rows.to_json(*args)
end
to_s(threshold=50) click to toggle source
# File lib/mikon/core/dataframe.rb, line 193
# Display the frame as a text table through Formatador.
#
# Rows at positions 0..threshold are shown, followed by a "..." row, and the
# last row is always shown. The index value goes in a column with an empty name.
#
# @param threshold [Integer] last row position shown before eliding; default 50
# @return whatever Formatador.display_table returns
def to_s(threshold=50)
  table = []
  each_row.with_index do |row, pos|
    next if pos > threshold && pos != length - 1
    table << {"" => @index[pos]}.merge(row.to_hash)
    next unless pos == threshold
    filler = {"" => "..."}
    @labels.each { |label| filler[label] = "..." }
    table << filler
  end
  Formatador.display_table(table.compact)
end