class Rust::DataFrame
Public Class Methods
new(labels_or_data)
click to toggle source
# File lib/rust-core.rb, line 125
# Builds a data frame from either a list of column labels (every column
# starts empty) or a hash mapping each label to its array of values.
#
# labels_or_data:: an Array of labels, or a Hash of label => values.
#                  Labels are converted to strings; the values of a Hash are
#                  cloned, so the frame does not share them with the caller.
#
# Raises TypeError when the argument is neither an Array nor a Hash.
# Without this check @labels stayed nil and later calls failed.
def initialize(labels_or_data)
  @data = {}

  case labels_or_data
  when Array
    @labels = labels_or_data.map { |l| l.to_s }
    @labels.each { |label| @data[label] = [] }
  when Hash
    @labels = labels_or_data.keys.map { |l| l.to_s }
    labels_or_data.each do |key, value|
      @data[key.to_s] = value.clone
    end
  else
    raise TypeError, "Expected an Array or a Hash"
  end
end
pull_variable(variable)
click to toggle source
# File lib/rust-core.rb, line 116
# Builds a DataFrame from the variable named +variable+: asks Rust._pull for
# "colnames(variable)" and then pulls "variable$col" for every returned name.
# NOTE(review): Rust._pull presumably evaluates the expression in R and
# returns its value as a Ruby object - confirm against its definition.
def self.pull_variable(variable)
  pulled = Rust._pull("colnames(#{variable})").each_with_object({}) do |col, columns|
    columns[col] = Rust._pull("#{variable}$#{col}")
  end

  DataFrame.new(pulled)
end
Public Instance Methods
[](rows, cols=nil)
click to toggle source
# File lib/rust-core.rb, line 170
# Selects a subset of the frame.
#
# rows:: a Range or Array of row indices to keep; any other value leaves the
#        rows untouched.
# cols:: an Array of column names to keep (converted to strings); any other
#        value leaves the columns untouched.
#
# Returns the selection, or self when neither filter applies.
# Raises RuntimeError when both arguments are nil/false.
def [](rows, cols=nil)
  raise "You must specify either rows or columns to select" unless rows || cols

  selection = self

  if rows.is_a?(Range) || rows.is_a?(Array)
    selection = selection.select_rows { |_row, i| rows.include?(i) }
  end

  if cols.is_a?(Array)
    selection = selection.select_columns(cols.map(&:to_s))
  end

  selection
end
add_column(name, values=nil) { |row| ... }
click to toggle source
# File lib/rust-core.rb, line 307
# Appends a column called +name+.
#
# values:: the column content; it is cloned and must have as many elements
#          as the frame has rows. When omitted, the block is called once per
#          row (with the row hash) and its results become the column.
#
# Raises RuntimeError if the column already exists, if neither values nor a
# block is given, or if the number of values does not match the row count.
def add_column(name, values=nil)
  raise "Column already exists" if @labels.include?(name)
  raise "Values or block required" unless values || block_given?
  raise "Number of values not matching" if values && values.size != self.rows

  @labels << name

  if values
    copy = values.clone
    @data[name] = copy
    return copy
  end

  # The empty column is registered first, so rows yielded to the block
  # already contain the new label.
  @data[name] = []
  self.each_with_index { |row, i| @data[name][i] = yield(row) }
end
add_row(row)
click to toggle source
# File lib/rust-core.rb, line 284
# Appends one row to the frame.
#
# row:: an Array with one value per column (in label order), or a Hash whose
#       keys, once converted to strings, are exactly the column names.
#
# Returns true. Raises RuntimeError on a size or key mismatch and TypeError
# for any other kind of argument.
def add_row(row)
  case row
  when Array
    raise "Expected an array of size #{@data.size}" unless row.size == @data.size

    @labels.each_with_index { |label, i| @data[label] << row[i] }
  when Hash
    raise "Expected a hash with the following keys: #{@data.keys}" unless row.keys.map { |l| l.to_s }.sort == @data.keys.sort

    row.each { |key, value| @data[key.to_s] << value }
  else
    raise TypeError, "Expected an Array or a Hash"
  end

  true
end
Also aliased as: <<
aggregate(by, **aggregators) { |column| ... }
click to toggle source
# File lib/rust-core.rb, line 474
# Groups the rows by the value of column +by+ and collapses each group into
# a single row.
#
# by::          name of the grouping column (String).
# aggregators:: column name => Proc; the proc receives the group's values for
#               that column and returns the aggregated value.
# block::       default aggregator, used for columns without their own proc.
#
# Returns a new Rust::DataFrame with one row per distinct +by+ value, in
# sorted order; an empty frame yields an empty result.
# Raises TypeError for a non-String +by+ or non-Proc aggregators, and
# RuntimeError when no block is given.
def aggregate(by, **aggregators)
  raise TypeError, "Expected a string" unless by.is_a?(String)
  raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
  raise "Expected a block for default aggregator" unless block_given?

  aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

  sorted = self.sort_by(by)

  # Split the sorted rows into runs that share the same key.
  groups = []
  group = nil
  current_value = nil
  sorted.column(by).each_with_index do |value, index|
    # `group.nil?` opens the first group even when the first key is nil.
    if group.nil? || current_value != value
      current_value = value
      group = Rust::DataFrame.new(self.column_names)
      groups << group
    end
    group << sorted.fast_row(index)
  end

  result = Rust::DataFrame.new(self.column_names)
  groups.each do |members|
    aggregated_row = { by => members.column(by)[0] }
    (self.column_names - [by]).each do |column|
      values = members.column(column)
      aggregated_row[column] = aggregators[column] ? aggregators[column].call(values) : yield(values)
    end
    result << aggregated_row
  end

  result
end
bind_columns(dataframe)
click to toggle source
# File lib/rust-core.rb, line 577
# Non-destructive variant of bind_columns!: returns a clone of this frame
# with the columns of +dataframe+ appended; the receiver is left untouched.
def bind_columns(dataframe)
  self.clone.tap { |copy| copy.bind_columns!(dataframe) }
end
Also aliased as: cbind
bind_columns!(dataframe)
click to toggle source
# File lib/rust-core.rb, line 557
# Appends every column of +dataframe+ to this frame, in place.
#
# Returns true. Raises TypeError unless +dataframe+ is a DataFrame, and
# RuntimeError when the row counts differ or a column name is shared.
def bind_columns!(dataframe)
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
  raise "The number of rows are not compatible" unless self.rows == dataframe.rows

  shared = self.column_names & dataframe.column_names
  raise "The dataset would override some columns" unless shared.empty?

  dataframe.column_names.each do |label|
    self.add_column(label, dataframe.column(label))
  end

  true
end
Also aliased as: cbind!
bind_rows(dataframe)
click to toggle source
# File lib/rust-core.rb, line 570
# Non-destructive variant of bind_rows!: returns a clone of this frame with
# the rows of +dataframe+ appended; the receiver is left untouched.
def bind_rows(dataframe)
  self.clone.tap { |copy| copy.bind_rows!(dataframe) }
end
Also aliased as: rbind
bind_rows!(dataframe)
click to toggle source
# File lib/rust-core.rb, line 545
# Appends every row of +dataframe+ to this frame, in place.
#
# Returns true. Raises TypeError unless +dataframe+ is a DataFrame, and
# RuntimeError when +dataframe+ does not share all of this frame's columns.
def bind_rows!(dataframe)
  raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)

  mine = self.column_names
  theirs = dataframe.column_names
  unless (mine & theirs).size == self.columns
    raise "The columns are not compatible: #{mine - theirs} - #{theirs - mine}"
  end

  dataframe.each { |row| self << row }

  true
end
Also aliased as: rbind!
clone()
click to toggle source
# File lib/rust-core.rb, line 584
# Returns a new DataFrame with the same columns and values.
#
# The source hash is rebuilt in @labels order before being handed to the
# constructor, which derives the column order from the hash keys. @data can
# have a different key order than @labels (rename_column! re-inserts the
# renamed key at the end), and passing it directly reordered the columns.
def clone
  ordered = @labels.map { |label| [label, @data[label]] }.to_h
  DataFrame.new(ordered)
end
column(name)
click to toggle source
# File lib/rust-core.rb, line 185
# Returns the array of values stored under +name+ (the stored array itself,
# not a copy), or nil when there is no such column.
def column(name)
  @data[name]
end
Also aliased as: |
column_names()
click to toggle source
# File lib/rust-core.rb, line 271
# Returns a new array with the column labels, each converted to a String.
def column_names
  @labels.map(&:to_s)
end
Also aliased as: colnames
columns()
click to toggle source
# File lib/rust-core.rb, line 280
# Returns the number of columns, i.e. the number of labels.
def columns
  @labels.length
end
delete_column(column)
click to toggle source
# File lib/rust-core.rb, line 232
# Removes +column+ from both the label list and the data.
# Returns the removed array of values, or nil if the column was not stored.
def delete_column(column)
  @labels.delete(column)
  removed_values = @data.delete(column)
  removed_values
end
delete_row(i)
click to toggle source
# File lib/rust-core.rb, line 237
# Removes the element at index +i+ from every column.
# Returns the internal label => values hash.
def delete_row(i)
  @data.each_value { |values| values.delete_at(i) }
end
each() { |element| ... }
click to toggle source
# File lib/rust-core.rb, line 323
# Yields every row (as produced by each_with_index) to the block.
# Returns self.
def each
  self.each_with_index { |element, _i| yield element }
  self
end
each_with_index() { |element, i| ... }
click to toggle source
# File lib/rust-core.rb, line 339
# Yields every row together with its index. Each row is a freshly built
# Hash mapping label => value, in label order.
# Returns self.
def each_with_index
  self.rows.times do |i|
    element = @labels.map { |label| [label, @data[label][i]] }.to_h
    yield element, i
  end

  self
end
fast_each() { |element| ... }
click to toggle source
# File lib/rust-core.rb, line 331
# Yields every row as a plain Array of values (see fast_each_with_index).
# Returns self.
def fast_each
  self.fast_each_with_index { |element, _i| yield element }
  self
end
fast_each_with_index() { |element, i| ... }
click to toggle source
# File lib/rust-core.rb, line 352
# Yields every row together with its index. Each row is a freshly built
# Array of values in label order (no Hash is created).
# Returns self.
def fast_each_with_index
  self.rows.times do |i|
    element = @labels.map { |label| @data[label][i] }
    yield element, i
  end

  self
end
fast_row(i)
click to toggle source
# File lib/rust-core.rb, line 148
# Returns row +i+ as an Array of values in label order, or nil when +i+ is
# negative or not smaller than the number of rows.
def fast_row(i)
  return nil if i < 0 || i >= self.rows

  @labels.map { |label| @data[label][i] }
end
has_row?() { |row, i| ... }
click to toggle source
# File lib/rust-core.rb, line 210
# Returns true as soon as the block returns a truthy value for some
# (row, index) pair, false if it never does.
def has_row?
  self.each_with_index { |row, i| return true if yield(row, i) }
  false
end
head(n=10)
click to toggle source
# File lib/rust-core.rb, line 400
# Returns a new DataFrame with the same columns that contains only the rows
# whose index is smaller than +n+ (10 by default).
def head(n=10)
  leading = DataFrame.new(self.column_names)

  self.each_with_index do |row, i|
    next unless i < n

    leading << row
  end

  leading
end
inspect()
click to toggle source
# File lib/rust-core.rb, line 379
# Renders the frame as a text table: a dashed ruler, a header with the
# right-aligned column names, a ruler, one line per row prefixed by its
# bracketed index, and a closing ruler (no trailing newline).
# Cell values are shown through their own #inspect.
def inspect
  separator = " | "
  # Right-aligns +text+ in +width+ characters.
  pad_left = lambda { |text, width| (" " * (width - text.length)) + text }

  # A column is as wide as its name or its longest inspected value.
  col_widths = {}
  self.column_names.each do |colname|
    cell_lengths = @data[colname].map { |e| e.inspect.length }
    col_widths[colname] = ([colname.length] + cell_lengths).max
  end
  # Width of the index gutter: the largest index plus "[", "]" and a space.
  col_widths[:rowscol] = (self.rows - 1).inspect.length + 3

  ruler = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
  header = self.column_names.map { |colname| pad_left.call(colname, col_widths[colname]) }.join(separator)

  result = ""
  result << ruler << "\n"
  result << (" " * col_widths[:rowscol]) << header << "\n"
  result << ruler << "\n"
  self.each_with_index do |row, i|
    index_part = "[" + (" " * (col_widths[:rowscol] - i.inspect.length - 3)) + "#{i}] "
    row_part = row.map { |colname, value| pad_left.call(value.inspect, col_widths[colname]) }.join(separator)

    result << index_part << row_part << "\n"
  end
  result << ruler

  result
end
load_in_r_as(variable_name)
click to toggle source
# File lib/rust-core.rb, line 365
# Builds a list of R statements that create an empty data.frame called
# +variable_name+ and assign this frame's rows to it one by one (R indices
# start at 1), then hands the list to Rust._eval_big.
# NOTE(review): relies on the project's #to_R conversions for Integer and
# Array - their exact output is not visible here.
def load_in_r_as(variable_name)
  command = ["#{variable_name} <- data.frame()"]

  self.each_with_index do |row, i|
    row_index = i + 1
    command << "#{variable_name}[#{row_index.to_R}, #{row.keys.to_R}] <- #{row.values.to_R}"
  end

  Rust._eval_big(command)
end
merge(other, by, first_alias = "x", second_alias = "y")
click to toggle source
# File lib/rust-core.rb, line 408
# Joins this frame with +other+ on the columns listed in +by+, keeping only
# the rows of +other+ whose key also appears in this frame.
#
# other::        the DataFrame to merge with.
# by::           Array of String column names present in both frames.
# first_alias::  prefix ("alias.") for this frame's non-key columns.
# second_alias:: prefix ("alias.") for +other+'s non-key columns.
#
# Both aliases may be "" (no prefix) provided the non-key columns do not
# overlap; otherwise they must differ. When several rows of this frame share
# a key, the last one is used.
#
# Returns a new DataFrame with the key columns followed by the prefixed
# columns of both frames. Raises TypeError for wrong argument types and
# RuntimeError for missing key columns or conflicting aliases.
def merge(other, by, first_alias = "x", second_alias = "y")
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
  raise TypeError, "Expected list of strings" unless by.is_a?(Array) && by.all? { |e| e.is_a?(String) }
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size

  if first_alias == second_alias
    raise "The aliases can not have the same value" unless first_alias == ""

    intersection = (self.column_names - by) & (other.column_names - by)
    raise "Cannot merge because the following columns would overlap: #{intersection}" if intersection.size > 0
  end

  # Index this frame's rows by key tuple; a later duplicate overwrites.
  my_keys = {}
  self.each_with_index do |row, i|
    my_keys[by.map { |colname| row[colname] }] = i
  end

  own_columns = self.column_names - by
  foreign_columns = other.column_names - by

  own_prefix = first_alias.length > 0 ? first_alias + "." : first_alias
  foreign_prefix = second_alias.length > 0 ? second_alias + "." : second_alias

  prefixed_columns = own_columns.map { |colname| "#{own_prefix}#{colname}" } +
                     foreign_columns.map { |colname| "#{foreign_prefix}#{colname}" }
  result = DataFrame.new(by + prefixed_columns)

  other.each do |other_row|
    my_row_index = my_keys[by.map { |colname| other_row[colname] }]
    next unless my_row_index

    my_row = self.row(my_row_index)

    to_add = {}
    by.each { |colname| to_add[colname] = my_row[colname] }
    own_columns.each { |colname| to_add["#{own_prefix}#{colname}"] = my_row[colname] }
    foreign_columns.each { |colname| to_add["#{foreign_prefix}#{colname}"] = other_row[colname] }

    result << to_add
  end

  result
end
rename_column!(old_name, new_name)
click to toggle source
# File lib/rust-core.rb, line 190
# Renames the column +old_name+ to +new_name+, in place.
#
# The new name is converted to a String for both the label list and the data
# hash, so the two stay in sync when a Symbol is passed. The clash check
# covers the raw and the stringified name, so an existing column is never
# silently overwritten.
#
# Returns +new_name+ as given. Raises RuntimeError when +old_name+ is not a
# column or when a column named +new_name+ already exists.
def rename_column!(old_name, new_name)
  new_label = new_name.to_s

  raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_name)
  if @labels.include?(new_name) || @labels.include?(new_label)
    raise "This DataFrame already contains a column named #{new_name}"
  end

  @data[new_label] = @data.delete(old_name)
  @labels[@labels.index(old_name)] = new_label

  new_name
end
row(i)
click to toggle source
# File lib/rust-core.rb, line 140
# Returns row +i+ as a Hash mapping label => value, or nil when +i+ is
# negative or not smaller than the number of rows.
def row(i)
  return nil if i < 0 || i >= self.rows

  @data.each_with_object({}) do |(label, values), picked|
    picked[label] = values[i]
  end
end
rows()
click to toggle source
# File lib/rust-core.rb, line 276
# Returns the number of rows, measured on the first stored column.
# A frame without columns has 0 rows (this used to raise NoMethodError).
def rows
  # Hash#first is nil for an empty hash, which leaves first_column nil.
  _label, first_column = @data.first
  first_column ? first_column.size : 0
end
select_columns(cols=nil) { |label| ... }
click to toggle source
# File lib/rust-core.rb, line 217
# Returns a clone of the frame that keeps only the selected columns.
#
# cols:: list of labels to keep; when omitted, the block is called with each
#        label and the column is kept if the block returns a truthy value.
#
# Raises RuntimeError when neither +cols+ nor a block is given.
def select_columns(cols=nil)
  raise "You must specify either the columns you want to select or a selection block" unless cols || block_given?

  selection = self.clone

  @labels.each do |label|
    keep = cols ? cols.include?(label) : yield(label)
    selection.delete_column(label) unless keep
  end

  selection
end
Also aliased as: select_cols
select_rows() { |row, i| ... }
click to toggle source
# File lib/rust-core.rb, line 202
# Returns a new DataFrame with the same columns, containing the rows for
# which the block (called with the row hash and its index) is truthy.
def select_rows
  matching = DataFrame.new(self.column_names)

  self.each_with_index do |row, i|
    matching << row if yield(row, i)
  end

  matching
end
shuffle(*args)
click to toggle source
# File lib/rust-core.rb, line 156
# Returns a new DataFrame with the same columns and the rows in random
# order. Any arguments are forwarded unchanged to Array#shuffle!.
def shuffle(*args)
  buffer = []
  self.each { |row| buffer << row }
  buffer.shuffle!(*args)

  shuffled = DataFrame.new(@labels)
  buffer.each { |row| shuffled << row }
  shuffled
end
sort_by(column)
click to toggle source
# File lib/rust-core.rb, line 514
# Non-destructive variant of sort_by!: returns a clone of this frame sorted
# by +column+; the receiver is left untouched.
def sort_by(column)
  self.clone.tap { |copy| copy.sort_by!(column) }
end
sort_by!(by)
click to toggle source
# File lib/rust-core.rb, line 520
# Sorts the rows in place by the values of column +by+ (ascending, using
# <=>). Rows with equal keys keep their original relative order.
#
# The row permutation is computed once by sorting the row indices and is
# then applied to every column. This replaces a quadratic per-row
# Array#index search and keeps every row's values together.
#
# Returns the (now sorted) array of the +by+ column.
def sort_by!(by)
  keys = @data[by]

  # The index is a tie-breaker, which makes the sort stable.
  order = keys.each_index.sort_by { |i| [keys[i], i] }

  @data.keys.each do |label|
    next if label == by

    column = @data[label]
    @data[label] = order.map { |i| column[i] }
  end

  # The permuted keys are built before replace mutates the array.
  keys.replace(order.map { |i| keys[i] })
end
transform_column!(column) { |e| ... }
click to toggle source
# File lib/rust-core.rb, line 198
# Replaces, in place, every value of +column+ with the result of calling the
# block on it. Returns the transformed column array.
def transform_column!(column)
  values = @data[column]
  values.map! { |element| yield(element) }
end
uniq_by(by)
click to toggle source
# File lib/rust-core.rb, line 243
# Non-destructive variant of uniq_by!: returns a clone of this frame without
# the rows that repeat an earlier key; the receiver is left untouched.
def uniq_by(by)
  self.clone.tap { |copy| copy.uniq_by!(by) }
end
uniq_by!(by)
click to toggle source
# File lib/rust-core.rb, line 249
# Removes, in place, every row whose values for the columns listed in +by+
# were already seen in an earlier row (the first occurrence is kept).
# Returns self.
def uniq_by!(by)
  seen = {}
  doomed = []

  self.each_with_index do |row, i|
    key = by.map { |colname| row[colname] }

    if seen[key]
      # Earlier deletions shift the later rows up, so the index is adjusted
      # by the number of rows already scheduled for removal.
      doomed << (i - doomed.size)
    else
      seen[key] = i
    end
  end

  doomed.each { |index| self.delete_row(index) }

  self
end