class Rust::DataFrame

Public Class Methods

new(labels_or_data) click to toggle source
# File lib/rust-core.rb, line 125
# Builds a data frame either from a list of column labels (producing empty
# columns) or from a hash mapping each label to its array of values.
#
# labels_or_data:: an Array of labels, or a Hash of label => values. Labels
#                  are stored as strings; each value of a Hash is copied with
#                  +clone+ so the caller's arrays are not shared.
#
# Raises TypeError for any other argument. Previously such a call left
# @labels unset and the failure only surfaced later, on first use.
def initialize(labels_or_data)
    @data = {}

    case labels_or_data
    when Array
        @labels = labels_or_data.map { |l| l.to_s }
        @labels.each { |label| @data[label] = [] }
    when Hash
        @labels = labels_or_data.keys.map { |l| l.to_s }

        labels_or_data.each do |key, value|
            @data[key.to_s] = value.clone
        end
    else
        raise TypeError, "Expected an Array of labels or a Hash of columns"
    end
end
pull_variable(variable) click to toggle source
# File lib/rust-core.rb, line 116
# Reads the R variable +variable+ into a new DataFrame: the column names are
# pulled first, then each column is pulled on its own with the `$` accessor.
def self.pull_variable(variable)
    pulled = {}
    Rust._pull("colnames(#{variable})").each do |column_name|
        pulled[column_name] = Rust._pull("#{variable}$#{column_name}")
    end

    DataFrame.new(pulled)
end

Public Instance Methods

<<(row)
Alias for: add_row
[](rows, cols=nil) click to toggle source
# File lib/rust-core.rb, line 170
# Selects a sub-frame by row indices and/or column names.
#
# rows:: a Range or Array of row indices to keep; any other value leaves the
#        rows untouched.
# cols:: an Array of column names (converted to strings) to keep; any other
#        value leaves the columns untouched.
#
# Returns the frame produced by select_rows / select_columns, or self when
# neither filter applies. Raises when both arguments are nil/false.
def [](rows, cols=nil)
    raise "You must specify either rows or columns to select" unless rows || cols

    selection = self
    case rows
    when Range, Array
        selection = selection.select_rows { |_row, i| rows.include?(i) }
    end

    if cols.is_a?(Array)
        wanted = cols.map { |c| c.to_s }
        selection = selection.select_columns(wanted)
    end

    selection
end
add_column(name, values=nil) { |row| ... } click to toggle source
# File lib/rust-core.rb, line 307
# Appends a new column to the frame.
#
# name::   label of the new column. It is converted to a String, as the
#          constructor does with its labels, so that a Symbol cannot bypass
#          the duplicate check or be stored under a key that does not match
#          the string names returned by column_names.
# values:: optional array with one value per existing row; it is cloned.
#
# When +values+ is omitted a block is required: it is called with each row
# (a Hash of label => value) and its result becomes that row's value.
#
# Raises if the column already exists, if neither values nor a block are
# given, or if the number of values differs from the number of rows.
def add_column(name, values=nil)
    name = name.to_s
    raise "Column already exists" if @labels.include?(name)
    raise "Values or block required" if !values && !block_given?
    raise "Number of values not matching" if values && values.size != self.rows

    @labels << name
    if values
        @data[name] = values.clone
    else
        @data[name] = []
        self.each_with_index do |row, i|
            @data[name][i] = yield row
        end
    end
end
add_row(row) click to toggle source
# File lib/rust-core.rb, line 284
# Appends one row to the frame and returns true.
#
# row:: an Array with one value per column, in label order, or a Hash whose
#       keys (compared as strings) are exactly the column names.
#
# Raises a RuntimeError when the array size or the hash keys do not match the
# columns, and a TypeError for any other kind of argument.
def add_row(row)
    case row
    when Array
        raise "Expected an array of size #{@data.size}" unless row.size == @data.size

        @labels.each_with_index { |label, position| @data[label] << row[position] }
    when Hash
        given_keys = row.keys.map { |l| l.to_s }.sort
        raise "Expected a hash with the following keys: #{@data.keys}" unless given_keys == @data.keys.sort

        row.each { |key, value| @data[key.to_s] << value }
    else
        raise TypeError, "Expected an Array or a Hash"
    end

    true
end
Also aliased as: <<
aggregate(by, **aggregators) { |column| ... } click to toggle source
# File lib/rust-core.rb, line 474
# Groups the rows by the value of column +by+ and collapses every group into
# a single row.
#
# by::          name (String) of the grouping column.
# aggregators:: optional per-column procs, given as keywords; each receives
#               the array of the group's values for that column.
# block::       default aggregator, used for columns without a dedicated proc.
#
# Returns a new Rust::DataFrame with one row per distinct value of +by+, in
# the order produced by sort_by. An empty frame yields an empty result.
#
# Raises TypeError when +by+ is not a String or an aggregator is not a Proc,
# and a RuntimeError when no block is given.
def aggregate(by, **aggregators)
    raise TypeError, "Expected a string" unless by.is_a?(String)
    raise TypeError, "All the aggregators should be procs" unless aggregators.values.all? { |v| v.is_a?(Proc) }
    raise "Expected a block for default aggregator" unless block_given?

    aggregators = aggregators.map { |label, callable| [label.to_s, callable] }.to_h

    sorted = self.sort_by(by)

    current_value = nil
    partials = []
    partial = nil
    sorted.column(by).each_with_index do |value, index|
        # Also open a group when none exists yet: a leading nil key equals the
        # initial current_value and would otherwise leave `partial` unset.
        if partial.nil? || current_value != value
            current_value = value
            partials << partial if partial
            partial = Rust::DataFrame.new(self.column_names)
        end
        partial << sorted.fast_row(index)
    end
    # No group exists when the frame is empty; do not push nil.
    partials << partial if partial

    result = Rust::DataFrame.new(self.column_names)
    partials.each do |group|
        aggregated_row = {}
        aggregated_row[by] = group.column(by)[0]
        (self.column_names - [by]).each do |column|
            if aggregators[column]
                aggregated_row[column] = aggregators[column].call(group.column(column))
            else
                aggregated_row[column] = yield group.column(column)
            end
        end

        result << aggregated_row
    end

    return result
end
bind_columns(dataframe) click to toggle source
# File lib/rust-core.rb, line 577
# Non-destructive variant of bind_columns!: returns a copy of this frame with
# the columns of +dataframe+ appended, leaving the receiver untouched.
def bind_columns(dataframe)
    self.clone.tap { |copy| copy.bind_columns!(dataframe) }
end
Also aliased as: cbind
bind_columns!(dataframe) click to toggle source
# File lib/rust-core.rb, line 557
# Appends every column of +dataframe+ to this frame, in place, and returns
# true.
#
# Raises TypeError unless +dataframe+ is a DataFrame, and a RuntimeError when
# the row counts differ or when any column name is present in both frames.
def bind_columns!(dataframe)
    raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)
    raise "The number of rows are not compatible" unless self.rows == dataframe.rows

    shared_names = self.column_names & dataframe.column_names
    raise "The dataset would override some columns" unless shared_names.empty?

    dataframe.column_names.each do |incoming|
        self.add_column(incoming, dataframe.column(incoming))
    end

    true
end
Also aliased as: cbind!
bind_rows(dataframe) click to toggle source
# File lib/rust-core.rb, line 570
# Non-destructive variant of bind_rows!: returns a copy of this frame with
# the rows of +dataframe+ appended, leaving the receiver untouched.
def bind_rows(dataframe)
    self.clone.tap { |copy| copy.bind_rows!(dataframe) }
end
Also aliased as: rbind
bind_rows!(dataframe) click to toggle source
# File lib/rust-core.rb, line 545
# Appends every row of +dataframe+ to this frame, in place, and returns true.
#
# Raises TypeError unless +dataframe+ is a DataFrame, and a RuntimeError when
# the other frame does not provide all of this frame's columns.
def bind_rows!(dataframe)
    raise TypeError, "DataFrame expected" unless dataframe.is_a?(DataFrame)

    common_names = self.column_names & dataframe.column_names
    unless common_names.size == self.columns
        raise "The columns are not compatible: #{self.column_names - dataframe.column_names} - #{dataframe.column_names - self.column_names}"
    end

    dataframe.each { |row| self << row }

    true
end
Also aliased as: rbind!
cbind(dataframe)
Alias for: bind_columns
cbind!(dataframe)
Alias for: bind_columns!
clone() click to toggle source
# File lib/rust-core.rb, line 584
# Returns a new DataFrame holding a copy of this frame's columns.
#
# The columns are handed to the constructor in the order given by @labels
# rather than in the insertion order of the @data hash: the two can diverge
# (e.g. after a column has been renamed), and the copy must keep the column
# order of the original.
def clone
    position = {}
    @labels.each_with_index { |label, i| position[label.to_s] ||= i }

    # Keys without a matching label (not expected) are placed last.
    ordered = @data.sort_by { |label, _values| position.fetch(label.to_s, position.size) }.to_h

    DataFrame.new(ordered)
end
colnames()
Alias for: column_names
column(name) click to toggle source
# File lib/rust-core.rb, line 185
# Returns the array of values stored for column +name+ (the internal array,
# not a copy), or nil when the frame has no such column.
def column(name)
    @data[name]
end
Also aliased as: |
column_names() click to toggle source
# File lib/rust-core.rb, line 271
# Returns a new array with the column labels, each converted to a String.
def column_names
    @labels.map(&:to_s)
end
Also aliased as: colnames
columns() click to toggle source
# File lib/rust-core.rb, line 280
# Returns the number of columns, i.e. the number of labels.
def columns
    @labels.length
end
delete_column(column) click to toggle source
# File lib/rust-core.rb, line 232
# Removes the column +column+ from both the label list and the data.
# Returns the removed array of values, or nil when there was no such column.
def delete_column(column)
    @labels.reject! { |label| label == column }
    @data.delete(column)
end
delete_row(i) click to toggle source
# File lib/rust-core.rb, line 237
# Removes the value at index +i+ from every column, in place.
# Returns the internal label => values hash.
def delete_row(i)
    @data.each_value { |values| values.delete_at(i) }
end
each() { |element| ... } click to toggle source
# File lib/rust-core.rb, line 323
# Yields every row, as produced by each_with_index, without its index.
# Returns self.
def each
    self.each_with_index { |element, _index| yield element }

    self
end
each_with_index() { |element, i| ... } click to toggle source
# File lib/rust-core.rb, line 339
# Yields every row together with its zero-based index. Each row is a new
# Hash mapping the labels, in label order, to that row's values.
# Returns self.
def each_with_index
    self.rows.times do |i|
        element = @labels.each_with_object({}) do |label, record|
            record[label] = @data[label][i]
        end

        yield element, i
    end

    self
end
fast_each() { |element| ... } click to toggle source
# File lib/rust-core.rb, line 331
# Yields every row, as produced by fast_each_with_index, without its index.
# Returns self.
def fast_each
    self.fast_each_with_index { |element, _index| yield element }

    self
end
fast_each_with_index() { |element, i| ... } click to toggle source
# File lib/rust-core.rb, line 352
# Yields every row together with its zero-based index. Each row is a plain
# Array of values in label order (no Hash is built). Returns self.
def fast_each_with_index
    self.rows.times do |i|
        element = @labels.map { |label| @data[label][i] }

        yield element, i
    end

    self
end
fast_row(i) click to toggle source
# File lib/rust-core.rb, line 148
# Returns row +i+ as an Array of values in label order, or nil when +i+ is
# negative or not smaller than the number of rows.
def fast_row(i)
    return nil if i < 0 || i >= self.rows

    @labels.map { |label| @data[label][i] }
end
has_row?() { |row, i| ... } click to toggle source
# File lib/rust-core.rb, line 210
# Returns true as soon as the block returns a truthy value for some row
# (yielded together with its index); returns false when no row matches.
def has_row?
    found = false
    self.each_with_index do |candidate, position|
        next unless yield(candidate, position)

        found = true
        break
    end
    found
end
head(n=10) click to toggle source
# File lib/rust-core.rb, line 400
# Returns a new DataFrame containing at most the first +n+ rows (10 by
# default) of this frame.
#
# The iteration stops as soon as +n+ rows have been collected instead of
# walking (and building a Hash for) every remaining row.
def head(n=10)
    result = DataFrame.new(self.column_names)
    self.each_with_index do |row, i|
        break if i >= n

        result << row
    end
    return result
end
inspect() click to toggle source
# File lib/rust-core.rb, line 379
# Renders the frame as a text table: a dashed rule, a header line with the
# right-aligned column names, another rule, one line per row prefixed with
# its index in square brackets, and a closing rule. Every cell shows the
# value's +inspect+ output, right-aligned to the widest entry of its column.
def inspect
    separator = " | "
    names = self.column_names

    col_widths = names.map do |colname|
        cell_lengths = @data[colname].map { |e| e.inspect.length }
        [colname, ([colname.length] + cell_lengths).max]
    end.to_h
    # Width of the index column: the largest index plus "[", "]" and a space.
    col_widths[:rowscol] = (self.rows - 1).inspect.length + 3

    right_align = lambda { |text, width| (" " * (width - text.length)) + text }
    rule = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))

    lines = [rule]
    lines << (" " * col_widths[:rowscol]) + names.map { |colname| right_align.call(colname, col_widths[colname]) }.join(separator)
    lines << rule
    self.each_with_index do |row, i|
        index_part = "[" + right_align.call(i.inspect, col_widths[:rowscol] - 3) + "] "
        row_part   = row.map { |colname, value| right_align.call(value.inspect, col_widths[colname]) }.join(separator)

        lines << index_part + row_part
    end
    lines << rule

    lines.join("\n")
end
load_in_r_as(variable_name) click to toggle source
# File lib/rust-core.rb, line 365
# Recreates this frame in R under the name +variable_name+: an empty
# data.frame is declared, then one assignment per row fills it, using
# 1-based row indices. The list of commands is passed to Rust._eval_big,
# whose result is returned.
def load_in_r_as(variable_name)
    statements = ["#{variable_name} <- data.frame()"]

    r_index = 0
    self.each do |record|
        r_index += 1
        statements << "#{variable_name}[#{r_index.to_R}, #{record.keys.to_R}] <- #{record.values.to_R}"
    end

    Rust._eval_big(statements)
end
merge(other, by, first_alias = "x", second_alias = "y") click to toggle source
# File lib/rust-core.rb, line 408
# Joins this frame with +other+ on the key columns listed in +by+, keeping
# only the keys present in both frames (rows follow the order of +other+).
#
# other::        the DataFrame to join with.
# by::           Array of String column names forming the key.
# first_alias::  prefix ("<alias>.") for this frame's non-key columns.
# second_alias:: prefix ("<alias>.") for the other frame's non-key columns.
#
# An empty alias adds no prefix. Both aliases may be empty only when the
# non-key columns of the two frames do not overlap; any other pair of equal
# aliases is rejected. When a key occurs in several rows of this frame, the
# last such row is the one used.
#
# Raises TypeError for arguments of the wrong kind and a RuntimeError for
# missing key columns or conflicting aliases.
def merge(other, by, first_alias = "x", second_alias = "y")
    raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
    raise TypeError, "Expected list of strings" unless by.is_a?(Array) && by.all? { |e| e.is_a?(String) }
    raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
    raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size

    if first_alias == second_alias
        raise "The aliases can not have the same value" unless first_alias == ""

        intersection = (self.column_names - by) & (other.column_names - by)
        raise "Cannot merge because the following columns would overlap: #{intersection}" if intersection.size > 0
    end

    # Index this frame's rows by key; later duplicates overwrite earlier ones.
    row_index_by_key = {}
    self.each_with_index do |own_row, position|
        row_index_by_key[by.map { |colname| own_row[colname] }] = position
    end

    own_extra   = self.column_names - by
    other_extra = other.column_names - by

    own_prefix   = first_alias.length > 0 ? first_alias + "." : first_alias
    other_prefix = second_alias.length > 0 ? second_alias + "." : second_alias

    own_renamed   = own_extra.map { |colname| "#{own_prefix}#{colname}" }
    other_renamed = other_extra.map { |colname| "#{other_prefix}#{colname}" }

    result = DataFrame.new(by + own_renamed + other_renamed)
    other.each do |other_row|
        position = row_index_by_key[by.map { |colname| other_row[colname] }]
        next unless position

        own_row = self.row(position)

        combined = {}
        by.each { |colname| combined[colname] = own_row[colname] }
        own_extra.each { |colname| combined["#{own_prefix}#{colname}"] = own_row[colname] }
        other_extra.each { |colname| combined["#{other_prefix}#{colname}"] = other_row[colname] }

        result << combined
    end

    result
end
rbind(dataframe)
Alias for: bind_rows
rbind!(dataframe)
Alias for: bind_rows!
rename_column!(old_name, new_name) click to toggle source
# File lib/rust-core.rb, line 190
# Renames the column +old_name+ to +new_name+, in place.
#
# Both names are converted to strings before use. The original stored the
# new name as a String in the data but unconverted in the label list, so a
# Symbol left the two out of sync and also slipped past the duplicate check.
# The data hash is rebuilt so that the renamed column keeps its position
# instead of moving to the end.
#
# Returns the new label. Raises a RuntimeError when +old_name+ does not exist
# or +new_name+ is already taken.
def rename_column!(old_name, new_name)
    old_label = old_name.to_s
    new_label = new_name.to_s
    raise "This DataFrame does not contain a column named #{old_name}" unless @labels.include?(old_label)
    raise "This DataFrame already contains a column named #{new_name}" if @labels.include?(new_label)

    @data = @data.map { |label, values| [label == old_label ? new_label : label, values] }.to_h
    @labels[@labels.index(old_label)] = new_label
end
row(i) click to toggle source
# File lib/rust-core.rb, line 140
# Returns row +i+ as a Hash of label => value, or nil when +i+ is negative or
# not smaller than the number of rows.
def row(i)
    return nil if i < 0 || i >= self.rows

    @data.transform_values { |values| values[i] }
end
rows() click to toggle source
# File lib/rust-core.rb, line 276
# Returns the number of rows, taken from the length of the first column.
# A frame without any column has no rows and reports 0; the original raised
# NoMethodError on nil in that case.
def rows
    first_column = @data.values.first
    first_column ? first_column.size : 0
end
select_cols(cols=nil)
Alias for: select_columns
select_columns(cols=nil) { |label| ... } click to toggle source
# File lib/rust-core.rb, line 217
# Returns a copy of the frame restricted to the chosen columns.
#
# cols:: collection of labels to keep. When omitted, the block decides: it
#        receives each label and the column is kept when it returns a truthy
#        value. The block is not called when +cols+ is given.
#
# Raises a RuntimeError when neither +cols+ nor a block is supplied.
def select_columns(cols=nil)
    raise "You must specify either the columns you want to select or a selection block" if !cols && !block_given?

    result = self.clone
    @labels.each do |label|
        keep = cols ? cols.include?(label) : yield(label)
        result.delete_column(label) unless keep
    end
    result
end
Also aliased as: select_cols
select_rows() { |row, i| ... } click to toggle source
# File lib/rust-core.rb, line 202
# Returns a new DataFrame with the same columns, containing the rows for
# which the block (called with the row Hash and its index) returns a truthy
# value.
def select_rows
    selected = DataFrame.new(self.column_names)
    self.each_with_index do |candidate, position|
        next unless yield(candidate, position)

        selected << candidate
    end
    selected
end
shuffle(*args) click to toggle source
# File lib/rust-core.rb, line 156
# Returns a new DataFrame with the same columns and the rows in random order.
#
# Positional and keyword arguments are forwarded to Array#shuffle!, so a
# generator can be supplied with <tt>shuffle(random: rng)</tt>. Keywords are
# captured separately because, since Ruby 3.0, a hash collected by a bare
# *args is passed on as a positional argument and Array#shuffle! rejects it
# with ArgumentError.
def shuffle(*args, **options)
    result = DataFrame.new(@labels)

    buffer = []
    self.each do |row|
        buffer << row
    end
    buffer.shuffle!(*args, **options).each do |row|
        result << row
    end

    return result
end
sort_by(column) click to toggle source
# File lib/rust-core.rb, line 514
# Non-destructive variant of sort_by!: returns a copy of this frame sorted by
# +column+, leaving the receiver untouched.
def sort_by(column)
    self.clone.tap { |copy| copy.sort_by!(column) }
end
sort_by!(by) click to toggle source
# File lib/rust-core.rb, line 520
# Sorts the rows of the frame, in place, by ascending value of column +by+.
# Rows with equal keys keep their relative order.
#
# The row permutation is computed once with a stable O(n log n) sort and then
# applied to every column. The original looked each value up in a sorted
# copy, which was quadratic and overwrote consumed slots with a sentinel.
#
# Returns the sorted key column. Raises a RuntimeError when the frame has no
# column named +by+, and ArgumentError (from the sort) when the key values
# cannot be compared with each other.
def sort_by!(by)
    keys = @data[by]
    raise "This DataFrame does not contain a column named #{by}" if keys.nil?

    # Pairing each key with its position makes the ordering stable.
    order = (0...keys.size).sort_by { |i| [keys[i], i] }

    @data.each do |label, values|
        next if label == by

        @data[label] = order.map { |i| values[i] }
    end

    # The key column is updated in place, as the original did with sort!.
    keys.replace(order.map { |i| keys[i] })
end
transform_column!(column) { |e| ... } click to toggle source
# File lib/rust-core.rb, line 198
# Replaces, in place, every value of +column+ with the block's result for
# that value. Returns the (same) array of column values.
def transform_column!(column)
    values = @data[column]
    values.map! { |current| yield(current) }
end
uniq_by(by) click to toggle source
# File lib/rust-core.rb, line 243
# Non-destructive variant of uniq_by!: returns a copy of this frame without
# the rows whose key (the values of the +by+ columns) repeats an earlier row.
def uniq_by(by)
    self.clone.tap { |copy| copy.uniq_by!(by) }
end
uniq_by!(by) click to toggle source
# File lib/rust-core.rb, line 249
# Removes, in place, every row whose key repeats the key of an earlier row;
# the first occurrence of each key is kept. Returns self.
#
# by:: the column name(s) forming the key. An Array of names works as
#      before; a single name is now accepted as well, matching how sort_by
#      and aggregate take their column.
def uniq_by!(by)
    key_columns = Array(by)

    seen = {}
    duplicates = []
    self.each_with_index do |row, i|
        key = key_columns.map { |colname| row[colname] }
        if seen.key?(key)
            duplicates << i
        else
            seen[key] = i
        end
    end

    # Delete from the end so the remaining indices stay valid.
    duplicates.reverse_each { |i| self.delete_row(i) }

    return self
end
|(name)
Alias for: column