module DatasetsArrow::Arrowable
Public Instance Methods
each_record_batch() { |record_batch| ... }
click to toggle source
# File lib/datasets-arrow/arrowable.rb, line 22
# Yields every Arrow record batch of this dataset to the given block.
#
# Without a block, an Enumerator over this same method is returned instead.
# When the file at +arrow_data_path+ exists, batches are read from it through
# an Arrow::MemoryMappedInputStream; otherwise iteration is delegated to the
# table returned by +to_arrow+.
def each_record_batch(&block)
  return to_enum(__method__) unless block_given?

  arrow_file = arrow_data_path
  return to_arrow.each_record_batch(&block) unless arrow_file.exist?

  stream = Arrow::MemoryMappedInputStream.new(arrow_file.to_path)
  Arrow::RecordBatchFileReader.new(stream).each do |batch|
    # NOTE(review): the stream is stored on the batch, presumably so the
    # mapped input stays referenced for as long as the batch is alive --
    # confirm before removing.
    batch.instance_variable_set(:@input, stream)
    yield(batch)
  end
end
to_arrow()
click to toggle source
# File lib/datasets-arrow/arrowable.rb, line 5
# Returns this dataset as an Arrow::Table.
#
# If the file at +arrow_data_path+ already exists, the table is loaded from
# it. Otherwise a table is built from +to_table.to_h+ (one Arrow array per
# entry), saved to +arrow_data_path+ -- creating the parent directory when it
# does not exist -- and then returned.
def to_arrow
  arrow_file = arrow_data_path
  return Arrow::Table.load(arrow_file) if arrow_file.exist?

  columns = to_table.to_h.each_with_object({}) do |(name, values), built|
    built[name] = Arrow::ArrayBuilder.build(values)
  end
  arrow_table = Arrow::Table.new(columns)

  cache_directory = arrow_file.parent
  cache_directory.mkpath unless cache_directory.exist?
  arrow_table.save(arrow_file)
  arrow_table
end
Private Instance Methods
arrow_data_path()
click to toggle source
# File lib/datasets-arrow/arrowable.rb, line 39
# Returns the location of the Arrow data file: "data.arrow" appended to
# +cache_dir_path+ with the +++ operator.
def arrow_data_path
  file_name = "data.arrow"
  cache_dir_path + file_name
end