class Groonga::Command::Load::ArrowTableBuilder
Public Class Methods
new(columns, values)
click to toggle source
# File lib/groonga/command/load.rb, line 124
# Remembers the inputs that #build later converts.
#
# @param columns the column names; may be nil, in which case
#   build_raw_table takes the names from the first element of +values+
#   when the records are arrays.
# @param values the records: either arrays of cell values or hashes
#   keyed by column name.
def initialize(columns, values)
  @columns, @values = columns, values
end
Public Instance Methods
build()
click to toggle source
# File lib/groonga/command/load.rb, line 129
# Converts the stored records into an Arrow table.
#
# @return the result of build_arrow_table, or nil when the
#   column-oriented raw table is empty (no records to load).
def build
  column_major = build_raw_table
  column_major.empty? ? nil : build_arrow_table(column_major)
end
Private Instance Methods
arrow_weight_vector_data_type(raw_value)
click to toggle source
# File lib/groonga/command/load.rb, line 229
# Builds the struct data type used for one entry of a weight vector.
#
# The "value" field is always a string. The "weight" field is :float as
# soon as any weight in +raw_value+ is a Float, and :int32 otherwise.
#
# @param raw_value a hash-like object mapping values to weights.
# @return [Arrow::StructDataType]
def arrow_weight_vector_data_type(raw_value)
  has_float_weight = raw_value.values.any? { |weight| weight.is_a?(Float) }
  weight_type = has_float_weight ? :float : :int32
  Arrow::StructDataType.new("value" => :string, "weight" => weight_type)
end
build_arrow_array(data_type, raw_array)
click to toggle source
# File lib/groonga/command/load.rb, line 222
# Builds an Arrow list array whose items have type +data_type+.
#
# @param data_type the item type wrapped in a list field named "item".
# @param raw_array the Ruby values; they are normalized with
#   prepare_raw_array before being handed to the builder.
# @return the value returned by Arrow::ListArrayBuilder.build.
def build_arrow_array(data_type, raw_array)
  list_type = Arrow::ListDataType.new(Arrow::Field.new("item", data_type))
  Arrow::ListArrayBuilder.build(list_type, prepare_raw_array(raw_array))
end
build_arrow_table(raw_table)
click to toggle source
# File lib/groonga/command/load.rb, line 168
# Turns a column-oriented raw table into an Arrow table.
#
# The first non-nil cell of each column decides how the column is built:
# * Array: a list column; the item type comes from the first non-nil
#   sub-array for which detect_arrow_data_type returns a type, and
#   falls back to :string.
# * Hash: a list column of weight-vector structs, typed from that first
#   non-nil cell only.
# * anything else: a plain column typed by detect_arrow_data_type, with
#   :string as the fallback; string columns have every non-nil cell
#   converted with to_s.
#
# @param raw_table a hash mapping column name to an array of cells.
# @return [Arrow::Table]
def build_arrow_table(raw_table)
  built_columns = raw_table.map do |name, raw_array|
    sample = raw_array.find { |cell| !cell.nil? }
    arrow_array =
      case sample
      when Array
        item_type = nil
        raw_array.compact.each do |sub_array|
          item_type = detect_arrow_data_type(sub_array)
          break if item_type
        end
        build_arrow_array(item_type || :string, raw_array)
      when Hash
        build_arrow_array(arrow_weight_vector_data_type(sample), raw_array)
      else
        scalar_type = detect_arrow_data_type(raw_array) || :string
        cells = raw_array
        cells = raw_array.map { |cell| cell&.to_s } if scalar_type == :string
        Arrow::DataType.resolve(scalar_type).build_array(cells)
      end
    [Arrow::Field.new(name, arrow_array.value_data_type), arrow_array]
  end
  schema = Arrow::Schema.new(built_columns.map(&:first))
  Arrow::Table.new(schema, built_columns.map(&:last))
end
build_raw_table()
click to toggle source
# File lib/groonga/command/load.rb, line 136
# Pivots the stored records into a hash of column name => array of cells,
# where index i of every array belongs to record i.
#
# Two record layouts are handled, chosen by the class of the first record:
# * arrays: column names come from @columns when it is set, otherwise
#   from the first element of @values (which is then not treated as a
#   record);
# * anything else (hash-like records): each record supplies its own
#   name/value pairs, and columns missing from trailing records are
#   padded with nil so every column spans all records.
#
# @return [Hash] the column-oriented table; empty when there are no records.
def build_raw_table
  table = {}

  unless @values.first.is_a?(Array)
    record_count = @values.size
    @values.each_with_index do |record, index|
      record.each { |name, value| (table[name] ||= [])[index] = value }
    end
    # Assigning the last index extends short columns with nil.
    table.each_value do |cells|
      cells[record_count - 1] = nil if cells.size < record_count
    end
    return table
  end

  if @columns
    names = @columns
    rows = @values
  else
    names = @values.first
    rows = @values.drop(1)
  end
  rows.each_with_index do |row, index|
    names.zip(row) { |name, value| (table[name] ||= [])[index] = value }
  end
  table
end
detect_arrow_data_type(raw_array)
click to toggle source
# File lib/groonga/command/load.rb, line 239
# Infers an Arrow data type from the elements of +raw_array+.
#
# Rules, applied element by element:
# * nil carries no type information and is skipped;
# * true/false suggest :boolean;
# * an Integer >= 2**63 suggests :uint64 (upgrading an earlier :int64),
#   any other Integer suggests :int64;
# * a Float suggests :double and upgrades an earlier integer type;
# * a Hash ends the scan and returns a list-of-weight-vector-struct type
#   built from that hash;
# * anything else ends the scan and returns :string.
#
# Apart from the upgrades above, the first suggestion wins.
#
# @param raw_array [Array] the values to inspect.
# @return [Symbol, Arrow::ListDataType, nil] the detected type, or nil
#   when every element is nil (or the array is empty).
def detect_arrow_data_type(raw_array)
  type = nil
  raw_array.each do |element|
    case element
    when nil
      # Nothing to learn from a missing value.
    when true, false
      type ||= :boolean
    when Integer
      if element >= (2**63)
        type = nil if type == :int64
        type ||= :uint64
      else
        type ||= :int64
      end
    when Float
      # Promote both integer types. Previously only :int64 was promoted,
      # so [2**63, 1.5] yielded :uint64 while [1.5, 2**63] yielded
      # :double; the result must not depend on element order.
      type = nil if type == :int64 || type == :uint64
      type ||= :double
    when Hash
      item_type = arrow_weight_vector_data_type(element)
      return Arrow::ListDataType.new(Arrow::Field.new("item", item_type))
    else
      return :string
    end
  end
  type
end
prepare_raw_array(raw_array)
click to toggle source
# File lib/groonga/command/load.rb, line 204
# Normalizes Ruby values into the shape the list array builder is given.
#
# * Nested arrays are normalized recursively.
# * A hash becomes an array with one {"value" => key, "weight" => value}
#   hash per entry.
# * Every other element (including nil) is passed through unchanged.
#
# @param raw_array [Array] the values to normalize.
# @return [Array] a new array; +raw_array+ itself is not modified.
def prepare_raw_array(raw_array)
  raw_array.map do |item|
    if item.is_a?(Array)
      prepare_raw_array(item)
    elsif item.is_a?(Hash)
      item.map { |value, weight| {"value" => value, "weight" => weight} }
    else
      item
    end
  end
end