class Groonga::Command::Load::ArrowTableBuilder

Public Class Methods

new(columns, values) click to toggle source
# File lib/groonga/command/load.rb, line 124
# Stores the inputs that #build later turns into an Arrow table.
#
# columns:: column names, or nil (build_raw_table then takes the names
#           from the first element of +values+ when the records are arrays).
# values::  the records to load; build_raw_table accepts either arrays or
#           name => value hashes.
def initialize(columns, values)
  @columns, @values = columns, values
end

Public Instance Methods

build() click to toggle source
# File lib/groonga/command/load.rb, line 129
# Builds the Arrow table for the stored values.
#
# Returns nil when the intermediate column-oriented table is empty,
# otherwise whatever build_arrow_table returns for it.
def build
  column_table = build_raw_table
  column_table.empty? ? nil : build_arrow_table(column_table)
end

Private Instance Methods

arrow_weight_vector_data_type(raw_value) click to toggle source
# File lib/groonga/command/load.rb, line 229
# Returns the Arrow struct type used for one item of a weight vector.
#
# raw_value is a Hash whose values are the weights. The "weight" field is
# :float as soon as any weight in this hash is a Float and :int32
# otherwise; the "value" field is always :string.
def arrow_weight_vector_data_type(raw_value)
  has_float_weight = raw_value.values.any? {|weight| weight.is_a?(Float)}
  weight_type = has_float_weight ? :float : :int32
  Arrow::StructDataType.new("value" => :string,
                            "weight" => weight_type)
end
build_arrow_array(data_type, raw_array) click to toggle source
# File lib/groonga/command/load.rb, line 222
# Builds an Arrow list array whose items have the given data type.
#
# data_type:: type of the list items; wrapped in a field named "item".
# raw_array:: raw column values; normalized with prepare_raw_array before
#             being handed to Arrow::ListArrayBuilder.
def build_arrow_array(data_type, raw_array)
  list_type = Arrow::ListDataType.new(Arrow::Field.new("item", data_type))
  Arrow::ListArrayBuilder.build(list_type, prepare_raw_array(raw_array))
end
build_arrow_table(raw_table) click to toggle source
# File lib/groonga/command/load.rb, line 168
# Converts a column-oriented raw table into an Arrow::Table.
#
# raw_table maps each column name to the array of that column's raw
# values. The first non-nil value of a column decides how it is built:
#
# * Array: a list column. The item type is the first type that
#   detect_arrow_data_type reports for a non-nil sub-array, or :string
#   when none reports one.
# * Hash: a list column whose item type comes from
#   arrow_weight_vector_data_type applied to that first Hash.
# * anything else (including an all-nil column): a flat column whose type
#   is detected from the values, falling back to :string; for :string the
#   non-nil values are converted with to_s first.
def build_arrow_table(raw_table)
  fields = []
  arrays = []
  raw_table.each do |column_name, column_values|
    first_present = column_values.find {|value| !value.nil?}
    case first_present
    when Array
      item_type = :string
      column_values.each do |sub_values|
        next if sub_values.nil?
        detected = detect_arrow_data_type(sub_values)
        next unless detected
        item_type = detected
        break
      end
      array = build_arrow_array(item_type, column_values)
    when Hash
      item_type = arrow_weight_vector_data_type(first_present)
      array = build_arrow_array(item_type, column_values)
    else
      type_name = detect_arrow_data_type(column_values) || :string
      if type_name == :string
        column_values = column_values.collect {|value| value&.to_s}
      end
      array = Arrow::DataType.resolve(type_name).build_array(column_values)
    end
    # The field type is taken from the built array rather than from the
    # requested type.
    fields << Arrow::Field.new(column_name, array.value_data_type)
    arrays << array
  end
  Arrow::Table.new(Arrow::Schema.new(fields), arrays)
end
build_raw_table() click to toggle source
# File lib/groonga/command/load.rb, line 136
# Pivots the stored records into a Hash of column name => array of values,
# where index i of every array belongs to record i.
#
# Two record shapes are handled, chosen by the first element of @values:
#
# * Arrays: column names come from @columns, or from the first element of
#   @values when @columns is not set (the remaining elements are then the
#   records).
# * Otherwise each record is iterated as name/value pairs. Columns that
#   are missing from the trailing records are padded with nil so that
#   every column has one slot per record.
def build_raw_table
  table = {}
  if @values.first.is_a?(Array)
    if @columns
      names = @columns
      rows = @values
    else
      names = @values.first
      rows = @values.drop(1)
    end
    rows.each_with_index do |row, row_index|
      names.zip(row) do |name, value|
        (table[name] ||= [])[row_index] = value
      end
    end
  else
    @values.each_with_index do |row, row_index|
      row.each do |name, value|
        (table[name] ||= [])[row_index] = value
      end
    end
    # Assigning past the end fills the gap with nil, so setting the last
    # slot is enough to stretch a short column to full length.
    last_index = @values.size - 1
    table.each_value do |column|
      column[last_index] = nil if column.size < @values.size
    end
  end
  table
end
detect_arrow_data_type(raw_array) click to toggle source
# File lib/groonga/command/load.rb, line 239
# Infers the Arrow data type for a flat array of raw values.
#
# Returns nil when the array holds no non-nil element, a type symbol
# (:boolean, :int64, :uint64, :double or :string), or an Arrow list type
# of weight-vector structs when a Hash element is found.
#
# Scanning rules:
# * nil elements carry no type information and are skipped.
# * The first typed element normally decides the type; later elements only
#   change it through the numeric promotions below.
# * An Integer >= 2**63 promotes :int64 to :uint64.
# * A Float promotes either integer type to :double, so the result does
#   not depend on whether the Float or the large Integer comes first.
# * A Hash ends the scan with the list type for that Hash; any other
#   object ends the scan with :string.
def detect_arrow_data_type(raw_array)
  type = nil
  raw_array.each do |element|
    case element
    when nil
      # No information; keep scanning.
    when true, false
      type ||= :boolean
    when Integer
      if element >= (2 ** 63)
        # Does not fit in a signed 64-bit integer.
        type = nil if type == :int64
        type ||= :uint64
      else
        type ||= :int64
      end
    when Float
      # A Float cannot be stored in an integer column of either signedness.
      type = nil if type == :int64 || type == :uint64
      type ||= :double
    when Hash
      item_type = arrow_weight_vector_data_type(element)
      arrow_list_field = Arrow::Field.new("item", item_type)
      return Arrow::ListDataType.new(arrow_list_field)
    else
      return :string
    end
  end
  type
end
prepare_raw_array(raw_array) click to toggle source
# File lib/groonga/command/load.rb, line 204
# Returns a copy of raw_array in which every Hash has been rewritten as an
# array of {"value" => key, "weight" => value} hashes, one per entry.
# Nested arrays are processed recursively; every other element (including
# nil) is passed through unchanged.
def prepare_raw_array(raw_array)
  raw_array.map do |item|
    if item.is_a?(Array)
      prepare_raw_array(item)
    elsif item.is_a?(Hash)
      item.map {|value, weight| {"value" => value, "weight" => weight}}
    else
      item
    end
  end
end