class RBHive::TCLISchemaDefinition
Constants
- INFINITY
- NAN
- TYPES
Attributes
schema[R]
Public Class Methods
new(schema, example_row)
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Builds a schema definition from a result-set schema and an optional
# example row.
#
# @param schema the schema object; stored as-is and exposed via +schema+
# @param example_row a row object responding to +colVals+, or nil/false
#   when no row is available
def initialize(schema, example_row)
  @schema = schema
  # Without an example row there are no column values to compare the schema
  # against, so an empty list is stored instead.
  @example_row =
    if example_row
      example_row.colVals
    else
      []
    end
end
Public Instance Methods
coerce_column(column_name, value)
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Converts one raw column value into a Ruby value according to the column's
# type in +column_type_map+.
#
# @param column_name the key used to look the column up in +column_type_map+
# @param value the raw value for that column (may be nil)
# @return nil for a nil value; INFINITY / NAN for the literal strings
#   "Infinity" / "NaN" in non-string columns; the result of
#   +coerce_complex_value+ for types whose name starts with "array";
#   otherwise the value converted with the method listed in TYPES for the
#   type, or the value unchanged when TYPES has no entry
def coerce_column(column_name, value)
  # A nil value must stay nil. Sending it the conversion method would turn
  # it into 0, 0.0 or "" (nil.to_i / nil.to_f / nil.to_s), making a missing
  # value indistinguishable from real data. coerce_complex_value already
  # maps nil to nil, so this keeps scalar and complex columns consistent.
  return nil if value.nil?

  type = column_type_map[column_name]

  # Only string columns may legitimately contain these words as text.
  unless type == :string
    return INFINITY if value == "Infinity"
    return NAN if value == "NaN"
  end

  return coerce_complex_value(value) if type.to_s.start_with?('array')

  conversion_method = TYPES[type]
  conversion_method ? value.send(conversion_method) : value
end
coerce_complex_value(value)
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Parses the JSON text of a complex column value.
#
# @param value the raw value: nil, or an object responding to +length+
#   that JSON.parse accepts
# @return nil when the value is nil, has zero length, or is the literal
#   string 'null'; otherwise whatever JSON.parse returns for it
def coerce_complex_value(value)
  # The checks short-circuit left to right, so +length+ is never called on nil.
  absent = value.nil? || value.length.zero? || value == 'null'
  absent ? nil : JSON.parse(value)
end
coerce_row(row)
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Turns a row into a Hash keyed by column name, with every value passed
# through +coerce_column+.
#
# @param row an object whose +colVals+ is a list of cells; each cell's raw
#   value is read as +cell.get_value.value+
# @return [Hash] column name => coerced value, in +column_names+ order
def coerce_row(row)
  names = column_names
  raw_values = row.colVals.map { |cell| cell.get_value.value }

  # zip pads with nil when the row has fewer cells than there are names.
  names.zip(raw_values).each_with_object({}) do |(column_name, value), coerced|
    coerced[column_name] = coerce_column(column_name, value)
  end
end
coerce_row_to_array(row)
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Lists a row's values in +column_names+ order.
#
# @param row an object indexable by column name via +[]+
# @return [Array] one entry per column name, each read as +row[name]+
def coerce_row_to_array(row)
  column_names.each_with_object([]) do |name, values|
    values << row[name]
  end
end
column_names()
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Returns the result-set column names as symbols, memoized in @column_names.
#
# @return [Array<Symbol>] one name per schema column, in schema order,
#   followed by a placeholder (:_p1, :_p2, ...) for every extra value in the
#   example row that has no schema column
def column_names
  return @column_names if @column_names

  # In rare cases Hive can return two identical column names (consider
  # SELECT a.foo, b.foo ...), giving two columns called foo with no
  # disambiguation. As a (far from ideal) workaround every member of such a
  # group gets a running index: foo, foo => foo_1, foo_2. Otherwise one
  # column would trample the other during Hash mapping.
  occurrences = Hash.new(0)

  # Step 1: count occurrences; tag the second and later occurrences with
  # their index. The first occurrence cannot be tagged yet because the
  # total is unknown until every column has been seen.
  tagged = @schema.columns.map do |column|
    raw_name = column.columnName
    occurrences[raw_name] += 1
    seen = occurrences[raw_name]
    seen > 1 ? "#{raw_name}---|---#{seen}" : raw_name
  end

  # Step 2: tag the first occurrence of every duplicated name with index 1,
  # then turn the temporary '---|---' marker into '_' and symbolize.
  names = tagged.map do |name|
    name = "#{name}---|---1" if occurrences[name] > 1
    name.gsub('---|---', '_').to_sym
  end

  # Hive doesn't return schema data for partitions on SELECT * queries.
  # Name the extra columns :_p1, :_p2, etc. to avoid collisions.
  extra = @example_row.length - names.length
  1.upto(extra) { |position| names << :"_p#{position}" }

  @column_names = names
end
column_type_map()
click to toggle source
# File lib/rbhive/t_c_l_i_schema_definition.rb
#
# Maps every name in +column_names+ to a type symbol, memoized in
# @column_type_map.
#
# @return [Hash{Symbol => Symbol}] column name => lower-cased TYPE_NAMES
#   entry as a symbol, or :string when no type can be determined
def column_type_map
  @column_type_map ||= begin
    schema_columns = @schema.columns
    type_map = {}

    column_names.each_with_index do |name, index|
      # Look the definition up by position, not by name. column_names keeps
      # schema order but renames duplicated columns (foo, foo => foo_1,
      # foo_2), so a lookup by name would miss them and they would all fall
      # back to :string. Names past the end of the schema (partition
      # placeholders on SELECT * queries) have no definition.
      definition = schema_columns[index]

      # Best effort: a missing definition, a type descriptor without a
      # primitive entry, or a type id absent from TYPE_NAMES all yield nil.
      type_name =
        begin
          definition && TYPE_NAMES[definition.typeDesc.types.first.primitiveEntry.type].downcase
        rescue StandardError
          nil
        end

      # Columns with no usable type are assumed to be strings.
      type_map[name] = type_name ? type_name.to_sym : :string
    end

    type_map
  end
end