class RBHive::TCLISchemaDefinition

Constants

INFINITY
NAN
TYPES

Attributes

schema[R]

Public Class Methods

new(schema, example_row) click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Stores the result-set schema together with the column values of a sample row.
#
# schema      - schema object; its +columns+ are read later by column_names
#               and column_type_map.
# example_row - a row responding to +colVals+, or nil/false when no row is
#               available (an empty list is stored in that case).
def initialize(schema, example_row)
  @schema = schema
  @example_row =
    if example_row
      example_row.colVals
    else
      []
    end
end

Public Instance Methods

coerce_column(column_name, value) click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Converts a single raw column value according to the column's mapped type.
#
# column_name - key into column_type_map.
# value       - raw value for that column.
#
# Returns INFINITY / NAN for the literal strings "Infinity" / "NaN" on any
# non-string column, the result of coerce_complex_value for types whose name
# starts with "array", the value converted with the method named in TYPES
# when one is registered for the type, and the untouched value otherwise.
def coerce_column(column_name, value)
  type = column_type_map[column_name]

  # The sentinel spellings are only meaningful for non-string columns;
  # in a string column they are ordinary text.
  unless type == :string
    return INFINITY if value == "Infinity"
    return NAN if value == "NaN"
  end

  return coerce_complex_value(value) if type.to_s =~ /^array/

  converter = TYPES[type]
  return value unless converter

  value.send(converter)
end
coerce_complex_value(value) click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Parses a JSON-encoded complex value.
#
# value - JSON text, or nil.
#
# Returns nil when value is nil, has zero length, or is the literal string
# 'null'; otherwise returns whatever JSON.parse produces for it.
def coerce_complex_value(value)
  absent = value.nil? || value.length == 0 || value == 'null'
  absent ? nil : JSON.parse(value)
end
coerce_row(row) click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Turns a raw row into a Hash keyed by column name with coerced values.
#
# row - object whose +colVals+ yields cells; each cell's +get_value.value+
#       is taken as the raw value.
#
# Names come from column_names and are paired with the raw values by
# position; a name with no matching cell is coerced from nil.
def coerce_row(row)
  names = column_names
  raw_values = row.colVals.map { |cell| cell.get_value.value }

  names.zip(raw_values).each_with_object({}) do |(name, raw), coerced|
    coerced[name] = coerce_column(name, raw)
  end
end
coerce_row_to_array(row) click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Flattens a keyed row into an Array ordered like column_names.
#
# row - anything indexable by column name via +[]+.
#
# Returns one element per column name, in column_names order.
def coerce_row_to_array(row)
  column_names.map do |name|
    row[name]
  end
end
column_names() click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Returns the list of column names as Symbols, computed once and memoized
# in @column_names.
#
# Names are read from @schema.columns. Two adjustments are applied:
#
# * Duplicate names (e.g. SELECT a.foo, b.foo yields two columns called
#   "foo") are numbered so they do not overwrite each other when rows are
#   mapped into a Hash: foo, foo => :foo_1, :foo_2.
# * When the example row holds more values than the schema has columns
#   (the original comment attributes this to partition columns on
#   SELECT * queries), placeholder names :_p1, :_p2, ... are appended
#   until the counts match.
def column_names
  return @column_names if @column_names

  separator = '---|---'
  occurrences = Hash.new(0)
  names = @schema.columns.map { |column| column.columnName }

  # Pass 1: count occurrences; tag the 2nd, 3rd, ... repeats with their index.
  names = names.map do |name|
    occurrences[name] += 1
    occurrences[name] > 1 ? "#{name}#{separator}#{occurrences[name]}" : name
  end

  # Pass 2: now that the totals are known, tag the first occurrence of every
  # repeated name with index 1.
  names = names.map do |name|
    occurrences[name] > 1 ? "#{name}#{separator}1" : name
  end

  # Pass 3: collapse the temporary separator to "_" and symbolize.
  names = names.map { |name| name.gsub(separator, '_').to_sym }

  # Pad with placeholder names for values the schema does not describe.
  placeholder_index = 0
  until names.length >= @example_row.length
    placeholder_index += 1
    names << :"_p#{placeholder_index}"
  end

  @column_names = names
end
column_type_map() click to toggle source
   # File lib/rbhive/t_c_l_i_schema_definition.rb
# Returns a Hash mapping every name from column_names to a type Symbol,
# computed once and memoized in @column_type_map.
#
# Names are paired with @schema.columns by position rather than looked up by
# name: column_names renames duplicated columns (foo => foo_1, foo_2), so a
# lookup by the renamed symbol would never find the schema entry and the
# column would silently be typed as :string. Positional pairing is valid
# because column_names builds its list from @schema.columns in order and only
# appends placeholder names afterwards.
#
# A column falls back to :string when it has no schema entry (the appended
# placeholder names) or when its type name cannot be resolved.
def column_type_map
  @column_type_map ||= column_names.zip(@schema.columns).each_with_object({}) do |(name, definition), types|
    type_name =
      begin
        # TYPE_NAMES is defined outside this listing; presumably it maps the
        # primitive type id to its name -- confirm against the class constants.
        definition && TYPE_NAMES[definition.typeDesc.types.first.primitiveEntry.type].downcase
      rescue StandardError
        # Best effort, as before: an unresolvable type is treated as a string.
        nil
      end
    types[name] = type_name ? type_name.to_sym : :string
  end
end