module Sequel::Impala::DatabaseMethods
Public Instance Methods
# File lib/sequel/adapters/shared/impala.rb, line 25
# Run the statement built by compute_stats_sql for +table_name+.
def compute_stats(table_name)
  sql = compute_stats_sql(table_name)
  run(sql)
end
Do not use a composite primary key, foreign keys, or an index when creating a join table, as Impala
doesn't support those.
# File lib/sequel/adapters/shared/impala.rb, line 12
# Create a join table containing one Integer column per key of +hash+
# (keys ordered by their string form). No composite primary key, foreign
# keys or index are declared.
def create_join_table(hash, options=OPTS)
  columns = hash.keys.sort_by { |key| key.to_s }
  table = join_table_name(hash, options)
  create_table(table, options) do
    columns.each { |column| Integer column }
  end
end
Create a database/schema in Impala.
Options:
- :if_not_exists
-
Don't raise an error if the schema already exists.
- :location
-
Set the file system location to store the data for tables in the created schema.
Examples:
create_schema(:s) # CREATE SCHEMA `s` create_schema(:s, :if_not_exists=>true) # CREATE SCHEMA IF NOT EXISTS `s` create_schema(:s, :location=>'/a/b') # CREATE SCHEMA `s` LOCATION '/a/b'
# File lib/sequel/adapters/shared/impala.rb, line 46
# Run the statement built by create_schema_sql for +schema+ and +options+.
def create_schema(schema, options=OPTS)
  sql = create_schema_sql(schema, options)
  run(sql)
end
# File lib/sequel/adapters/shared/impala.rb, line 50
# Create the table via the parent implementation, then invalidate metadata
# when the :invalidate_metadata option is set. A value of :all invalidates
# metadata for every table; any other truthy value invalidates only +name+.
def create_table(name, options=OPTS)
  super
  mode = options[:invalidate_metadata]
  return unless mode

  invalidate_metadata(mode == :all ? nil : name)
end
Set the database_type
for this database to :impala.
# File lib/sequel/adapters/shared/impala.rb 58 def database_type 59 :impala 60 end
Return the DESCRIBE output for the table, showing table columns, types, and comments. If the :formatted option is given, use DESCRIBE FORMATTED and return a lot more information about the table. Both of these return arrays of hashes.
Examples:
describe(:t) # DESCRIBE `t` describe(:t, :formatted=>true) # DESCRIBE FORMATTED `t`
# File lib/sequel/adapters/shared/impala.rb, line 75
# Return the rows produced by DESCRIBE (or DESCRIBE FORMATTED when
# opts[:formatted] is truthy) for +table+.
#
# opts[:dataset], when given, is used (after calling +naked+ on it) to run
# the query; otherwise the database's default +dataset+ is used.
#
# The SQL template is chosen as a whole so the generated statement contains
# no stray double space and matches the documented "DESCRIBE `t`" /
# "DESCRIBE FORMATTED `t`" forms.
def describe(table, opts=OPTS)
  given = opts[:dataset]
  ds = given ? given.naked : dataset
  sql = opts[:formatted] ? 'DESCRIBE FORMATTED ?' : 'DESCRIBE ?'
  ds.with_sql(sql, table).all
end
Drop a database/schema from Impala.
Options:
- :if_exists
-
Don't raise an error if the schema doesn't exist.
Examples:
drop_schema(:s) # DROP SCHEMA `s` drop_schema(:s, :if_exists=>true) # DROP SCHEMA IF EXISTS `s`
# File lib/sequel/adapters/shared/impala.rb, line 96
# Run the statement built by drop_schema_sql for +schema+ and +options+.
def drop_schema(schema, options=OPTS)
  sql = drop_schema_sql(schema, options)
  run(sql)
end
# File lib/sequel/adapters/shared/impala.rb, line 100
# Drop the given tables, ignoring the :cascade option because CASCADE isn't
# a supported option in Impala.
#
# The trailing options hash (if any) is copied without :cascade instead of
# being modified in place, so the caller's hash is left untouched and a
# frozen options hash does not raise.
def drop_table(*names)
  last = names.last
  if last.is_a?(Hash) && last.key?(:cascade)
    names = names[0...-1] << last.reject { |key, _| key == :cascade }
  end
  # Bare super forwards the current value of +names+.
  super
end
Implicitly qualify the table if using the :search_path option. This will look at all of the tables and views in the schemas, and if an unqualified table is used and appears in one of the schemas, it will be implicitly qualified with the given schema name.
# File lib/sequel/adapters/shared/impala.rb, line 113
# Qualify +table+ with a schema from the search path when it is unqualified.
# Returns +table+ unchanged when the :search_path option is not set, when a
# symbol already carries a schema, or when the argument type is not handled.
def implicit_qualify(table)
  return table unless opts[:search_path]

  case table
  when Symbol
    schema, name, alias_name = Sequel.split_symbol(table)
    return table if schema

    qualified = implicit_qualify(name)
    alias_name ? Sequel.as(qualified, alias_name) : qualified
  when String
    found = search_path_table_schemas[table]
    unless found
      # Unknown table: drop the cached mapping and look once more.
      invalidate_table_schemas
      found = search_path_table_schemas[table]
    end
    found ? Sequel.qualify(found, table) : Sequel.identifier(table)
  when SQL::Identifier
    implicit_qualify(table.value.to_s)
  when SQL::AliasedExpression
    SQL::AliasedExpression.new(implicit_qualify(table.expression), table.alias)
  else
    table
  end
end
Invalidate the metadata for the given table, or for all tables if no argument is given.
# File lib/sequel/adapters/shared/impala.rb, line 146
# Run INVALIDATE METADATA, followed by the quoted +identifier+ when one is
# given.
def invalidate_metadata(identifier=nil)
  target = identifier ? quote_schema_table(identifier) : nil
  run("INVALIDATE METADATA #{target}")
end
# File lib/sequel/adapters/shared/impala.rb 218 def invalidate_table_schemas 219 @search_path_table_schemas = nil 220 end
Load data from HDFS into Impala.
Options:
- :overwrite
-
Overwrite the existing table instead of appending to it.
Examples:
load_data('/user/foo', :bar) # LOAD DATA INPATH '/user/foo' INTO TABLE `bar` load_data('/user/foo', :bar, :overwrite=>true) # LOAD DATA INPATH '/user/foo' OVERWRITE INTO TABLE `bar`
# File lib/sequel/adapters/shared/impala.rb, line 162
# Run the statement built by load_data_sql for +path+, +table+ and +options+.
def load_data(path, table, options=OPTS)
  sql = load_data_sql(path, table, options)
  run(sql)
end
# File lib/sequel/adapters/shared/impala.rb, line 21
# Run the statement built by refresh_sql for +table_name+.
def refresh(table_name)
  sql = refresh_sql(table_name)
  run(sql)
end
Sets options in the current db connection for each key/value pair
# File lib/sequel/adapters/shared/impala.rb, line 223
# Run, in order, every statement that set_sql builds from +opts+.
def set(opts)
  set_sql(opts).each { |statement| run(statement) }
end
Impala
supports CREATE TABLE IF NOT EXISTS.
# File lib/sequel/adapters/shared/impala.rb 173 def supports_create_table_if_not_exists? 174 true 175 end
Impala
does not support foreign keys.
# File lib/sequel/adapters/shared/impala.rb 178 def supports_foreign_key_parsing? 179 false 180 end
Impala
does not support indexes.
# File lib/sequel/adapters/shared/impala.rb 183 def supports_index_parsing? 184 false 185 end
Check that the tables returned by the JDBC
driver are actually valid tables and not views. The Hive2
JDBC
driver returns views when listing tables and nothing when listing views.
# File lib/sequel/adapters/shared/impala.rb, line 190
# Return the names from _tables for which is_valid_table? is truthy.
def tables(opts=OPTS)
  candidates = _tables(opts)
  candidates.select { |name| is_valid_table?(name, opts) }
end
Impala
doesn't support transactions, so instead of issuing a transaction, just checkout a connection. This ensures the same connection is used for the transaction block, but as Impala
doesn't support transactions, you can't rollback.
# File lib/sequel/adapters/shared/impala.rb, line 198
# Check out a connection for opts[:server] and yield it to the block. No
# transaction statements are issued here.
def transaction(opts=OPTS)
  synchronize(opts[:server]) { |conn| yield conn }
end
Creates a dataset that uses the VALUES clause:
DB.values([[1, 2], [3, 4]]) # VALUES ((1, 2), (3, 4))
# File lib/sequel/adapters/shared/impala.rb 214 def values(v) 215 @default_dataset.clone(:values=>v) 216 end
Determine the available views by listing all tables via JDBC
(which includes both tables and views), and removing all valid tables.
# File lib/sequel/adapters/shared/impala.rb, line 206
# Return the names from _tables for which is_valid_table? is not truthy.
def views(opts=OPTS)
  candidates = _tables(opts)
  candidates.reject { |name| is_valid_table?(name, opts) }
end
Private Instance Methods
Metadata queries on JDBC
use uppercase keys, so set the identifier output method to downcase so that metadata queries work correctly.
# File lib/sequel/adapters/shared/impala.rb 357 def _metadata_dataset 358 super.with_extend do 359 def output_identifier(v) 360 v.downcase.to_sym 361 end 362 end 363 end
# File lib/sequel/adapters/shared/impala.rb, line 231
# Run SHOW TABLES (restricted with IN <schema> when opts[:schema] is set) on
# the metadata dataset and return the :name values, each passed through
# output_identifier_meth.
def _tables(opts)
  convert = output_identifier_meth
  sql = "SHOW TABLES"
  sql += " IN #{quote_identifier(opts[:schema])}" if opts[:schema]
  names = metadata_dataset.with_sql(sql).select_map(:name)
  names.map { |name| convert.call(name) }
end
Impala
uses ADD COLUMNS instead of ADD COLUMN. As its use of ADD COLUMNS implies, it supports adding multiple columns at once, but this adapter doesn't offer an API for that.
# File lib/sequel/adapters/shared/impala.rb, line 242
# Build the ADD COLUMNS clause for the single column described by +op+.
# +table+ is not used.
def alter_table_add_column_sql(table, op)
  definition = column_definition_sql(op)
  "ADD COLUMNS (#{definition})"
end
Impala
uses CHANGE instead of having separate RENAME syntax for renaming columns. As CHANGE requires a type, look up the type from the database schema.
# File lib/sequel/adapters/shared/impala.rb, line 249
# Build a CHANGE clause that renames column op[:name] to op[:new_name],
# reusing the :db_type found for the column in schema(table).
# Raises Error when no :db_type can be found for the column.
def alter_table_rename_column_sql(table, op)
  current = op[:name]
  entry = schema(table).find { |column| column.first == current }
  db_type = entry ? entry.last[:db_type] : nil
  raise Error, "cannot determine database type to use for CHANGE COLUMN operation" unless db_type

  renamed = op.merge(:type=>db_type, :name=>op[:new_name])
  "CHANGE #{quote_identifier(current)} #{column_definition_sql(renamed)}"
end
# File lib/sequel/adapters/shared/impala.rb, line 260
# Build a CHANGE clause that redefines column op[:name] using the column
# definition generated from +op+. +table+ is not used.
def alter_table_set_column_type_sql(table, op)
  column = quote_identifier(op[:name])
  definition = column_definition_sql(op)
  "CHANGE #{column} #{definition}"
end
Add COMMENT when defining the column, if :comment is present.
# File lib/sequel/adapters/shared/impala.rb, line 265
# Append a COMMENT clause to +sql+ when column[:comment] is present;
# otherwise leave +sql+ untouched and return nil.
def column_definition_comment_sql(sql, column)
  comment = column[:comment]
  return unless comment

  sql << " COMMENT #{literal(comment)}"
end
# File lib/sequel/adapters/shared/impala.rb 269 def column_definition_order 270 [:comment] 271 end
# File lib/sequel/adapters/shared/impala.rb, line 315
# Build the COMPUTE STATS statement for the quoted +table_name+.
def compute_stats_sql(table_name)
  quoted = quote_schema_table(table_name)
  "COMPUTE STATS #{quoted}"
end
# File lib/sequel/adapters/shared/impala.rb, line 273
# Build the CREATE SCHEMA statement for +schema+, adding IF NOT EXISTS and
# LOCATION when the matching options are set.
def create_schema_sql(schema, options)
  parts = ["CREATE SCHEMA "]
  parts << "IF NOT EXISTS " if options[:if_not_exists]
  parts << quote_identifier(schema)
  parts << " LOCATION #{literal(options[:location])}" if options[:location]
  parts.join
end
Support using table parameters for CREATE TABLE AS, necessary for creating parquet files from datasets.
# File lib/sequel/adapters/shared/impala.rb, line 279
# Build CREATE TABLE ... AS: the table prefix, then the table parameters,
# then " AS " and the given +sql+.
def create_table_as_sql(name, sql, options)
  prefix = create_table_prefix_sql(name, options)
  parameters = create_table_parameters_sql(options)
  "#{prefix}#{parameters} AS #{sql}"
end
# File lib/sequel/adapters/shared/impala.rb, line 293
# Build the trailing table clauses from +options+, in this order: COMMENT,
# ROW FORMAT DELIMITED (field/line terminators), STORED AS, LOCATION.
# :field_escape is only honoured when :field_term is also given, and
# :stored_as is inserted as-is rather than passed through +literal+.
def create_table_parameters_sql(options)
  field_term = options[:field_term]
  line_term = options[:line_term]

  clauses = String.new
  clauses << " COMMENT #{literal(options[:comment])}" if options[:comment]
  if field_term || line_term
    clauses << " ROW FORMAT DELIMITED"
    if field_term
      clauses << " FIELDS TERMINATED BY #{literal(field_term)}"
      clauses << " ESCAPED BY #{literal(options[:field_escape])}" if options[:field_escape]
    end
    clauses << " LINES TERMINATED BY #{literal(line_term)}" if line_term
  end
  clauses << " STORED AS #{options[:stored_as]}" if options[:stored_as]
  clauses << " LOCATION #{literal(options[:location])}" if options[:location]
  clauses
end
# File lib/sequel/adapters/shared/impala.rb, line 283
# Build the leading part of CREATE TABLE, adding EXTERNAL and IF NOT EXISTS
# when the matching options are set.
def create_table_prefix_sql(name, options)
  external = options[:external] ? 'EXTERNAL ' : ''
  guard = options[:if_not_exists] ? ' IF NOT EXISTS' : ''
  "CREATE #{external}TABLE#{guard} #{quote_schema_table(name)}"
end
# File lib/sequel/adapters/shared/impala.rb, line 287
# Return the parent's CREATE TABLE statement with the clauses from
# create_table_parameters_sql appended.
def create_table_sql(name, generator, options)
  super + create_table_parameters_sql(options)
end
# File lib/sequel/adapters/shared/impala.rb, line 319
# Build the DROP SCHEMA statement for +schema+, adding IF EXISTS and CASCADE
# when the matching options are set.
def drop_schema_sql(schema, options)
  guard = options[:if_exists] ? 'IF EXISTS ' : ''
  cascade = options[:cascade] ? ' CASCADE' : ''
  "DROP SCHEMA #{guard}#{quote_identifier(schema)}#{cascade}"
end
# File lib/sequel/adapters/shared/impala.rb, line 414
# Issue USE <database> on +conn+ when +database+ is given, then return +conn+.
# The database name is interpolated without quoting.
def force_database(conn, database)
  log_connection_execute(conn, "USE #{database}") if database
  conn
end
Use DESCRIBE FORMATTED to differentiate tables from views: a table is valid when its 'Table Type:' row does not mention VIEW. Tables that raise a database error when described are treated as invalid.
# File lib/sequel/adapters/shared/impala.rb, line 339
# Describe +t+ (qualified with opts[:schema] when given) with
# :formatted=>true and check that its 'Table Type:' row does not match VIEW.
# Returns nil when there is no such row, and false when describing the
# table raises Sequel::DatabaseError.
def is_valid_table?(t, opts=OPTS)
  t = Sequel.qualify(opts[:schema], t) if opts[:schema]
  type_row = describe(t, :formatted=>true).find do |r|
    r[:name].to_s.strip == 'Table Type:'
  end
  type_row[:type].to_s.strip !~ /VIEW/ if type_row
rescue Sequel::DatabaseError
  # This can be raised for Hive tables that Impala returns via SHOW TABLES,
  # but which it raises an exception when you try to DESCRIBE them.
  false
end
# File lib/sequel/adapters/shared/impala.rb, line 351
# Build the LOAD DATA INPATH statement that loads +path+ into +table+,
# adding OVERWRITE when options[:overwrite] is set.
#
# +path+ is emitted as an SQL literal. +table+ is quoted with
# quote_schema_table, like the other table-taking statements here
# (REFRESH, COMPUTE STATS, INVALIDATE METADATA), so a table name given as a
# String is emitted as an identifier rather than as a quoted string literal.
def load_data_sql(path, table, options)
  overwrite = options[:overwrite] ? ' OVERWRITE' : ''
  "LOAD DATA INPATH #{literal(path)}#{overwrite} INTO TABLE #{quote_schema_table(table)}"
end
# File lib/sequel/adapters/shared/impala.rb 359 def output_identifier(v) 360 v.downcase.to_sym 361 end
# File lib/sequel/adapters/shared/impala.rb, line 311
# Build the REFRESH statement for the quoted +table_name+.
def refresh_sql(table_name)
  quoted = quote_schema_table(table_name)
  "REFRESH #{quoted}"
end
# File lib/sequel/adapters/shared/impala.rb, line 323
# Return a memoized hash mapping table name (String) to schema name (String)
# for every table listed by _tables in the schemas of opts[:search_path].
#
# The search path may be an array of schemas or a comma-separated String.
# For a String, whitespace around each entry is stripped and empty entries
# are dropped, so "a, b" names the schemas "a" and "b".
#
# Schemas are visited in reverse so that, when a table name appears in
# several schemas, the schema listed earliest in the search path wins.
def search_path_table_schemas
  @search_path_table_schemas ||= begin
    search_path = opts[:search_path]
    if search_path.is_a?(String)
      search_path = search_path.split(',').map(&:strip).reject(&:empty?)
    end
    table_schemas = {}
    search_path.reverse_each do |schema|
      _tables(:schema=>schema).each do |table|
        table_schemas[table.to_s] = schema.to_s
      end
    end
    table_schemas
  end
end
# File lib/sequel/adapters/shared/impala.rb, line 410
# Build one "SET key=value" statement per key/value pair of +opts+. Keys and
# values are interpolated as-is, without quoting.
def set_sql(opts)
  opts.each_with_object([]) do |(option, value), statements|
    statements << "SET #{option}=#{value}"
  end
end
Impala
doesn't like the word “biginteger”
# File lib/sequel/adapters/shared/impala.rb 376 def type_literal_generic_bignum(column) 377 :bigint 378 end
Impala
doesn't like the word “biginteger”
# File lib/sequel/adapters/shared/impala.rb 371 def type_literal_generic_bignum_symbol(column) 372 :bigint 373 end
Impala
doesn't support date columns yet, so use timestamp until date is natively supported.
# File lib/sequel/adapters/shared/impala.rb 382 def type_literal_generic_date(column) 383 :timestamp 384 end
Impala
uses double instead of “double precision” for floating point values.
# File lib/sequel/adapters/shared/impala.rb 388 def type_literal_generic_float(column) 389 :double 390 end
Impala
doesn't like the word “integer”
# File lib/sequel/adapters/shared/impala.rb 366 def type_literal_generic_integer(column) 367 :int 368 end
Impala
uses decimal instead of numeric for arbitrary precision numeric values.
# File lib/sequel/adapters/shared/impala.rb, line 394
# Return :decimal, or "decimal(<size>)" when column[:size] is set. A size
# given as an array is joined with ", ".
def type_literal_generic_numeric(column)
  size = column[:size]
  return :decimal unless size

  "decimal(#{Array(size).join(', ')})"
end
Use char or varchar if given a size, otherwise use string. Using a size is not recommended, as Impala
doesn't implicitly cast string values to char or varchar, and doesn't implicitly cast from different sizes of varchar.
# File lib/sequel/adapters/shared/impala.rb, line 402
# Return :string when column[:size] is not set. With a size, return
# "char(<size>)" when column[:fixed] is truthy and "varchar(<size>)"
# otherwise.
def type_literal_generic_string(column)
  size = column[:size]
  return :string unless size

  prefix = column[:fixed] ? '' : 'var'
  "#{prefix}char(#{size})"
end