module Sequel::Impala::DatasetMethods
Constants
- APOS
- BACKTICK
- BOOL_FALSE
- BOOL_TRUE
- CONSTANT_LITERAL_MAP
- EXCEPT_SOURCE_COLUMN
- EXCEPT_STRATEGIES
- NOT
- PAREN_CLOSE
- PAREN_OPEN
- REGEXP
- SELECT_VALUES
- SPACE
- STRING_ESCAPE_RE
- STRING_ESCAPE_REPLACE
Public Instance Methods
Handle string concatenation using the concat string function. Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as Impala doesn't support escaping LIKE metacharacters.
Support regexps on Impala using the REGEXP operator.
For case insensitive regexps, convert both values to uppercase first.
# File lib/sequel/adapters/shared/impala.rb, line 432
# Append the SQL for a complex expression to +sql+, special-casing a few
# operators:
#
# * <tt>||</tt> is emitted as a call to the concat function.
# * LIKE / NOT LIKE emit only the first two arguments around the operator,
#   so no ESCAPE clause is ever produced.
# * The regexp operators are emitted using REGEXP, prefixed with NOT for
#   the negated forms; the <tt>~*</tt> / <tt>!~*</tt> forms wrap every
#   operand in upper() first.
#
# Every other operator is handed to the inherited implementation.
def complex_expression_sql_append(sql, op, args)
  case op
  when :'||'
    literal_append(sql, Sequel.function(:concat, *args))
  when :LIKE, :'NOT LIKE'
    subject, pattern = args
    sql << PAREN_OPEN
    literal_append(sql, subject)
    sql << SPACE << op.to_s << SPACE
    literal_append(sql, pattern)
    sql << PAREN_CLOSE
  when :~, :'!~', :'~*', :'!~*'
    insensitive = (op == :'~*' || op == :'!~*')
    negated = (op == :'!~' || op == :'!~*')
    operands = insensitive ? args.map { |a| Sequel.function(:upper, a) } : args
    subject, pattern = operands

    sql << NOT if negated
    sql << PAREN_OPEN
    literal_append(sql, subject)
    sql << REGEXP
    literal_append(sql, pattern)
    sql << PAREN_CLOSE
  else
    super
  end
end
Use now() for current timestamp, as Impala
doesn't support CURRENT_TIMESTAMP.
# File lib/sequel/adapters/shared/impala.rb, line 459
# Append the SQL for +constant+ to +sql+. The entry registered in
# CONSTANT_LITERAL_MAP is used when there is one; otherwise the constant's
# own string form is appended.
def constant_sql_append(sql, constant)
  literal = CONSTANT_LITERAL_MAP.fetch(constant) { constant.to_s }
  sql << literal
end
Use the addition operator combined with interval types to handle date arithmetic when using the date_arithmetic extension.
# File lib/sequel/adapters/shared/impala.rb, line 466
# Append the SQL for the date arithmetic expression +da+ to +sql+.
#
# The base expression is cast to Time. Each valid unit of the interval
# becomes an <tt>INTERVAL value unit</tt> literal, and the cast expression
# and the interval literals are combined with the + operator. When the
# interval yields no units, only the cast expression is appended.
def date_add_sql_append(sql, da)
  terms = [Sequel.cast(da.expr, Time)]
  units = Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS
  each_valid_interval_unit(da.interval, units) do |value, sql_unit|
    terms << Sequel.lit("INTERVAL #{value} #{sql_unit}")
  end

  if terms.length == 1
    # No interval units were yielded: emit just the cast expression.
    literal_append(sql, terms.first)
  else
    complex_expression_sql_append(sql, :+, terms)
  end
end
DELETE is emulated on Impala
and doesn't return the number of modified rows.
# File lib/sequel/adapters/shared/impala.rb, line 483
# Run the inherited delete, then return nil rather than whatever the
# inherited method returned.
def delete
  super # the inherited result is intentionally discarded
  nil
end
Emulate DELETE using INSERT OVERWRITE selecting all columns from the table, with a reversed condition used for WHERE.
# File lib/sequel/adapters/shared/impala.rb, line 490
# Build the SQL string used to emulate DELETE: an INSERT OVERWRITE into
# the dataset's sources that selects every column back from those same
# sources. With a WHERE condition, only rows satisfying NOT (condition)
# are selected; without one, WHERE false selects no rows at all.
#
# NOTE(review): a row for which the condition evaluates to NULL is not
# selected by NOT (condition) either, so it would presumably be removed
# as well -- confirm whether that is intended.
def delete_sql
  sources = opts[:from]
  condition = opts[:where]

  sql = "INSERT OVERWRITE "
  source_list_append(sql, sources)
  sql << " SELECT * FROM "
  source_list_append(sql, sources)

  if condition
    sql << " WHERE NOT ("
    literal_append(sql, condition)
    sql << ")"
  else
    sql << " WHERE false"
  end

  sql
end
Don't remove an order, because that breaks things when offsets are used, as Impala
requires an order when using an offset.
# File lib/sequel/adapters/shared/impala.rb, line 527
# Return true when fetching the literal 1 (aliased as +one+) from this
# dataset yields nil, false otherwise.
def empty?
  probe = Sequel::SQL::AliasedExpression.new(1, :one)
  get(probe).nil?
end
Emulate EXCEPT using a chosen strategy and checking for values in only the first table.
# File lib/sequel/adapters/shared/impala.rb, line 532
# Emulate EXCEPT against +other+, returning the result wrapped with
# from_self(opts).
#
# Raises InvalidOperation when opts[:all] is set or when
# opts[:from_self] is false. The emulation is chosen from the
# :except_strategy dataset option (strategy followed by key columns):
#
# * :not_exists - exclude rows for which a correlated subquery on +other+,
#   matching every key, exists.
# * :not_in - exclude rows whose single key appears in +other+; raises
#   Sequel::Error unless exactly one key was given.
# * :left_join - left join +other+ on the keys and keep rows where a t2
#   key is NULL.
# * anything else - group both sides by their columns, tag them with 1
#   (this dataset) and 2 (+other+), UNION ALL them, regroup, and keep groups
#   with a single row whose minimum tag is 1.
def except(other, opts=OPTS)
  if opts[:all]
    raise(InvalidOperation, "EXCEPT ALL not supported")
  end
  if opts[:from_self] == false
    raise(InvalidOperation, "The :from_self=>false option to except is not supported")
  end

  # @opts is the dataset's option hash; the opts parameter shadows the reader.
  strategy, *keys = @opts[:except_strategy]
  left = from_self(:alias=>:t1)

  emulated =
    case strategy
    when :not_exists
      correlation = keys.map { |key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)] }
      matching = other.from_self(:alias=>:t2).where(correlation).select(nil)
      left.exclude(matching.exists)
    when :not_in
      unless keys.length == 1
        raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key"
      end
      key = keys.first
      left.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
    when :left_join
      right = other.from_self(:alias=>:t2).as(:t2)
      join_keys = keys.map { |key| [key, key] }
      unmatched = Sequel.or(keys.map { |key| [Sequel.qualify(:t2, key), nil] })
      left.left_join(right, join_keys).where(unmatched).select_all(:t1)
    else
      cols = columns
      rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
      left.select_group(*cols).
        select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
        union(rhs, all: true).
        select_group(*cols).
        having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
    end

  emulated.from_self(opts)
end
The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation, as that doesn't require you to provide a key column, but you can use this to choose a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column.
# File lib/sequel/adapters/shared/impala.rb, line 570
# Return a copy of the dataset whose :except_strategy option is the given
# +strategy+ followed by +keys+.
#
# Raises Sequel::Error when +strategy+ is not listed in EXCEPT_STRATEGIES.
def except_strategy(strategy, *keys)
  unless EXCEPT_STRATEGIES.include?(strategy)
    raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}"
  end

  clone(:except_strategy=>[strategy, *keys])
end
Implicitly qualify tables if using the :search_path database option.
# File lib/sequel/adapters/shared/impala.rb, line 506
# Build the dataset via the inherited +from+, then pass every source table
# through db.implicit_qualify.
#
# The qualified sources are placed in a new array on a clone instead of
# rewriting the :from array in place, so this also works when that array
# is frozen. When the inherited method leaves :from unset (nil), the
# dataset is returned untouched.
def from(*)
  ds = super
  sources = ds.opts[:from]
  return ds unless sources

  ds.clone(:from => sources.map { |t| db.implicit_qualify(t) })
end
Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
DB[:table].insert_overwrite.insert(DB[:other]) # INSERT OVERWRITE table SELECT * FROM other
# File lib/sequel/adapters/shared/impala.rb, line 579
# Return a copy of the dataset with the :insert_overwrite option set to
# true.
def insert_overwrite
  overwrite_option = {:insert_overwrite=>true}
  clone(overwrite_option)
end
Impala
does not support INSERT DEFAULT VALUES.
# File lib/sequel/adapters/shared/impala.rb, line 584
# Always false: inserting with an empty values list is reported as
# unsupported.
def insert_supports_empty_values?
  false
end
Emulate INTERSECT by joining this dataset to the other dataset on all of this dataset's columns and selecting the rows of this dataset that have a match.
# File lib/sequel/adapters/shared/impala.rb, line 589
# Emulate INTERSECT against +other+: this dataset (aliased as +l+) is
# joined to +other+ on this dataset's columns, only the columns of +l+ are
# selected, and the result is wrapped with from_self(opts).
#
# Raises InvalidOperation when opts[:all] is set or when
# opts[:from_self] is false.
#
# NOTE(review): a plain join presumably repeats rows when either side
# contains duplicates -- confirm whether callers rely on distinct output.
def intersect(other, opts=OPTS)
  if opts[:all]
    raise(InvalidOperation, "INTERSECT ALL not supported")
  end
  if opts[:from_self] == false
    raise(InvalidOperation, "The :from_self=>false option to intersect is not supported")
  end

  cols = columns
  left = from_self(alias: :l)
  matched = left.join(other, cols).select_all(:l)
  matched.from_self(opts)
end
Implicitly qualify tables if using the :search_path database option.
# File lib/sequel/adapters/shared/impala.rb, line 513
# Pass +table+ through db.implicit_qualify and then delegate to the
# inherited join_table with all other arguments (and the block) unchanged.
def join_table(type, table, expr=nil, options=OPTS, &block)
  qualified_table = db.implicit_qualify(table)
  super(type, qualified_table, expr, options, &block)
end
Impala
supports non-recursive common table expressions.
# File lib/sequel/adapters/shared/impala.rb, line 600
# Always true, regardless of the statement +type+ asked about.
def supports_cte?(type=:select)
  true
end
# File lib/sequel/adapters/shared/impala.rb, line 604
# Always true: common table expressions are reported as usable inside
# subqueries.
def supports_cte_in_subqueries?
  true
end
Impala
doesn't support derived column lists when aliasing tables.
# File lib/sequel/adapters/shared/impala.rb, line 610
# Always false: derived column lists are reported as unsupported.
def supports_derived_column_lists?
  false
end
Impala
doesn't support EXCEPT or INTERSECT, but support is emulated for them. However, EXCEPT ALL and INTERSECT ALL are not emulated.
# File lib/sequel/adapters/shared/impala.rb, line 616
# Always false: the ALL variants of INTERSECT and EXCEPT are reported as
# unsupported.
def supports_intersect_except_all?
  false
end
Impala only supports IS NULL, not IS TRUE or IS FALSE.
# File lib/sequel/adapters/shared/impala.rb, line 621
# Always false: IS TRUE / IS FALSE are reported as unsupported.
def supports_is_true?
  false
end
Impala
doesn't support IN when used with multiple columns.
# File lib/sequel/adapters/shared/impala.rb, line 626
# Always false: IN with multiple columns is reported as unsupported.
def supports_multiple_column_in?
  false
end
Impala
supports window functions.
# File lib/sequel/adapters/shared/impala.rb, line 636
# Always true: window functions are reported as supported.
def supports_window_functions?
  true
end
Create a parquet file from this dataset. table
should be the table name to create. To specify a path for the parquet file, use the :location option.
Examples:
DB[:t].to_parquet(:p) # CREATE TABLE `p` STORED AS parquet AS # SELECT * FROM `t` DB[:t].to_parquet(:p, :location=>'/a/b') # CREATE TABLE `p` STORED AS parquet LOCATION '/a/b' # SELECT * FROM `t`
# File lib/sequel/adapters/shared/impala.rb, line 653
# Ask the database to create +table+ from this dataset, stored as parquet.
#
# +options+ is passed on to db.create_table with :as set to this dataset
# and :stored_as set to :parquet (both override same-named entries in
# +options+). Returns whatever db.create_table returns.
def to_parquet(table, options=OPTS)
  create_options = options.merge(:as=>self, :stored_as=>:parquet)
  db.create_table(table, create_options)
end
Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns from the table, with WHERE false.
# File lib/sequel/adapters/shared/impala.rb, line 519
# Return the SQL used to emulate TRUNCATE: the delete_sql of a copy of
# this dataset whose :where option is cleared.
#
# The option is cleared through clone(:where => nil) rather than by
# deleting the key from the copy's option hash, so this also works when
# that hash is frozen.
def truncate_sql
  clone(:where => nil).delete_sql
end
UPDATE is emulated on Impala, and returns nil instead of the number of modified rows.
# File lib/sequel/adapters/shared/impala.rb, line 659
# Run the inherited update with +values+, then return nil rather than
# whatever the inherited method returned.
def update(values=OPTS)
  super # the inherited result is intentionally discarded
  nil
end
Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used in the given values, use a CASE statement. In the CASE statement, set the value to the new value if the row matches WHERE conditions of the current dataset, otherwise use the existing value.
# File lib/sequel/adapters/shared/impala.rb, line 668
# Build the SQL string used to emulate UPDATE: an INSERT OVERWRITE into
# the dataset's sources that selects every column of the dataset back from
# those sources.
#
# A column that is a key of +values+ is emitted as a CASE expression that
# yields the new value when the dataset's WHERE condition holds (always,
# if there is no condition) and the existing column otherwise, aliased to
# the column name. Every other column is emitted as a quoted identifier.
def update_sql(values)
  filter = opts[:where]
  # The condition is literalized once and reused for every CASE expression.
  condition = filter ? Sequel.lit(literal(filter)) : true

  sql = "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "

  select_all.columns.each_with_index do |column, index|
    sql << ', ' unless index.zero?

    if values.has_key?(column)
      replacement = Sequel.case({condition=>values[column]}, column).as(column)
      literal_append(sql, replacement)
    else
      quote_identifier_append(sql, column)
    end
  end

  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end
Private Instance Methods
# File lib/sequel/adapters/shared/impala.rb, line 715
# Append the target clause of an INSERT statement to +sql+: the keyword
# OVERWRITE when the :insert_overwrite option is set, INTO otherwise,
# followed by the unaliased identifier of the first source.
def insert_into_sql(sql)
  keyword = if @opts[:insert_overwrite]
    ' OVERWRITE '
  else
    ' INTO '
  end
  sql << keyword
  identifier_append(sql, unaliased_identifier(@opts[:from].first))
end
# File lib/sequel/adapters/shared/impala.rb, line 711
# SQL literal used for false: the BOOL_FALSE constant.
def literal_false
  BOOL_FALSE
end
Double backslashes in all strings, and escape all apostrophes with backslashes.
# File lib/sequel/adapters/shared/impala.rb, line 722
# Append +s+ to +sql+ as a string literal: the string form of +s+, with
# every STRING_ESCAPE_RE match replaced by STRING_ESCAPE_REPLACE, wrapped
# in APOS on both sides. Returns +sql+.
def literal_string_append(sql, s)
  sql << APOS
  sql << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE)
  sql << APOS
end
# File lib/sequel/adapters/shared/impala.rb, line 707
# SQL literal used for true: the BOOL_TRUE constant.
def literal_true
  BOOL_TRUE
end
# File lib/sequel/adapters/shared/impala.rb, line 726
# Strategy used for multi-row inserts: always :values.
def multi_insert_sql_strategy
  :values
end
Impala doesn't support escaping of identifiers, so you can't use a backtick in an identifier name.
# File lib/sequel/adapters/shared/impala.rb, line 732
# Append +name+ to +sql+ wrapped in BACKTICK on both sides. The name's
# string form is inserted verbatim, with no escaping. Returns +sql+.
def quoted_identifier_append(sql, name)
  sql << BACKTICK
  sql << name.to_s
  sql << BACKTICK
end
Don't include a LIMIT clause if there is no FROM clause. In general, such queries can only return 1 row.
# File lib/sequel/adapters/shared/impala.rb, line 738
# Delegate to the inherited select_limit_sql only when the dataset has a
# :from option; otherwise append nothing and return nil.
def select_limit_sql(sql)
  super if opts[:from]
end
Support VALUES clause instead of the SELECT clause to return rows.
# File lib/sequel/adapters/shared/impala.rb, line 745
# Append SELECT_VALUES to +sql+, followed by the expression list held in
# the dataset's :values option.
def select_values_sql(sql)
  value_expressions = opts[:values]
  sql << SELECT_VALUES
  expression_list_append(sql, value_expressions)
end