module Sequel::Impala::DatasetMethods
Constants
- APOS
- BACKTICK
- BOOL_FALSE
- BOOL_TRUE
- CONSTANT_LITERAL_MAP
- EXCEPT_SOURCE_COLUMN
- EXCEPT_STRATEGIES
- NOT
- PAREN_CLOSE
- PAREN_OPEN
- REGEXP
- SELECT_VALUES
- SPACE
- STRING_ESCAPE_RE
- STRING_ESCAPE_REPLACE
Public Instance Methods
Handle string concatenation using the concat string function. Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as Impala doesn't support escaping LIKE metacharacters. Support regexps on Impala using the REGEXP operator. For case-insensitive regexps, convert both values to uppercase first.
# File lib/sequel/adapters/shared/impala.rb, line 448
#
# Appends the SQL for a complex expression to +sql+, special-casing a few
# operators and deferring every other operator to +super+:
#
# * :'||' is emitted as a call to the +concat+ function over all +args+.
# * :LIKE and :'NOT LIKE' are emitted as "(lhs OP rhs)"; no ESCAPE clause
#   is appended.
# * :~, :'!~', :'~*' and :'!~*' are emitted with the REGEXP constant between
#   the two operands. The "*" variants wrap both operands in upper(), and
#   the "!" variants are prefixed with the NOT constant.
def complex_expression_sql_append(sql, op, args)
  case op
  when :'||'
    literal_append(sql, Sequel.function(:concat, *args))
  when :LIKE, :'NOT LIKE'
    lhs, rhs = args.at(0), args.at(1)
    sql << PAREN_OPEN
    literal_append(sql, lhs)
    sql << SPACE << op.to_s << SPACE
    literal_append(sql, rhs)
    sql << PAREN_CLOSE
  when :~, :'!~', :'~*', :'!~*'
    case_insensitive = [:'~*', :'!~*'].include?(op)
    negated = [:'!~', :'!~*'].include?(op)
    # Case-insensitive matching is done by uppercasing both sides.
    operands = case_insensitive ? args.map{|a| Sequel.function(:upper, a)} : args
    sql << NOT if negated
    sql << PAREN_OPEN
    literal_append(sql, operands.at(0))
    sql << REGEXP
    literal_append(sql, operands.at(1))
    sql << PAREN_CLOSE
  else
    super
  end
end
Use now() for current timestamp, as Impala
doesn't support CURRENT_TIMESTAMP.
# File lib/sequel/adapters/shared/impala.rb, line 475
#
# Appends the SQL for +constant+ to +sql+. The text comes from
# CONSTANT_LITERAL_MAP when it has an entry for +constant+; otherwise the
# constant's own string form is used.
def constant_sql_append(sql, constant)
  literal = CONSTANT_LITERAL_MAP.fetch(constant) { constant.to_s }
  sql << literal
end
Use the addition operator combined with interval types to handle date arithmetic when using the date_arithmetic extension.
# File lib/sequel/adapters/shared/impala.rb, line 482
#
# Appends the SQL for the date arithmetic expression +da+ to +sql+.
#
# The base expression is always cast to Time. Each interval unit yielded by
# each_valid_interval_unit becomes an "INTERVAL <value> <unit>" literal, and
# the cast plus all intervals are combined with the :+ operator. When no
# interval units are yielded, only the cast expression is appended.
def date_add_sql_append(sql, da)
  interval_hash = da.interval
  timestamp = Sequel.cast(da.expr, Time)

  terms = []
  each_valid_interval_unit(interval_hash, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
    terms << Sequel.lit("INTERVAL #{value} #{sql_unit}")
  end

  return literal_append(sql, timestamp) if terms.empty?

  complex_expression_sql_append(sql, :+, [timestamp, *terms])
end
DELETE is emulated on Impala
and doesn't return the number of modified rows.
# File lib/sequel/adapters/shared/impala.rb, line 499
#
# Runs the inherited delete and discards its result, always returning nil
# rather than a count of modified rows.
def delete
  super
  nil
end
Emulate DELETE using INSERT OVERWRITE selecting all columns from the table, with a reversed condition used for WHERE.
# File lib/sequel/adapters/shared/impala.rb, line 506
#
# Builds the SQL that emulates DELETE: an INSERT OVERWRITE of the table with
# a SELECT * of the rows that should survive.
#
# Returns the :prepared_sql option unchanged when it is set. Otherwise the
# statement is appended to the :append_sql buffer if present, or to a fresh
# string from sql_string_origin, and that string is returned.
#
# A DELETE only removes rows for which the condition is true, so the rows to
# keep are those where the condition is false *or NULL*. A plain
# "NOT (cond)" evaluates to NULL for NULL conditions and would drop those
# rows too, so the NULL case is kept explicitly. Only IS NULL is used, since
# this adapter reports that IS TRUE / IS FALSE are unsupported.
#
# Without a WHERE clause every row is deleted, so "WHERE false" is emitted.
def delete_sql
  return @opts[:prepared_sql] if @opts[:prepared_sql]

  sql = @opts[:append_sql] || sql_string_origin
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT * FROM "
  source_list_append(sql, opts[:from])

  if where = opts[:where]
    sql << " WHERE (NOT ("
    literal_append(sql, where)
    sql << ") OR ("
    literal_append(sql, where)
    sql << ") IS NULL)"
  else
    sql << " WHERE false"
  end

  sql
end
Don't remove an order, because that breaks things when offsets are used, as Impala
requires an order when using an offset.
# File lib/sequel/adapters/shared/impala.rb, line 542
#
# Returns true when fetching the literal 1 (aliased as :one) from this
# dataset yields nil. The probe is run with +get+ directly on this dataset,
# with no modification made to it here.
def empty?
  probe = Sequel::SQL::AliasedExpression.new(1, :one)
  get(probe).nil?
end
Emulate EXCEPT using a chosen strategy and checking for values in only the first table.
# File lib/sequel/adapters/shared/impala.rb, line 547
#
# Emulates EXCEPT between this dataset and +other+, using the strategy and
# keys stored in the :except_strategy option (see except_strategy).
#
# Raises InvalidOperation if opts[:all] is set or opts[:from_self] is false.
#
# Strategies:
# * :not_exists - exclude rows for which a subquery on +other+ matching on
#   every key exists.
# * :not_in     - exclude rows whose single key is in +other+'s key column;
#   raises Sequel::Error unless exactly one key was given.
# * :left_join  - left join +other+ on the keys and filter on a t2 key
#   being NULL, selecting only the t1 columns.
# * anything else - group both datasets by their columns, tag each side
#   with a source marker (1 for this dataset, 2 for +other+), UNION ALL them,
#   regroup, and filter with HAVING count(*) = 1 and min(marker) = 1.
#
# The result is wrapped with from_self(opts).
def except(other, opts=OPTS)
  raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false

  strategy, *keys = @opts[:except_strategy]
  left = from_self(:alias=>:t1)

  emulated =
    case strategy
    when :not_exists
      key_matches = keys.map{|key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)]}
      matching_rows = other.from_self(:alias=>:t2).where(key_matches).select(nil)
      left.exclude(matching_rows.exists)
    when :not_in
      raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key" unless keys.length == 1
      key = keys.first
      left.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
    when :left_join
      right = other.from_self(:alias=>:t2).as(:t2)
      join_conds = keys.map{|key| [key, key]}
      unmatched = Sequel.or(keys.map{|key| [Sequel.qualify(:t2, key), nil]})
      left.left_join(right, join_conds).where(unmatched).select_all(:t1)
    else
      cols = columns
      rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
      tagged = left.select_group(*cols).select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN))
      tagged.
        union(rhs, all: true).
        select_group(*cols).
        having{{count.function.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
    end

  emulated.from_self(opts)
end
The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation, as that doesn't require you to provide a key column, but you can use this to choose a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column.
# File lib/sequel/adapters/shared/impala.rb, line 585
#
# Returns a clone of this dataset whose :except_strategy option is
# [strategy, *keys]. Raises Sequel::Error when +strategy+ is not included in
# EXCEPT_STRATEGIES.
def except_strategy(strategy, *keys)
  unless EXCEPT_STRATEGIES.include?(strategy)
    raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}"
  end

  clone(:except_strategy=>[strategy, *keys])
end
Implicitly qualify tables if using the :search_path database option.
# File lib/sequel/adapters/shared/impala.rb, line 524
#
# Delegates to the inherited +from+ and then passes every resulting source
# through db.implicit_qualify, returning a clone with the qualified sources.
#
# When the inherited call leaves the :from option unset (for example when
# +from+ is called with no sources to build a FROM-less SELECT), there is
# nothing to qualify and the dataset from +super+ is returned as is. Without
# this guard the call would fail with NoMethodError on nil.
def from(*)
  ds = super
  sources = ds.opts[:from]
  return ds unless sources

  ds.clone(:from => sources.map{|t| db.implicit_qualify(t)})
end
Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
DB[:table].insert_overwrite.insert(DB[:other]) # INSERT OVERWRITE table SELECT * FROM other
# File lib/sequel/adapters/shared/impala.rb, line 594
#
# Returns a clone of this dataset with the :insert_overwrite option set to
# true. insert_into_sql reads that option to emit OVERWRITE instead of INTO.
def insert_overwrite
  clone(:insert_overwrite=>true)
end
Impala
does not support INSERT DEFAULT VALUES.
# File lib/sequel/adapters/shared/impala.rb, line 599
#
# Always false: inserting with an empty value list is reported as
# unsupported.
def insert_supports_empty_values?
  false
end
Emulate INTERSECT using a join and checking for values in both tables.
# File lib/sequel/adapters/shared/impala.rb, line 604
#
# Emulates INTERSECT between this dataset and +other+ with a join.
#
# Raises InvalidOperation if opts[:all] is set or opts[:from_self] is false,
# and Error if either dataset reports no columns.
#
# Columns are paired positionally (this dataset's columns zipped with
# +other+'s). This dataset is wrapped as alias :l and joined to +other+; the
# join condition requires, for every pair, that the two columns are equal or
# that both are NULL. Only :l's columns are selected, made DISTINCT, and the
# result is wrapped with from_self(opts).
def intersect(other, opts=OPTS)
  raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
  raise(Error, "Attempt to INTERSECT on dataset with no columns: #{inspect}") if columns.empty?
  raise(Error, "Attempt to INTERSECT other dataset with no columns: #{other.inspect}") if other.columns.empty?

  column_pairs = columns.zip(other.columns)

  # The first block argument qualifies the other dataset's column and the
  # second qualifies this dataset's column, pair by pair.
  joined = from_self(alias: :l).join(other) do |join_alias, prior_alias, _|
    per_column = column_pairs.map do |own_col, other_col|
      theirs = Sequel.qualify(join_alias, other_col)
      ours = Sequel.qualify(prior_alias, own_col)
      Sequel.expr(theirs=>ours) | {theirs=>nil, ours=>nil}
    end
    Sequel.&(*per_column)
  end

  joined.select_all(:l).distinct.from_self(opts)
end
Implicitly qualify tables if using the :search_path database option.
# File lib/sequel/adapters/shared/impala.rb, line 530
#
# Passes +table+ through db.implicit_qualify and then delegates to the
# inherited join_table with all other arguments (and the block) unchanged.
def join_table(type, table, expr=nil, options=OPTS, &block)
  qualified = db.implicit_qualify(table)
  super(type, qualified, expr, options, &block)
end
Impala
supports non-recursive common table expressions.
# File lib/sequel/adapters/shared/impala.rb, line 619
#
# Always true, regardless of the statement +type+ given.
def supports_cte?(type=:select)
  true
end
# File lib/sequel/adapters/shared/impala.rb, line 623
#
# Always true: common table expressions are reported as usable in
# subqueries.
def supports_cte_in_subqueries?
  true
end
Impala
doesn't support derived column lists when aliasing tables.
# File lib/sequel/adapters/shared/impala.rb, line 629
#
# Always false: derived column lists are reported as unsupported.
def supports_derived_column_lists?
  false
end
Impala
doesn't support EXCEPT or INTERSECT, but support is emulated for them. However, EXCEPT ALL and INTERSECT ALL are not emulated.
# File lib/sequel/adapters/shared/impala.rb, line 635
#
# Always false: the ALL variants of INTERSECT and EXCEPT are reported as
# unsupported.
def supports_intersect_except_all?
  false
end
Impala only supports IS NULL, not IS TRUE or IS FALSE.
# File lib/sequel/adapters/shared/impala.rb, line 640
#
# Always false: IS TRUE / IS FALSE are reported as unsupported.
def supports_is_true?
  false
end
Impala
doesn't support IN when used with multiple columns.
# File lib/sequel/adapters/shared/impala.rb, line 645
#
# Always false: IN with multiple columns is reported as unsupported.
def supports_multiple_column_in?
  false
end
Impala
supports window functions.
# File lib/sequel/adapters/shared/impala.rb, line 655
#
# Always true: window functions are reported as supported.
def supports_window_functions?
  true
end
Create a parquet file from this dataset. The table argument should be the name of the table to create. To specify a path for the parquet file, use the :location option.
Examples:
DB[:t].to_parquet(:p)
# CREATE TABLE `p` STORED AS parquet AS
# SELECT * FROM `t`

DB[:t].to_parquet(:p, :location=>'/a/b')
# CREATE TABLE `p` STORED AS parquet LOCATION '/a/b'
# SELECT * FROM `t`
# File lib/sequel/adapters/shared/impala.rb, line 672
#
# Creates +table+ via db.create_table, passing +options+ merged with
# :as set to this dataset and :stored_as set to :parquet. Those two keys
# override any values given for them in +options+.
def to_parquet(table, options=OPTS)
  create_opts = options.merge(:as=>self, :stored_as=>:parquet)
  db.create_table(table, create_opts)
end
Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns from the table, with WHERE false.
# File lib/sequel/adapters/shared/impala.rb, line 536
#
# Returns the delete_sql of the unfiltered version of this dataset.
def truncate_sql
  unfiltered.delete_sql
end
UPDATE is emulated on Impala, and returns nil instead of the number of modified rows.
# File lib/sequel/adapters/shared/impala.rb, line 678
#
# Runs the inherited update with +values+ and discards its result, always
# returning nil rather than a count of modified rows.
def update(values=OPTS)
  super
  nil
end
Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used in the given values, use a CASE statement. In the CASE statement, set the value to the new value if the row matches WHERE conditions of the current dataset, otherwise use the existing value.
# File lib/sequel/adapters/shared/impala.rb, line 687
#
# Builds the SQL that emulates UPDATE as an INSERT OVERWRITE ... SELECT over
# the same table.
#
# Every column reported by select_all.columns is selected. A column that is
# a key of +values+ is emitted as a CASE expression aliased to the column
# name: the new value when the dataset's WHERE condition holds (or always,
# when there is no WHERE), and the existing column value otherwise. All
# other columns are emitted as plain quoted identifiers.
def update_sql(values)
  sql = String.new
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "

  # Literalize the filter once so it can be reused as the CASE condition.
  filter = opts[:where]
  condition = filter ? Sequel.lit(literal(filter)) : true

  select_all.columns.each_with_index do |col, idx|
    sql << ', ' unless idx.zero?

    if values.has_key?(col)
      literal_append(sql, Sequel.case({condition=>values[col]}, col).as(col))
    else
      quote_identifier_append(sql, col)
    end
  end

  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end
# File lib/sequel/adapters/shared/impala.rb, line 719
#
# Adds a common table expression. When +dataset+ itself carries CTEs (per
# has_cte?), hoist_cte is used to split it into a receiver and a dataset,
# and +with+ is invoked on that receiver instead. Otherwise the inherited
# implementation is used.
def with(name, dataset, opts={})
  return super unless has_cte?(dataset)

  receiver, hoisted = hoist_cte(dataset)
  receiver.with(name, hoisted, opts)
end
# File lib/sequel/adapters/shared/impala.rb, line 728
#
# Adds a recursive common table expression. If +nonrecursive+ carries CTEs
# (per has_cte?), they are hoisted with hoist_cte and with_recursive is
# re-invoked on the returned receiver. Failing that, the same is done for
# +recursive+. When neither carries CTEs the inherited implementation is
# used.
def with_recursive(name, nonrecursive, recursive, opts={})
  if has_cte?(nonrecursive)
    receiver, hoisted = hoist_cte(nonrecursive)
    return receiver.with_recursive(name, hoisted, recursive, opts)
  end

  if has_cte?(recursive)
    receiver, hoisted = hoist_cte(recursive)
    return receiver.with_recursive(name, nonrecursive, hoisted, opts)
  end

  super
end
Protected Instance Methods
Add the dataset to the list of compounds
# File lib/sequel/adapters/shared/impala.rb, line 743
#
# Adds +dataset+ as a compound of the given +type+. When +dataset+ carries
# CTEs (per has_cte?), hoist_cte is used to split it into a receiver and a
# dataset, and compound_clone is invoked on that receiver. Otherwise the
# inherited implementation is used.
def compound_clone(type, dataset, opts)
  return super unless has_cte?(dataset)

  receiver, hoisted = hoist_cte(dataset)
  receiver.compound_clone(type, hoisted, opts)
end
Private Instance Methods
# File lib/sequel/adapters/shared/impala.rb, line 754
#
# Returns false when +ds+ is not a Dataset; otherwise returns the value of
# its :with option (truthy when set, nil when not).
def has_cte?(ds)
  return false unless ds.is_a?(Dataset)

  ds.opts[:with]
end
# File lib/sequel/adapters/shared/impala.rb, line 772
#
# Appends the target clause of an INSERT to +sql+: ' OVERWRITE ' when the
# :insert_overwrite option is set, ' INTO ' otherwise, followed by the
# unaliased identifier of the first FROM source.
def insert_into_sql(sql)
  keyword = @opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO '
  sql << keyword

  target = unaliased_identifier(@opts[:from].first)
  identifier_append(sql, target)
end
# File lib/sequel/adapters/shared/impala.rb, line 768
#
# The SQL literal used for boolean false, taken from BOOL_FALSE.
def literal_false
  BOOL_FALSE
end
Double backslashes in all strings, and escape all apostrophes with backslashes.
# File lib/sequel/adapters/shared/impala.rb, line 779
#
# Appends +s+ to +sql+ as a quoted string literal: the string form of +s+
# has every match of STRING_ESCAPE_RE replaced via STRING_ESCAPE_REPLACE and
# is wrapped in APOS on both sides.
def literal_string_append(sql, s)
  sql << APOS
  sql << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE)
  sql << APOS
end
# File lib/sequel/adapters/shared/impala.rb, line 764
#
# The SQL literal used for boolean true, taken from BOOL_TRUE.
def literal_true
  BOOL_TRUE
end
# File lib/sequel/adapters/shared/impala.rb, line 783
#
# Always :values, the strategy symbol reported for multi-row inserts.
def multi_insert_sql_strategy
  :values
end
Impala doesn't support escaping of identifiers, so you can't use a backtick in an identifier name.
# File lib/sequel/adapters/shared/impala.rb, line 789
#
# Appends the string form of +name+ to +sql+ wrapped in BACKTICK on both
# sides. The name is written as is; nothing inside it is escaped.
def quoted_identifier_append(sql, name)
  sql << BACKTICK
  sql << name.to_s
  sql << BACKTICK
end
Don't include a LIMIT clause if there is no FROM clause. In general, such queries can only return 1 row.
# File lib/sequel/adapters/shared/impala.rb, line 795
#
# Delegates to the inherited LIMIT handling only when the dataset has a
# FROM option; otherwise appends nothing and returns nil.
def select_limit_sql(sql)
  super if opts[:from]
end
Support VALUES clause instead of the SELECT clause to return rows.
# File lib/sequel/adapters/shared/impala.rb 802 def select_values_sql(sql) 803 sql << SELECT_VALUES 804 expression_list_append(sql, opts[:values]) 805 end