module Sequel::Impala::DatasetMethods

Constants

APOS
BACKTICK
BOOL_FALSE
BOOL_TRUE
CONSTANT_LITERAL_MAP
EXCEPT_SOURCE_COLUMN
EXCEPT_STRATEGIES
NOT
PAREN_CLOSE
PAREN_OPEN
REGEXP
SELECT_VALUES
SPACE
STRING_ESCAPE_RE
STRING_ESCAPE_REPLACE

Public Instance Methods

complex_expression_sql_append(sql, op, args) click to toggle source

Handle string concatenation using the concat string function. Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as Impala doesn't support escaping LIKE metacharacters. Support regexps on Impala using the REGEXP operator. For case-insensitive regexps, convert both values to uppercase first.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Append the SQL for a complex expression, overriding a few operators:
#
# * :'||' is emitted as a call to the concat function.
# * :LIKE / :'NOT LIKE' are emitted as "(lhs OP rhs)" with no ESCAPE clause.
# * The regexp operators (:~, :'!~', :'~*', :'!~*') are emitted using REGEXP;
#   the negated forms are prefixed with NOT, and the "*" forms wrap both
#   operands in upper().
#
# Any other operator is delegated to the superclass.
def complex_expression_sql_append(sql, op, args)
  case op
  when :'||'
    literal_append(sql, Sequel.function(:concat, *args))
  when :LIKE, :'NOT LIKE'
    lhs = args.at(0)
    rhs = args.at(1)
    sql << PAREN_OPEN
    literal_append(sql, lhs)
    sql << SPACE << op.to_s << SPACE
    literal_append(sql, rhs)
    sql << PAREN_CLOSE
  when :~, :'!~', :'~*', :'!~*'
    negated = [:'!~', :'!~*'].include?(op)
    ignore_case = [:'~*', :'!~*'].include?(op)
    # A separate local is used so that +args+ itself is never reassigned.
    operands = ignore_case ? args.map{|a| Sequel.function(:upper, a)} : args
    sql << NOT if negated
    sql << PAREN_OPEN
    literal_append(sql, operands.at(0))
    sql << REGEXP
    literal_append(sql, operands.at(1))
    sql << PAREN_CLOSE
  else
    super
  end
end
constant_sql_append(sql, constant) click to toggle source

Use now() for current timestamp, as Impala doesn't support CURRENT_TIMESTAMP.

    # File lib/sequel/adapters/shared/impala.rb
# Append the SQL for +constant+ to +sql+. The text comes from
# CONSTANT_LITERAL_MAP when it has an entry for the constant; otherwise the
# constant's own string form is used.
def constant_sql_append(sql, constant)
  text = CONSTANT_LITERAL_MAP.fetch(constant){constant.to_s}
  sql << text
end
date_add_sql_append(sql, da) click to toggle source

Use the addition operator combined with interval types to handle date arithmetic when using the date_arithmetic extension.

    # File lib/sequel/adapters/shared/impala.rb
# Append SQL for a date_arithmetic DateAdd expression. The base expression is
# cast to Time, and every interval unit yielded by each_valid_interval_unit is
# added to it as an "INTERVAL value unit" literal using the + operator. When
# no interval units are yielded, only the cast expression is appended.
def date_add_sql_append(sql, da)
  units = Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS
  terms = []
  each_valid_interval_unit(da.interval, units) do |value, sql_unit|
    terms << Sequel.lit("INTERVAL #{value} #{sql_unit}")
  end

  base = Sequel.cast(da.expr, Time)
  if terms.empty?
    literal_append(sql, base)
  else
    complex_expression_sql_append(sql, :+, [base] + terms)
  end
end
delete() click to toggle source

DELETE is emulated on Impala and doesn't return the number of modified rows.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Run the (emulated) DELETE through the superclass implementation and always
# return nil, discarding whatever the superclass returned.
def delete
  super
  nil
end
delete_sql() click to toggle source

Emulate DELETE using INSERT OVERWRITE selecting all columns from the table, with a reversed condition used for WHERE.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate DELETE using INSERT OVERWRITE, selecting all columns from the table
# and keeping only the rows that the DELETE would NOT remove.
#
# With a WHERE condition, a row is kept unless the condition is true for it.
# The condition is wrapped in coalesce(..., false) so that rows for which the
# condition evaluates to NULL are kept, matching real DELETE semantics; a
# plain NOT (cond) would yield NULL for those rows and silently drop them.
#
# Without a WHERE condition every row is deleted, so no rows are selected
# (WHERE false).
#
# Returns the SQL string.
def delete_sql
  sql = "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT * FROM "
  source_list_append(sql, opts[:from])
  if where = opts[:where]
    sql << " WHERE NOT coalesce(("
    literal_append(sql, where)
    sql << "), false)"
  else
    sql << " WHERE false"
  end
  sql
end
empty?() click to toggle source

Don't remove an order, because that breaks things when offsets are used, as Impala requires an order when using an offset.

    # File lib/sequel/adapters/shared/impala.rb
# True when fetching the literal 1 (aliased as :one) from this dataset
# returns nil. The dataset is queried as-is, without modifying its order.
def empty?
  probe = Sequel::SQL::AliasedExpression.new(1, :one)
  get(probe).nil?
end
except(other, opts=OPTS) click to toggle source

Emulate EXCEPT using a chosen strategy and checking for values in only the first table.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate EXCEPT, returning rows of the receiver that are not in +other+.
#
# The emulation used depends on the :except_strategy dataset option (see
# except_strategy), which holds a strategy name followed by key columns:
#
# :not_exists :: exclude rows with a correlated EXISTS subquery on the keys
# :not_in :: exclude rows whose single key is IN a subquery on +other+
# :left_join :: LEFT JOIN +other+ on the keys, keeping rows with a NULL t2 key
# anything else :: UNION ALL of both sides, each tagged with a source marker
#                  (1 for the receiver, 2 for +other+), grouped by all columns
#                  and keeping groups with a single row whose marker is 1
#
# Raises InvalidOperation for the :all option or :from_self=>false, and
# Sequel::Error if :not_in is used with other than one key.
def except(other, opts=OPTS)
  raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false

  strategy, *keys = @opts[:except_strategy]
  left = from_self(:alias=>:t1)

  emulated =
    case strategy
    when :not_exists
      key_pairs = keys.map{|key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)]}
      matching = other.from_self(:alias=>:t2).where(key_pairs).select(nil)
      left.exclude(matching.exists)
    when :not_in
      raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key" unless keys.length == 1
      key = keys.first
      left.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
    when :left_join
      join_conditions = keys.map{|key| [key, key]}
      unmatched = Sequel.or(keys.map{|key| [Sequel.qualify(:t2, key), nil]})
      left.
        left_join(other.from_self(:alias=>:t2).as(:t2), join_conditions).
        where(unmatched).
        select_all(:t1)
    else
      cols = columns
      rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
      left.select_group(*cols).
        select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
        union(rhs, all: true).
        select_group(*cols).
        having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
    end

  emulated.from_self(opts)
end
except_strategy(strategy, *keys) click to toggle source

The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation, as that doesn't require you provide a key column, but you can use this to choose a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column.

    # File lib/sequel/adapters/shared/impala.rb
# Return a copy of the dataset whose :except_strategy option is set to
# [strategy, *keys]. Raises Sequel::Error unless +strategy+ is one of
# EXCEPT_STRATEGIES.
def except_strategy(strategy, *keys)
  unless EXCEPT_STRATEGIES.include?(strategy)
    raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}"
  end

  clone(:except_strategy=>[strategy, *keys])
end
from(*) click to toggle source

Implicitly qualify tables if using the :search_path database option.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Implicitly qualify tables if using the :search_path database option.
#
# The superclass builds the new dataset; each of its :from sources is then
# passed through db.implicit_qualify. The qualified sources are applied with
# clone rather than by mutating the opts array in place, so this works when
# the dataset's options are frozen. If the resulting dataset has no :from
# option at all, it is returned untouched instead of raising NoMethodError
# (presumably the case when called with no sources; confirm against the
# superclass).
def from(*)
  ds = super
  sources = ds.opts[:from]
  return ds unless sources

  ds.clone(:from=>sources.map{|t| db.implicit_qualify(t)})
end
insert_overwrite() click to toggle source

Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:

DB[:table].insert_overwrite.insert(DB[:other])
# INSERT OVERWRITE table SELECT * FROM other
    # File lib/sequel/adapters/shared/impala.rb
# Return a copy of the dataset with the :insert_overwrite option set to true.
# insert_into_sql reads this option to emit OVERWRITE instead of INTO.
def insert_overwrite
  overwrite_opts = {:insert_overwrite=>true}
  clone(overwrite_opts)
end
insert_supports_empty_values?() click to toggle source

Impala does not support INSERT DEFAULT VALUES.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: INSERT DEFAULT VALUES is reported as unsupported.
def insert_supports_empty_values?
  false
end
intersect(other, opts=OPTS) click to toggle source

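Emulate INTERSECT by joining this dataset to the other dataset on all of this dataset's columns and selecting the rows from this dataset, so only values present in both are returned.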

    # File lib/sequel/adapters/shared/impala.rb
# Emulate INTERSECT: wrap the receiver in a subselect aliased :l, join it to
# +other+ using the receiver's columns, select all of :l's columns, and wrap
# the result with from_self(opts).
#
# Raises InvalidOperation for the :all option or :from_self=>false.
def intersect(other, opts=OPTS)
  raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false

  shared_columns = columns
  joined = from_self(alias: :l).join(other, shared_columns)
  joined.select_all(:l).from_self(opts)
end
join_table(type, table, expr=nil, options=OPTS, &block) click to toggle source

Implicitly qualify tables if using the :search_path database option.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Pass +table+ through db.implicit_qualify, then delegate to the superclass
# with every other argument (and the block) unchanged.
def join_table(type, table, expr=nil, options=OPTS, &block)
  qualified_table = db.implicit_qualify(table)
  super(type, qualified_table, expr, options, &block)
end
supports_cte?(type=:select) click to toggle source

Impala supports non-recursive common table expressions.

    # File lib/sequel/adapters/shared/impala.rb
# Always true, regardless of the statement +type+ given.
def supports_cte?(type=:select)
  true
end
supports_cte_in_subqueries?() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Always true: common table expressions are reported as usable in subqueries.
def supports_cte_in_subqueries?
  true
end
supports_derived_column_lists?() click to toggle source

Impala doesn't support derived column lists when aliasing tables.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: derived column lists on table aliases are reported as
# unsupported.
def supports_derived_column_lists?
  false
end
supports_intersect_except_all?() click to toggle source

Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them. However, EXCEPT ALL and INTERSECT ALL are not emulated.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: INTERSECT ALL and EXCEPT ALL are reported as unsupported.
def supports_intersect_except_all?
  false
end
supports_is_true?() click to toggle source

Impala only supports IS NULL, not IS TRUE or IS FALSE.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: IS TRUE / IS FALSE are reported as unsupported.
def supports_is_true?
  false
end
supports_multiple_column_in?() click to toggle source

Impala doesn't support IN when used with multiple columns.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: IN with multiple columns is reported as unsupported.
def supports_multiple_column_in?
  false
end
supports_regexp?() click to toggle source

Impala supports regexps using the REGEXP operator.

    # File lib/sequel/adapters/shared/impala.rb
# Always true: regular expression matching is reported as supported.
def supports_regexp?
  true
end
supports_window_functions?() click to toggle source

Impala supports window functions.

    # File lib/sequel/adapters/shared/impala.rb
# Always true: window functions are reported as supported.
def supports_window_functions?
  true
end
to_parquet(table, options=OPTS) click to toggle source

Create a parquet file from this dataset. table should be the table name to create. To specify a path for the parquet file, use the :location option.

Examples:

DB[:t].to_parquet(:p)
# CREATE TABLE `p` STORED AS parquet AS
# SELECT * FROM `t`

DB[:t].to_parquet(:p, :location=>'/a/b')
# CREATE TABLE `p` STORED AS parquet LOCATION '/a/b' AS
# SELECT * FROM `t`
    # File lib/sequel/adapters/shared/impala.rb
# Create +table+ from this dataset via db.create_table. The given +options+
# are passed through with :as set to this dataset and :stored_as set to
# :parquet (overriding any values supplied for those two keys). Returns
# whatever db.create_table returns.
def to_parquet(table, options=OPTS)
  create_options = options.merge(:as=>self, :stored_as=>:parquet)
  db.create_table(table, create_options)
end
truncate_sql() click to toggle source

Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns from the table, with WHERE false.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate TRUNCATE by generating the DELETE emulation SQL for a copy of this
# dataset that has no WHERE condition.
#
# The condition is cleared through clone(:where=>nil) instead of deleting the
# key from the copy's opts hash, so no options hash is mutated and this keeps
# working when dataset options are frozen.
def truncate_sql
  clone(:where=>nil).delete_sql
end
update(values=OPTS) click to toggle source

UPDATE is emulated on Impala, and returns nil instead of the number of modified rows

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Run the (emulated) UPDATE through the superclass implementation and always
# return nil, discarding whatever the superclass returned.
def update(values=OPTS)
  super
  nil
end
update_sql(values) click to toggle source

Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used in the given values, use a CASE statement. In the CASE statement, set the value to the new value if the row matches WHERE conditions of the current dataset, otherwise use the existing value.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate UPDATE using INSERT OVERWRITE ... SELECT over the same table.
#
# Every column of the dataset (as reported by select_all.columns) is selected
# in order. A column that is a key of +values+ is emitted as a CASE expression
# aliased to the column name: the new value when the dataset's WHERE condition
# holds, the existing column otherwise. With no WHERE condition the CASE
# condition is simply true. Columns not in +values+ are selected unchanged.
#
# Returns the SQL string.
def update_sql(values)
  sql = "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "

  filter = opts[:where]
  # Literalize the filter once up front and reuse it for every CASE expression.
  condition = filter ? Sequel.lit(literal(filter)) : true

  select_all.columns.each_with_index do |column, index|
    sql << ', ' unless index.zero?

    if values.has_key?(column)
      literal_append(sql, Sequel.case({condition=>values[column]}, column).as(column))
    else
      quote_identifier_append(sql, column)
    end
  end

  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end

Private Instance Methods

insert_empty_columns_values() click to toggle source

Impala doesn't handle the DEFAULT keyword used in inserts, as all default values in Impala are NULL, so just use a NULL value.

    # File lib/sequel/adapters/shared/impala.rb
# Columns and values to use for an INSERT with no values given: the
# dataset's last column paired with a nil value.
def insert_empty_columns_values
  last_column = columns.last
  [[last_column], [nil]]
end
insert_into_sql(sql) click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Append " OVERWRITE " when the :insert_overwrite option is set, otherwise
# " INTO ", followed by the unaliased identifier of the first FROM source.
def insert_into_sql(sql)
  keyword = if @opts[:insert_overwrite]
    ' OVERWRITE '
  else
    ' INTO '
  end
  sql << keyword
  identifier_append(sql, unaliased_identifier(@opts[:from].first))
end
literal_false() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# SQL literal used for false: the BOOL_FALSE constant.
def literal_false
  BOOL_FALSE
end
literal_string_append(sql, s) click to toggle source

Double backslashes in all strings, and escape all apostrophes with backslashes.

    # File lib/sequel/adapters/shared/impala.rb
# Append +s+ (converted with to_s) to +sql+ as a quoted string literal: every
# STRING_ESCAPE_RE match is replaced with STRING_ESCAPE_REPLACE, and the
# result is wrapped in APOS on both sides.
def literal_string_append(sql, s)
  escaped = s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE)
  sql << APOS << escaped << APOS
end
literal_true() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# SQL literal used for true: the BOOL_TRUE constant.
def literal_true
  BOOL_TRUE
end
multi_insert_sql_strategy() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Strategy symbol used for multi-row inserts: always :values.
def multi_insert_sql_strategy
  :values
end
quoted_identifier_append(sql, name) click to toggle source

Impala doesn't support escaping of identifiers, so you can't use a backtick in an identifier name.

    # File lib/sequel/adapters/shared/impala.rb
# Append +name+ (converted with to_s) to +sql+ wrapped in BACKTICK on both
# sides. The name is written as-is; nothing inside it is escaped.
def quoted_identifier_append(sql, name)
  identifier = name.to_s
  sql << BACKTICK << identifier << BACKTICK
end
select_limit_sql(sql) click to toggle source

Don't include a LIMIT clause if there is no FROM clause. In general, such queries can only return 1 row.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Delegate to the superclass only when the dataset has a :from option;
# otherwise append nothing and return nil.
def select_limit_sql(sql)
  if opts[:from]
    super
  end
end
select_values_sql(sql) click to toggle source

Support VALUES clause instead of the SELECT clause to return rows.

    # File lib/sequel/adapters/shared/impala.rb
# Append SELECT_VALUES to +sql+, followed by the expression list built from
# the dataset's :values option.
def select_values_sql(sql)
  rows = opts[:values]
  sql << SELECT_VALUES
  expression_list_append(sql, rows)
end