module Sequel::Impala::DatasetMethods

Constants

APOS
BACKTICK
BOOL_FALSE
BOOL_TRUE
CONSTANT_LITERAL_MAP
EXCEPT_SOURCE_COLUMN
EXCEPT_STRATEGIES
NOT
PAREN_CLOSE
PAREN_OPEN
REGEXP
SELECT_VALUES
SPACE
STRING_ESCAPE_RE
STRING_ESCAPE_REPLACE

Public Instance Methods

complex_expression_sql_append(sql, op, args) click to toggle source

Handle string concatenation using the concat string function. Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as Impala doesn't support escaping LIKE metacharacters. Support regexps on Impala using the REGEXP operator. For case insensitive regexps, convert both values to uppercase first.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Append the SQL for a complex expression, special-casing a few operators:
# * :'||' is rendered as a call to the concat function.
# * :LIKE and :'NOT LIKE' are rendered as "(lhs OP rhs)" with nothing after
#   the right-hand operand.
# * The regexp operators are rendered with REGEXP.  The case insensitive
#   variants (:'~*', :'!~*') wrap every operand in upper(), and the negated
#   variants (:'!~', :'!~*') are prefixed with NOT.
# Any other operator is passed to the superclass implementation unchanged.
def complex_expression_sql_append(sql, op, args)
  case op
  when :'||'
    literal_append(sql, Sequel.function(:concat, *args))
  when :LIKE, :'NOT LIKE'
    lhs, rhs = args.at(0), args.at(1)
    sql << PAREN_OPEN
    literal_append(sql, lhs)
    sql << SPACE << op.to_s << SPACE
    literal_append(sql, rhs)
    sql << PAREN_CLOSE
  when :~, :'!~', :'~*', :'!~*'
    case_insensitive = [:'~*', :'!~*'].include?(op)
    negated = [:'!~', :'!~*'].include?(op)
    operands = case_insensitive ? args.map{|a| Sequel.function(:upper, a)} : args
    sql << NOT if negated
    sql << PAREN_OPEN
    literal_append(sql, operands.at(0))
    sql << REGEXP
    literal_append(sql, operands.at(1))
    sql << PAREN_CLOSE
  else
    super
  end
end
constant_sql_append(sql, constant) click to toggle source

Use now() for current timestamp, as Impala doesn't support CURRENT_TIMESTAMP.

    # File lib/sequel/adapters/shared/impala.rb
# Append the SQL for the given constant.  Constants listed in
# CONSTANT_LITERAL_MAP use the mapped literal; every other constant is
# appended as its own string form.
def constant_sql_append(sql, constant)
  literal = CONSTANT_LITERAL_MAP.fetch(constant){constant.to_s}
  sql << literal
end
date_add_sql_append(sql, da) click to toggle source

Use the addition operator combined with interval types to handle date arithmetic when using the date_arithmetic extension.

    # File lib/sequel/adapters/shared/impala.rb
# Append the SQL for a date_arithmetic DateAdd expression.  The expression is
# cast to Time, and one "INTERVAL value unit" literal is added to it for each
# unit yielded by each_valid_interval_unit.  When no unit is yielded, only the
# cast expression is appended.
def date_add_sql_append(sql, da)
  base = Sequel.cast(da.expr, Time)
  terms = [base]
  each_valid_interval_unit(da.interval, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
    terms << Sequel.lit("INTERVAL #{value} #{sql_unit}")
  end

  if terms.length == 1
    literal_append(sql, base)
  else
    complex_expression_sql_append(sql, :+, terms)
  end
end
delete() click to toggle source

DELETE is emulated on Impala and doesn't return the number of modified rows.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Run the superclass delete, but always return nil: the superclass return
# value is discarded because the emulated DELETE does not report a count of
# modified rows.
def delete
  super
  nil
end
delete_sql() click to toggle source

Emulate DELETE using INSERT OVERWRITE selecting all columns from the table, with a reversed condition used for WHERE.

    # File lib/sequel/adapters/shared/impala.rb
# Build the SQL that emulates DELETE: an INSERT OVERWRITE of the dataset's
# source selecting every row that should be kept.
#
# Returns the :prepared_sql option unchanged when it is set.  Otherwise the
# SQL is appended to the :append_sql option if present, or to a fresh
# string from sql_string_origin, and that string is returned.
#
# With a filter, the kept rows are those for which the filter is not TRUE.
# Without a filter, "WHERE false" keeps nothing, so every row is removed.
def delete_sql
  return @opts[:prepared_sql] if @opts[:prepared_sql]
  sql = @opts[:append_sql] || sql_string_origin
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT * FROM "
  source_list_append(sql, opts[:from])
  if where = opts[:where]
    # A real DELETE only removes rows whose condition is TRUE.  A plain
    # NOT (cond) is NULL when cond is NULL, so those rows would be filtered
    # out of the SELECT and silently lost.  Coalescing the condition to
    # false keeps rows whose condition is unknown.
    sql << " WHERE NOT coalesce(("
    literal_append(sql, where)
    sql << "), false)"
  else
    sql << " WHERE false"
  end
  sql
end
empty?() click to toggle source

Don't remove an order, because that breaks things when offsets are used, as Impala requires an order when using an offset.

    # File lib/sequel/adapters/shared/impala.rb
# Return true when fetching the literal 1 (aliased as :one) from this dataset
# yields nil.  The dataset is queried as-is, with its options untouched.
def empty?
  probe = Sequel::SQL::AliasedExpression.new(1, :one)
  get(probe).nil?
end
except(other, opts=OPTS) click to toggle source

Emulate EXCEPT using a chosen strategy and checking for values in only the first table.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate EXCEPT, returning the rows of this dataset that are not in +other+.
#
# Raises InvalidOperation for the :all option and for :from_self=>false.
# The emulation used depends on the :except_strategy dataset option, which
# holds the strategy followed by its key columns:
# * :not_exists - exclude rows for which a row with equal keys exists in other.
# * :not_in     - exclude rows whose single key is in other's key column.
# * :left_join  - left join other on the keys and keep rows where any of the
#                 joined keys is NULL.
# * otherwise   - tag both sides with a source marker (1 for this dataset,
#                 2 for other), UNION ALL them, group by this dataset's
#                 columns, and keep groups that occur once and whose minimum
#                 marker is 1.
# The result is wrapped with from_self(opts).
def except(other, opts=OPTS)
  raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false

  strategy, *keys = @opts[:except_strategy]
  lhs = from_self(:alias=>:t1)

  emulated =
    case strategy
    when :not_exists
      key_matches = keys.map{|key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)]}
      matching = other.from_self(:alias=>:t2).where(key_matches).select(nil)
      lhs.exclude(matching.exists)
    when :not_in
      raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key" unless keys.length == 1
      key = keys.first
      lhs.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
    when :left_join
      join_conds = keys.map{|key| [key, key]}
      unmatched = Sequel.or(keys.map{|key| [Sequel.qualify(:t2, key), nil]})
      lhs.left_join(other.from_self(:alias=>:t2).as(:t2), join_conds).
        where(unmatched).
        select_all(:t1)
    else
      cols = columns
      tagged_rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
      tagged_lhs = lhs.select_group(*cols).select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN))
      tagged_lhs.union(tagged_rhs, all: true).
        select_group(*cols).
        having{{count.function.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
    end

  emulated.from_self(opts)
end
except_strategy(strategy, *keys) click to toggle source

The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation, as that doesn't require you to provide a key column, but you can use this to choose a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column.

    # File lib/sequel/adapters/shared/impala.rb
# Return a clone of the dataset whose :except_strategy option is the given
# strategy followed by the given keys.  Raises Sequel::Error when the
# strategy is not listed in EXCEPT_STRATEGIES.
def except_strategy(strategy, *keys)
  unless EXCEPT_STRATEGIES.include?(strategy)
    raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}"
  end

  clone(:except_strategy=>[strategy, *keys])
end
from(*) click to toggle source

Implicitly qualify tables if using the :search_path database option.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Build the dataset via the superclass, then return a clone of it in which
# every :from source has been passed through db.implicit_qualify.
def from(*)
  unqualified = super
  qualified_sources = unqualified.opts[:from].map{|source| db.implicit_qualify(source)}
  unqualified.clone(:from => qualified_sources)
end
insert_overwrite() click to toggle source

Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:

DB[:table].insert_overwrite.insert(DB[:other])
# INSERT OVERWRITE table SELECT * FROM other
    # File lib/sequel/adapters/shared/impala.rb
# Return a clone of the dataset with the :insert_overwrite option enabled,
# which makes insert_into_sql emit OVERWRITE instead of INTO.
def insert_overwrite
  clone(:insert_overwrite=>true)
end
insert_supports_empty_values?() click to toggle source

Impala does not support INSERT DEFAULT VALUES.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: INSERT DEFAULT VALUES is not supported.
def insert_supports_empty_values?
  false
end
intersect(other, opts=OPTS) click to toggle source

Emulate INTERSECT using a join and checking for values in both tables.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate INTERSECT by joining this dataset (aliased :l) to +other+ and
# keeping the distinct rows of :l that have a match.
#
# Columns are paired by position.  A pair matches when the two columns are
# equal or when both are NULL, and all pairs must match.
#
# Raises InvalidOperation for the :all option and for :from_self=>false, and
# Error when either dataset reports no columns.  The result is wrapped with
# from_self(opts).
def intersect(other, opts=OPTS)
  raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
  raise(Error, "Attempt to INTERSECT on dataset with no columns: #{inspect}") if columns.empty?
  raise(Error, "Attempt to INTERSECT other dataset with no columns: #{other.inspect}") if other.columns.empty?

  column_pairs = columns.zip(other.columns)
  # The first block argument qualifies other's columns and the second one
  # qualifies this dataset's columns (per Sequel's join block convention:
  # alias of the table being joined, then alias of the previous table).
  joined = from_self(alias: :l).join(other){|other_alias, self_alias, _|
    pair_conditions = column_pairs.map{|self_col, other_col|
      theirs = Sequel.qualify(other_alias, other_col)
      ours = Sequel.qualify(self_alias, self_col)
      Sequel.expr(theirs=>ours) | {theirs=>nil, ours=>nil}
    }
    Sequel.&(*pair_conditions)
  }
  joined.select_all(:l).distinct.from_self(opts)
end
join_table(type, table, expr=nil, options=OPTS, &block) click to toggle source

Implicitly qualify tables if using the :search_path database option.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Delegate to the superclass join_table after passing the joined table
# through db.implicit_qualify.  All other arguments and the block are
# forwarded unchanged.
def join_table(type, table, expr=nil, options=OPTS, &block)
  qualified_table = db.implicit_qualify(table)
  super(type, qualified_table, expr, options, &block)
end
supports_cte?(type=:select) click to toggle source

Impala supports non-recursive common table expressions.

    # File lib/sequel/adapters/shared/impala.rb
# Always true, regardless of the statement type given.
def supports_cte?(type=:select)
  true
end
supports_cte_in_subqueries?() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Always true.
def supports_cte_in_subqueries?
  true
end
supports_derived_column_lists?() click to toggle source

Impala doesn't support derived column lists when aliasing tables.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: derived column lists are not supported when aliasing tables.
def supports_derived_column_lists?
  false
end
supports_intersect_except_all?() click to toggle source

Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them. However, EXCEPT ALL and INTERSECT ALL are not emulated.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: the EXCEPT and INTERSECT emulations do not cover the ALL
# variants.
def supports_intersect_except_all?
  false
end
supports_is_true?() click to toggle source

Impala only supports IS NULL, not IS TRUE or IS FALSE.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: IS TRUE and IS FALSE are not supported, only IS NULL.
def supports_is_true?
  false
end
supports_multiple_column_in?() click to toggle source

Impala doesn't support IN when used with multiple columns.

    # File lib/sequel/adapters/shared/impala.rb
# Always false: IN with multiple columns is not supported.
def supports_multiple_column_in?
  false
end
supports_regexp?() click to toggle source

Impala supports regexps using the REGEXP operator.

    # File lib/sequel/adapters/shared/impala.rb
# Always true: regular expressions are supported through the REGEXP operator.
def supports_regexp?
  true
end
supports_window_functions?() click to toggle source

Impala supports window functions.

    # File lib/sequel/adapters/shared/impala.rb
# Always true: window functions are supported.
def supports_window_functions?
  true
end
to_parquet(table, options=OPTS) click to toggle source

Create a parquet file from this dataset. table should be the table name to create. To specify a path for the parquet file, use the :location option.

Examples:

DB[:t].to_parquet(:p)
# CREATE TABLE `p` STORED AS parquet AS
# SELECT * FROM `t`

DB[:t].to_parquet(:p, :location=>'/a/b')
# CREATE TABLE `p` STORED AS parquet LOCATION '/a/b' AS
# SELECT * FROM `t`
    # File lib/sequel/adapters/shared/impala.rb
# Create +table+ from this dataset via db.create_table.  The given options
# are merged with :as=>self and :stored_as=>:parquet, so those two keys
# override any values supplied by the caller.
def to_parquet(table, options=OPTS)
  create_opts = options.merge(:as=>self, :stored_as=>:parquet)
  db.create_table(table, create_opts)
end
truncate_sql() click to toggle source

Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns from the table, with WHERE false.

    # File lib/sequel/adapters/shared/impala.rb
# Emulate TRUNCATE with the DELETE emulation applied to the unfiltered
# dataset.
def truncate_sql
  everything = unfiltered
  everything.delete_sql
end
update(values=OPTS) click to toggle source

UPDATE is emulated on Impala, and returns nil instead of the number of modified rows.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Run the superclass update with the given values, but always return nil
# instead of the superclass return value.
def update(values=OPTS)
  super
  nil
end
update_sql(values) click to toggle source

Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used in the given values, use a CASE statement. In the CASE statement, set the value to the new value if the row matches WHERE conditions of the current dataset, otherwise use the existing value.

    # File lib/sequel/adapters/shared/impala.rb
# Build the SQL that emulates UPDATE: an INSERT OVERWRITE of the dataset's
# source selecting every column of the dataset.
#
# A column that is a key of +values+ is selected as a CASE expression aliased
# to the column name: the new value when the dataset's filter matches, the
# existing column otherwise.  Without a filter the condition is the literal
# true.  Every other column is selected as a quoted identifier.
def update_sql(values)
  sql = String.new
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "

  # Literalize the filter once so the same SQL fragment is reused in every
  # CASE expression.
  filter = opts[:where]
  condition = filter ? Sequel.lit(literal(filter)) : true

  select_all.columns.each_with_index do |column, index|
    sql << ', ' unless index.zero?

    if values.has_key?(column)
      literal_append(sql, Sequel.case({condition=>values[column]}, column).as(column))
    else
      quote_identifier_append(sql, column)
    end
  end

  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end
with(name, dataset, opts={}) click to toggle source
Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Add a CTE.  When +dataset+ itself carries a :with option (see has_cte?),
# it is first passed through hoist_cte and the CTE is added to the first
# dataset that returns, using the second one as the CTE body.  Otherwise
# the superclass implementation is used.
# NOTE(review): hoist_cte is defined elsewhere; its two return values are
# used here as [receiver, replacement for dataset] -- confirm.
def with(name, dataset, opts={})
  return super unless has_cte?(dataset)

  receiver, body = hoist_cte(dataset)
  receiver.with(name, body, opts)
end
with_recursive(name, nonrecursive, recursive, opts={}) click to toggle source
Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Add a recursive CTE.  If +nonrecursive+ carries a :with option (see
# has_cte?), it is passed through hoist_cte and the call is repeated on the
# first dataset that returns, with the second one in its place.  Otherwise
# the same is done for +recursive+.  When neither carries one, the superclass
# implementation is used.
# NOTE(review): hoist_cte is defined elsewhere; its two return values are
# used here as [receiver, replacement for the hoisted argument] -- confirm.
def with_recursive(name, nonrecursive, recursive, opts={})
  if has_cte?(nonrecursive)
    receiver, stripped = hoist_cte(nonrecursive)
    return receiver.with_recursive(name, stripped, recursive, opts)
  end

  if has_cte?(recursive)
    receiver, stripped = hoist_cte(recursive)
    return receiver.with_recursive(name, nonrecursive, stripped, opts)
  end

  super
end

Protected Instance Methods

compound_clone(type, dataset, opts) click to toggle source

Add the dataset to the list of compounds

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Add +dataset+ to the list of compounds.  When it carries a :with option
# (see has_cte?), it is passed through hoist_cte and the call is repeated on
# the first dataset that returns, with the second one as the compound.
# Otherwise the superclass implementation is used.
# NOTE(review): hoist_cte is defined elsewhere; its two return values are
# used here as [receiver, replacement for dataset] -- confirm.
def compound_clone(type, dataset, opts)
  return super unless has_cte?(dataset)

  receiver, stripped = hoist_cte(dataset)
  receiver.compound_clone(type, stripped, opts)
end

Private Instance Methods

has_cte?(ds) click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Whether +ds+ is a Dataset with a :with option.  Returns false when +ds+ is
# not a Dataset; otherwise returns the value of its :with option (nil when
# unset), so the result is truthy rather than strictly boolean.
def has_cte?(ds)
  return false unless ds.is_a?(Dataset)

  ds.opts[:with]
end
insert_empty_columns_values() click to toggle source

Impala doesn't handle the DEFAULT keyword used in inserts, as all default values in Impala are NULL, so just use a NULL value.

    # File lib/sequel/adapters/shared/impala.rb
# Columns and values to use for an insert with no values given: the
# dataset's last column, paired with a single nil value.
def insert_empty_columns_values
  last_column = columns.last
  [[last_column], [nil]]
end
insert_into_sql(sql) click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Append ' OVERWRITE ' when the :insert_overwrite option is set, ' INTO '
# otherwise, followed by the unaliased identifier of the first :from source.
def insert_into_sql(sql)
  keyword = @opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO '
  sql << keyword
  identifier_append(sql, unaliased_identifier(@opts[:from].first))
end
literal_false() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# The SQL literal used for false: the BOOL_FALSE constant.
def literal_false
  BOOL_FALSE
end
literal_string_append(sql, s) click to toggle source

Double backslashes in all strings, and escape all apostrophes with backslashes.

    # File lib/sequel/adapters/shared/impala.rb
# Append +s+ as a string literal: its string form, with every match of
# STRING_ESCAPE_RE replaced by STRING_ESCAPE_REPLACE, wrapped in APOS.
def literal_string_append(sql, s)
  escaped = s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE)
  sql << APOS << escaped << APOS
end
literal_true() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# The SQL literal used for true: the BOOL_TRUE constant.
def literal_true
  BOOL_TRUE
end
multi_insert_sql_strategy() click to toggle source
    # File lib/sequel/adapters/shared/impala.rb
# Strategy used for multi-row inserts: always :values.
def multi_insert_sql_strategy
  :values
end
quoted_identifier_append(sql, name) click to toggle source

Impala doesn't support escaping of identifiers, so you can't use a backtick in an identifier name.

    # File lib/sequel/adapters/shared/impala.rb
# Append +name+ wrapped in BACKTICK.  The name is appended verbatim; no
# escaping is applied to it.
def quoted_identifier_append(sql, name)
  sql << BACKTICK
  sql << name.to_s
  sql << BACKTICK
end
select_limit_sql(sql) click to toggle source

Don't include a LIMIT clause if there is no FROM clause. In general, such queries can only return 1 row.

Calls superclass method
    # File lib/sequel/adapters/shared/impala.rb
# Append the LIMIT clause via the superclass only when the dataset has a
# :from option.  Returns nil without touching +sql+ otherwise.
def select_limit_sql(sql)
  super if opts[:from]
end
select_values_sql(sql) click to toggle source

Support VALUES clause instead of the SELECT clause to return rows.

    # File lib/sequel/adapters/shared/impala.rb
# Append SELECT_VALUES followed by the expression list for the dataset's
# :values option.
def select_values_sql(sql)
  sql << SELECT_VALUES
  rows = opts[:values]
  expression_list_append(sql, rows)
end
805 end