class ActiveFacts::Generators::ETL::Unidex

Constants

MM

Public Class Methods

compatibility() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 31
# Describes the compositions this generator can be run against.
# Returns a two-element array: the number 1 and a list holding the symbol :relational.
def self.compatibility
  # REVISIT: Remove the dependency on the "persistent" option of the staging compositor.
  accepted_kinds = [:relational]
  [1, accepted_kinds]   # one relational composition
end
new(constellation, composition, options = {}) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 36
# Set up a generator for +composition+ within +constellation+.
#
# The "dialect" entry is removed from +options+. When present, the matching file under
# activefacts/generator/traits/sql/ is required and the first constant of Traits::SQL whose
# name matches the dialect (case-insensitively) replaces the generic SQL trait.
# The selected trait is then included into and extended onto the class, and extended onto
# this instance, before the remaining options are handed to process_options.
def initialize constellation, composition, options = {}
  @constellation, @composition, @options = constellation, composition, options

  sql_traits = ActiveFacts::Generators::Traits::SQL
  @trait = sql_traits
  @dialect = options.delete("dialect")
  if @dialect
    require 'activefacts/generator/traits/sql/'+@dialect
    trait_name = sql_traits.constants.detect { |const| const.to_s =~ %r{#{@dialect}}i }
    @trait = @trait.const_get(trait_name)
  end

  generator_class = self.class
  generator_class.include(@trait)
  generator_class.extend(@trait)
  extend(@trait)

  process_options(options)
end
options() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 19
# The options this generator understands: those reported by the generic SQL trait,
# merged with a :dialect entry.
def self.options
  # REVISIT: There's no way to support SQL dialect options here
  # An anonymous class extended with the trait gives access to the trait's instance methods
  trait_host = Class.new.extend(ActiveFacts::Generators::Traits::SQL)
  dialect_option = { dialect: [String, "SQL Dialect to use"] }
  trait_host.options.merge(dialect_option)
end

Public Instance Methods

all_union(unions) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 70
# Build the statement for the schema-wide "<schema_name>_unidex" view, which selects
# everything from each named per-table view and combines them with UNION ALL.
# +unions+ is the list of view names; nil entries are skipped.
# Returns an empty string when +unions+ is empty.
def all_union unions
  return '' if unions.empty?

  view_header = create_or_replace("#{schema_name}_unidex", 'VIEW')
  selects = unions.compact.map { |view_name| "SELECT * FROM "+view_name }
  view_header + " AS\n" + selects.join("\nUNION ALL ") + ";\n"
end
field_names() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 315
# The column names of a unidex row, each passed through stylise_column_name.
# The list is computed once and cached in @field_names.
def field_names
  return @field_names if @field_names

  labels = %w{Value Phonetic Processing SourceTable SourceField LoadBatchID RecordGUID}
  @field_names = labels.map { |label| stylise_column_name(label) }
end
generate() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 58
# Produce the complete output text: the header, one section per composite (in order of
# mapping name), the union over all per-table views, and the trailer.
# Sections that come back nil are dropped; the rest are joined with newlines.
def generate
  @all_table_unions = []
  preamble = header

  ordered = @composition.all_composite.sort_by { |composite| composite.mapping.name }
  sections = ordered.map { |composite| generate_composite(composite) }
  # Built only after every composite has been generated, since generating a composite
  # is what adds its view name to @all_table_unions
  sections.concat([all_union(@all_table_unions)])

  preamble + (sections.compact * "\n") + trailer
end
generate_composite(composite) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 82
# Generate the "<table>_unidex" view for one composite (table).
#
# Returns nil when the composite's mapping has an injection annotation or its object type
# is static. Otherwise each member of the mapping (in name order) contributes zero or more
# SELECT statements, which are combined with UNION ALL. If any were produced, the view name
# is appended to @all_table_unions and the view definition text is the block's value;
# otherwise the block's value is an empty string.
# NOTE(review): the return value is whatever `trace` returns for its block - presumably
# the block's value; confirm against the trace implementation.
def generate_composite composite
  return nil if composite.mapping.injection_annotation
  return nil if composite.mapping.object_type.is_static

  trace :unidex, "Generating view for #{table_name(composite)}" do
    union =
    composite.mapping.all_member.to_a.sort_by{|m| m.name}.flat_map do |member|
      next nil if member.injection_annotation
      rank_key = member.rank_key

      case key_type = rank_key[0]
      when MM::Component::RANK_SURROGATE,     # A surrogate key; these do not get indexed
          MM::Component::RANK_DISCRIMINATOR,  # Replacement for exclusive indicators, often subtypes
          MM::Component::RANK_MULTIPLE        # Nested absorption
        trace :unidex, "Ignoring #{MM::DataTypes::TypeNames[key_type]} #{column_name member}"
        next nil
      when MM::Component::RANK_INJECTION      # ValueField (index), ValidFrom (don't) or an Absorption with an injection annotation
        if MM::ValueField === member
          # The member itself is the value to index; no `leaf` variable exists in this scope
          generate_value member
        else
          nil
        end
      when MM::Component::RANK_INDICATOR
        generate_indicator member
      else
        raise "Unexpected non-Absorption" unless MM::Absorption === member
        if member.foreign_key
          # Index this record by the natural key of the FK target record, if possible
          generate_joined_value member
        # elsif member.full_absorption  # REVISIT: Anything special to do here?
        else
          # Index every leaf beneath a nested member, or the member itself when it has none
          (member.all_member.size > 0 ? member.all_leaf : [member]).flat_map do |leaf|
            generate_value leaf
          end
        end
      end
    end.compact * "\nUNION ALL"

    if union.size > 0
      union_name = "#{table_name(composite)}_unidex"
      @all_table_unions << union_name

      "/*\n"+
      " * View to extract unified index values for #{table_name(composite)}\n"+
      " */\n"+
      create_or_replace("#{union_name}", 'VIEW') + " AS" +
      union +
      ";\n"
    else
      ''
    end
  end
end
generate_indicator(leaf) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 140
# Indicator members currently contribute nothing to the index; always returns nil.
def generate_indicator leaf
  # REVISIT: Do we need anything here?
  # select(leaf.root, safe_column_name(leaf), 1, column_name(leaf))
  nil
end
generate_joined_value(member) click to toggle source

This foreign key connects two composites (tables)

# File lib/activefacts/generator/etl/unidex.rb, line 146
# Index a record by a natural key of the record its foreign key refers to.
#
# +member+ is a mapping member that has a foreign key. Returns nil when the target
# composite's object type is not static, when the target has no indices, or when no
# unique index of the target offers a searchable value. Otherwise returns the flattened
# search_expr results for each usable index.
#
# An index is usable when it is unique, has exactly one component that is not part of the
# foreign key, that component is an Absorption, and its object type carries at least one
# 'Search' parameter restriction other than 'none'.
def generate_joined_value member
  foreign_key = member.foreign_key
  # REVISIT: Is this restriction even necessary?
  return nil unless foreign_key.composite.mapping.object_type.is_static

  # Index the source table by the natural key of the target, if we can find one
  indices = foreign_key.composite.all_index
  return nil if indices.empty?

  # Maps each usable index to the names of the search methods to apply to it
  search_index_by = {}
  indices.each do |ix|
    next unless ix.is_unique
    non_fk_components = ix.all_index_field.map(&:component) - foreign_key.all_index_field.map(&:component)
    next unless non_fk_components.size == 1
    component = non_fk_components[0]
    next unless MM::Absorption === component
    value_type = component.object_type
    search_methods = value_type.applicable_parameter_restrictions('Search')
    search_methods.reject!{|vtpr| m = vtpr.value_range.minimum_bound and m.value == 'none'}
    search_methods.map!{|sm| sm.value_range.minimum_bound.value.effective_value}
    search_index_by[ix] = search_methods unless search_methods.empty?
  end
  return nil if search_index_by.empty?

  search_index_by.flat_map do |search_index, search_methods|
    trace :unidex, "Search #{table_name foreign_key.source_composite} via #{table_name search_index.composite}.#{column_name search_index.all_index_field.to_a[0].component} using #{search_methods.map(&:inspect)*', '}"

    # Pair each foreign key field with the index field it refers to
    fk_pairs =
          foreign_key.all_foreign_key_field.to_a.
      zip foreign_key.all_index_field.to_a
    leaf = search_index.all_index_field.to_a[0].component         # Returning this natural index value
    source_table = table_name(foreign_key.composite)
    source_field = safe_column_name(member)
    type_name, _options = leaf.data_type(data_type_context)       # Which has this type_name
    intrinsic_type = MM::DataType.intrinsic_type(type_name)       # Which corresponds to this intrinsic type

    # A correlated sub-select fetching the natural key value; whitespace runs are collapsed
    col_expr = Expression.new(
      %Q{
        (SELECT  #{safe_column_name(leaf)}
         FROM    #{source_table} AS f
         WHERE   #{
          fk_pairs.map do |fkf, ixf|
            "#{table_name foreign_key.source_composite}.#{safe_column_name(fkf.component)} = f.#{safe_column_name(ixf.component)}"
          end*' AND '
         })}.
      gsub(/\s+/,' '),
      intrinsic_type,
      foreign_key.all_foreign_key_field.to_a.all?{|fkf| fkf.component.path_mandatory}
    )
    search_expr foreign_key.source_composite, intrinsic_type, col_expr, search_methods, source_field
  end
end
generate_value(leaf) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 204
# Generate the index SELECT(s) for a single leaf column.
#
# Returns nil unless +leaf+ is an MM::Absorption, and nil when its object type has no
# applicable 'Search' parameter restriction other than 'none'. Otherwise returns whatever
# search_expr produces for the column, its intrinsic type and the requested search methods.
def generate_value leaf
  return nil unless leaf.is_a?(MM::Absorption)

  value_type = leaf.object_type
  # Only the type name is needed here; the accompanying options are not used
  type_name, _options = leaf.data_type(data_type_context)

  # Look for instructions on how to index this leaf for search:
  search_methods = value_type.applicable_parameter_restrictions('Search')
  search_methods.reject!{|vtpr| m = vtpr.value_range.minimum_bound and m.value == 'none'}
  return nil if search_methods.empty?
  search_methods.map!{|sm| sm.value_range.minimum_bound.value.effective_value}

  # Convert from the model's data type to a metamodel type, if possible
  intrinsic_type = MM::DataType.intrinsic_type(type_name)
  data_type_name = intrinsic_type ? MM::DataType::TypeNames[intrinsic_type] : type_name
  trace :unidex, "Search #{table_name leaf.root}.#{column_name(leaf)} as #{data_type_name} using #{search_methods.map(&:inspect)*', '}"

  col_expr = Expression.new(safe_column_name(leaf), intrinsic_type, leaf.is_mandatory)
  source_field = safe_column_name(leaf)

  search_expr leaf.root, intrinsic_type, col_expr, search_methods, source_field
end
header() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 78
# Text emitted ahead of all view definitions: whatever schema_prefix returns.
def header
  schema_prefix()
end
phonetic_select(expression, select) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 321
# Wrap +select+ in an outer SELECT DISTINCT that lists every unidex field, replacing the
# phonetic field with the phonetics() of the (stylised) Value column.
# +select+ is the text of the inner query; it becomes the sub-select aliased "s".
# +expression+ is accepted but not used by this method.
# NOTE(review): sub is given plain replacement strings, so backslash sequences inside the
# field list or +select+ would be interpreted by String#sub - confirm none can occur.
def phonetic_select expression, select
  columns = field_names.map do |field|
    next field unless field =~ /Phonetic/i

    value_column = Expression.new(stylise_column_name('Value'), MM::DataType::TYPE_String, true)
    phonetics(value_column).to_s + " AS #{field}"
  end
  field_list = columns.join(",\n\t")

  template = %Q{
    SELECT DISTINCT
            <FIELDS>
    FROM (<SUB>
    ) AS s}.
  unindent

  with_fields = template.sub(/<FIELDS>/, field_list)
  with_fields.sub(/<SUB>/, select.gsub(/\n/,"\n\t"))
end
process_options(options) click to toggle source
Calls superclass method
# File lib/activefacts/generator/etl/unidex.rb, line 54
# Passes +options+ unchanged to the superclass implementation and returns its result.
def process_options options
  super(options)
end
search_expr(composite, intrinsic_type, col_expr, search_methods, source_field) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 229
# Produce the index SELECT(s) for one column expression, according to its intrinsic type.
#
# composite::      the table whose rows are being indexed
# intrinsic_type:: one of the MM::DataType::TYPE_* values, or nil when unknown
# col_expr::       the expression yielding the column's value
# search_methods:: names of the search methods requested (only consulted for text types)
# source_field::   the column name recorded as the source of each index value
#
# For Char/String/Text types, returns an array with one entry per search method; entries
# are nil for methods that produce nothing ('none', 'text', and unknown names, which are
# also reported on $stderr). For numeric and date/time types, returns a single select.
# Returns nil for Boolean, Binary, unknown (nil) and any other type.
def search_expr composite, intrinsic_type, col_expr, search_methods, source_field
  case intrinsic_type
  when MM::DataType::TYPE_Char,
       MM::DataType::TYPE_String,
       MM::DataType::TYPE_Text
    # Produce a select yielding values for the requested search type
    search_methods.flat_map do |sm|
      case sm
      when 'none'         # Do not index this value
        nil

      when 'simple'       # Disregard white-space only
        select(composite, col_expr, 'simple', source_field)

      when 'alpha'        # Strip white space and punctuation, just use alphabetic characters
        select(composite, as_alpha(col_expr), sm, source_field)

      when 'phonetic'     # Use phonetic matching as well as trigrams and alpha
        select(composite, as_alpha(col_expr), 'phonetic', source_field, phonetics(col_expr))

      when 'words'        # Break the text into words and match each word like alpha
        select(composite, unnest(as_words(col_expr)), sm, source_field)

      when 'names'        # Break the text into words and match each word like phonetic
        value = unnest(as_words(col_expr, "''-"))   # N.B. ' is doubled for SQL
        phonetic_select(value, select(composite, value, 'names', source_field))

      when 'text'         # Index a large text field using significant words and phrases
        nil # REVISIT: Implement this type

      when 'number'       # Cast to number and back to text to canonicalise the value;
        # If it doesn't look like a number, we don't index it.
        value = number_or_null(col_expr)
        select(composite, value, 'number', source_field, nil, ["#{value} IS NOT NULL"])

      when 'phone'        # Phone numbers; split, strip each to digits, take the last 8 of each
        select(composite, phone_numbers(col_expr), 'phone', source_field)

      when 'email'        # Use a regexp to find email addresses in this field
        select(composite, email_addresses(col_expr), 'email', source_field)

      when 'date'         # REVISIT: Convert string to standard date format
        # If it doesn't look like a date, we don't index it.
        value = date_or_null(col_expr)
        select(composite, value, 'date', source_field, nil, ["#{value} IS NOT NULL"])

      else
        # Report and skip; puts returns nil, so nothing is indexed for this method
        $stderr.puts "Unknown search method #{sm}"
      end
    end

  when MM::DataType::TYPE_Boolean
    nil # REVISIT: Implement this type

  when MM::DataType::TYPE_Integer,
       MM::DataType::TYPE_Real,
       MM::DataType::TYPE_Decimal,
       MM::DataType::TYPE_Money
    select(composite, col_expr, 'simple', source_field)

  when MM::DataType::TYPE_Date
    # Produce an ISO representation that sorts lexically (YYYY-MM-DD)
    # REVISIT: Support search methods here
    select(composite, lexical_date(col_expr), 'date', source_field)

  when MM::DataType::TYPE_DateTime,
       MM::DataType::TYPE_Timestamp
    # Produce an ISO representation that sorts lexically (YYYY-MM-DD HH:mm:ss)
    # REVISIT: Support search methods here
    select(composite, lexical_datetime(col_expr), 'datetime', source_field)

  when MM::DataType::TYPE_Time
    # Produce an ISO representation that sorts lexically (YYYY-MM-DD HH:mm:ss)
    select(composite, lexical_time(col_expr), 'time', source_field)

  when MM::DataType::TYPE_Binary
    nil   # No indexing applied
  when nil   # Data Type is unknown
    nil
  else
    nil
  end
end
select(composite, expression, processing, source_field, phonetic = nil, conditions = []) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 343
# Build one SELECT DISTINCT over +composite+'s table that yields unidex rows.
#
# expression::   the value to index
# processing::   the name of the processing applied, emitted as a text literal
# source_field:: the source column name, emitted as a text literal
# phonetic::     optional phonetic expression; NULL is emitted when absent
# conditions::   optional list of conditions, ANDed together in a WHERE clause
#
# Each value is paired positionally with the matching entry of field_names and emitted as
# "value AS name"; positions with no value emit just the name.
def select composite, expression, processing, source_field, phonetic = nil, conditions = []
  # These fields are in order of index precedence, to co-locate
  # comparable values regardless of source record type or column
  values = [
    expression.to_s,
    phonetic ? phonetic.to_s : 'NULL',
    "'"+processing+"'::text",
    "'"+safe_table_name(composite)+"'::text",
    "'"+source_field+"'::text",
    nil,
    nil,
  ]
  select_list =
    values.zip(field_names).
    map { |value, field| [value, field].compact * ' AS ' }.
    join(%q{,
            })

  where = conditions.empty? ? '' : "\nWHERE\t#{conditions*"\n  AND\t"}"

  %Q{
    SELECT DISTINCT
            #{select_list}
    FROM    #{safe_table_name(composite)}}.
    unindent+
    where
end
stylise_column_name(name) click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 311
# Convert +name+ to this generator's column naming style: split it into words, apply the
# method named by @column_case to them, then combine the result using @column_joiner.
def stylise_column_name name
  cased_words = name.words.send(@column_case)
  cased_words * @column_joiner
end
trailer() click to toggle source
# File lib/activefacts/generator/etl/unidex.rb, line 136
# Text emitted after all view definitions; there is none, so this is an empty string.
def trailer
  ""
end