class DataMiner::Script

The container that holds each step in the script.

Constants

STACK_THREAD_VAR
UNIQ_THREAD_VAR

Attributes

model[R]

@private

steps[R]

The steps in the script. @return [Array<DataMiner::Step>]

Public Class Methods

current_stack() click to toggle source

@private

# File lib/data_miner/script.rb, line 18
# Fetch the calling thread's stack of model names.
#
# The stack lives in a thread-local slot keyed by STACK_THREAD_VAR; an empty
# Array is stored there the first time it is requested (or whenever the slot
# currently holds nil/false).
#
# @return [Array] the stack kept for the current thread
def current_stack
  thread = ::Thread.current
  thread[STACK_THREAD_VAR] ||= []
end
current_stack=(stack) click to toggle source

@private

# File lib/data_miner/script.rb, line 23
# Replace the calling thread's stack.
#
# @param stack the object to store in the thread-local slot keyed by
#   STACK_THREAD_VAR
def current_stack=(stack)
  thread = ::Thread.current
  thread[STACK_THREAD_VAR] = stack
end
current_uniq() click to toggle source

@private

# File lib/data_miner/script.rb, line 28
# Read the calling thread's "uniq" flag.
#
# @return whatever is stored in the thread-local slot keyed by
#   UNIQ_THREAD_VAR (nil when nothing has been stored)
def current_uniq
  thread = ::Thread.current
  thread[UNIQ_THREAD_VAR]
end
current_uniq=(uniq) click to toggle source

@private

# File lib/data_miner/script.rb, line 33
# Set the calling thread's "uniq" flag.
#
# @param uniq the value to store in the thread-local slot keyed by
#   UNIQ_THREAD_VAR
def current_uniq=(uniq)
  thread = ::Thread.current
  thread[UNIQ_THREAD_VAR] = uniq
end
new(model) click to toggle source

@private

# File lib/data_miner/script.rb, line 49
# Create an empty script for +model+.
#
# The model's name is added to DataMiner.model_names, and the script starts
# out with no steps.
#
# @param model the model this script belongs to; must respond to #name
def initialize(model)
  DataMiner.model_names.add(model.name)
  @steps = []
  @model = model
end
uniq() { || ... } click to toggle source

@private activerecord-3.2.3/lib/active_record/scoping.rb

# File lib/data_miner/script.rb, line 7
# Run the given block with the "uniq" flag switched on.
#
# The flag's prior value is captured first and put back when the block
# finishes, whether it returns normally or raises.
#
# @yield [] the code to run while the flag is true
# @return the value of the block
def uniq
  flag_before = current_uniq
  Script.current_uniq = true
  begin
    yield
  ensure
    # Always restore, so a failure inside the block cannot leave the flag set.
    Script.current_uniq = flag_before
  end
end

Public Instance Methods

append(*args, &blk) click to toggle source

Append a step to a script. Mostly for internal use.

@return [nil]

# File lib/data_miner/script.rb, line 225
# Build a step from the given arguments (via #make) and put it at the end of
# the step list.
#
# @return [nil]
def append(*args, &blk)
  new_step = make(*args, &blk)
  steps.push(new_step)
  nil
end
append_block(blk) click to toggle source

@private

# File lib/data_miner/script.rb, line 56
def append_block(blk)
  # Evaluate the block with this object as self, so bare calls made inside
  # the block are sent to this object.
  instance_eval(&blk)
end
append_once(*args, &blk) click to toggle source

Append a step to a script unless it’s already there. Mostly for internal use.

@return [nil]

# File lib/data_miner/script.rb, line 214
# Build a step (via #make) and add it to the end of the step list, but only
# when an equal step is not already present.
#
# @return [nil]
def append_once(*args, &blk)
  candidate = make(*args, &blk)
  steps.push(candidate) unless steps.include?(candidate)
  nil
end
import(description, settings, &blk) click to toggle source

Import rows into your model.

As long as…

  1. you key on the primary key, or

  2. the table has an auto-increment primary key, or

  3. you DON’T enable :validate

… then things will be sped up using the {github.com/seamusabshere/upsert upsert library} in streaming mode.

Otherwise, native ActiveRecord constructors and validations will be used.

@see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models. @see DataMiner::Step::Import The actual Import class.

@param [String] description A description of the data source. @param [Hash] settings Settings, including URL of the data source, that are used to download/parse (using RemoteTable) and (sometimes) correct (using Errata) the data. @option settings [String] :url The URL of the data source. Passed directly to RemoteTable.new. @option settings [Hash] :errata The :responder and :url settings that will be passed to Errata.new. @option settings [TrueClass,FalseClass] :validate Whether to always run ActiveRecord validations. @option settings [*] anything Any other setting will be passed to RemoteTable.new.

@yield [] A block defining how to key the import (to make it idempotent) and which columns to store.

@note Be sure to check out github.com/seamusabshere/remote_table and github.com/seamusabshere/errata for available settings. @note There are hundreds of import examples in github.com/brighterplanet/earth. The {file:README.markdown README} points to a few (at the bottom.) @note We often use string primary keys to make idempotency easier. github.com/seamusabshere/active_record_inline_schema supports defining these inline. @note Enabling :validate may slow down importing large files because it precludes bulk loading using github.com/seamusabshere/upsert.

@example From the README

data_miner do
  [...]
  import("OpenGeoCode.org's Country Codes to Country Names list",
         :url => 'http://opengeocode.org/download/countrynames.txt',
         :format => :delimited,
         :delimiter => '; ',
         :headers => false,
         :skip => 22) do
    key   :iso_3166_code, :field_number => 0
    store :iso_3166_alpha_3_code, :field_number => 1
    store :iso_3166_numeric_code, :field_number => 2
    store :name, :field_number => 5
  end
  [...]
end

@return [nil]

# File lib/data_miner/script.rb, line 170
def import(description, settings, &blk)
  # Queue an :import step at the end of the script; the description, settings
  # and block are handed to #append unchanged, and its result is returned.
  append(:import, description, settings, &blk)
end
prepend(*args, &blk) click to toggle source

Prepend a step to a script. Mostly for internal use.

@return [nil]

# File lib/data_miner/script.rb, line 206
# Build a step from the given arguments (via #make) and put it at the front
# of the step list.
#
# @return [nil]
def prepend(*args, &blk)
  new_step = make(*args, &blk)
  steps.unshift(new_step)
  nil
end
prepend_once(*args, &blk) click to toggle source

Prepend a step to a script unless it’s already there. Mostly for internal use.

@return [nil]

# File lib/data_miner/script.rb, line 195
# Build a step (via #make) and add it to the front of the step list, but only
# when an equal step is not already present.
#
# @return [nil]
def prepend_once(*args, &blk)
  candidate = make(*args, &blk)
  steps.unshift(candidate) unless steps.include?(candidate)
  nil
end
process(method_id_or_description, &blk) click to toggle source

Identify a single method, or define a block of arbitrary code, to be executed.

@see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models. @see DataMiner::Step::Process The actual Process class.

@overload process(method_id)

Run a class method on the model.
@param [Symbol] method_id The class method to be run on the model.

@overload process(description, &blk)

Run a block of code.
@param [String] description A description of what the block does.
@yield [] The block to be evaluated in the context of the model (it's instance_eval'ed on the model class)

@example Single class method

data_miner do
  [...]
  process :update_averages!
  [...]
end

@example Arbitrary code

data_miner do
  [...]
  process "do some arbitrary stuff" do
    [...]
  end
  [...]
end

@return [nil]

# File lib/data_miner/script.rb, line 91
def process(method_id_or_description, &blk)
  # Queue a :process step at the end of the script. The single positional
  # argument (method id or description) is forwarded as-is together with the
  # optional block; the result of #append is returned.
  append(:process, method_id_or_description, &blk)
end
sql(description, url_or_statement) click to toggle source

Execute SQL, provided either as a string or a URL.

@see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models. @see DataMiner::Step::Sql The actual Sql class.

@note url_or_statement is auto-detected by looking for +%r{^[^\s]*/[^\*]}+ (non-spaces followed by a slash followed by non-asterisk). Therefore if you’re passing a local file path and want it to be treated like a URL, make it absolute.

@param [String] description What this step does. @param [String] url_or_statement SQL statement as a String or location of the SQL file as a URL.

@example Rapidly get a list of countries from Brighter Planet’s Reference Data web service

data_miner do
  sql "Brighter Planet's countries", 'http://data.brighterplanet.com/countries.sql'
end
# File lib/data_miner/script.rb, line 188
def sql(description, url_or_statement)
  # Queue a :sql step at the end of the script. No block is accepted here;
  # both arguments are forwarded to #append and its result is returned.
  append(:sql, description, url_or_statement)
end
start() click to toggle source

Run the script for this model. Mostly for internal use.

@note Normally you should use Country.run_data_miner! @note A primitive “call stack” is kept that will prevent infinite loops. So, if Country’s data miner script calls Province’s AND vice-versa, each one will only be run once.

@return [nil]

# File lib/data_miner/script.rb, line 236
# Run every step of this script, in order.
#
# Stack handling (per thread, via Script.current_stack):
# * when Script.current_uniq is set and this model's name is already on the
#   stack, nothing is run;
# * when Script.current_uniq is not set, the stack is emptied first;
# * in every non-skipped case the model's name is then pushed on the stack.
#
# Before running, every step is registered with every step (itself included).
# Each step is logged at START, and at DONE or FAIL; a failing step's
# exception is re-raised after the FAIL line is logged, so later steps do not
# run.
#
# @return [nil]
def start
  name = model.name
  if Script.current_uniq
    # Already handled within the current uniq run: skip.
    return if Script.current_stack.include?(name)
  else
    # Not de-duplicating, so whatever is on the stack is irrelevant.
    Script.current_stack.clear
  end
  Script.current_stack << name

  steps.each do |registrant|
    steps.each { |receiver| receiver.register(registrant) }
  end

  steps.each_with_index do |step, index|
    begin
      DataMiner.logger.info "[DataMiner] START #{step.model.name} step #{index} #{step.description.inspect}"
      step.start
      # Column information is reset after every successful step.
      model.reset_column_information
    rescue => error
      DataMiner.logger.info "[DataMiner] FAIL #{step.model.name} step #{index} (#{step.description.inspect})"
      raise error
    else
      DataMiner.logger.info "[DataMiner] DONE #{step.model.name} step #{index} (#{step.description.inspect})"
    end
  end
  nil
end
test(description, settings = {}, &blk) click to toggle source

A step that runs tests and stops the data miner on failures.

rspec-expectations are automatically included.

@see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models. @see DataMiner::Step::Test The actual Test class.

@param [String] description A description of what the block does. @param [Hash] settings Settings @option settings [String] :after After how many rows of the previous step to run the tests. @yield [] Tests to be run

@example Tests

data_miner do
  [...]
  test "make sure something works" do
    expect(Pet.count).to be > 10
  end
  [...]
  test "make sure something works", after: 20 do
    [...]
  end
  [...]
end

@return [nil]

# File lib/data_miner/script.rb, line 121
def test(description, settings = {}, &blk)
  # Queue a :test step at the end of the script. settings defaults to an empty
  # Hash; description, settings and the block are forwarded to #append and its
  # result is returned.
  append(:test, description, settings, &blk)
end

Private Instance Methods

make(*args, &blk) click to toggle source

@return [DataMiner::Step]

# File lib/data_miner/script.rb, line 272
# Instantiate a step.
#
# The first argument names the step type: it is camelcased and looked up as a
# constant under Step. A trailing options Hash, if any, is pulled off the
# remaining arguments. When no other arguments are left, a placeholder
# description is used. The step class is then instantiated with this script,
# the remaining arguments, the options, and the block (if one was given).
#
# @return [DataMiner::Step]
def make(*args, &blk)
  step_class = Step.const_get(args.shift.to_s.camelcase)
  options = args.extract_options!
  args = ["#{step_class.name.demodulize} step with no description"] if args.empty?
  # Passing a nil blk with & sends no block, so one call covers both cases.
  step_class.new(self, *args, options, &blk)
end