class Chimp::Executor

Constants

STATUS_DONE
STATUS_ERROR
STATUS_HOLDING
STATUS_NONE
STATUS_RETRYING
STATUS_RUNNING

Attributes

array[RW]
concurrency[RW]
delay[RW]
dry_run[RW]
error[RW]
exec[RW]
group[RW]
inputs[RW]
job_id[RW]
job_notes[RW]
job_uuid[RW]
owner[RW]
quiet[RW]
results[R]
retry_count[RW]
retry_sleep[RW]
server[RW]
status[RW]
template[RW]
time_end[RW]
time_start[RW]
timeout[RW]
verbose[RW]

Public Class Methods

new(h={}) click to toggle source
# File lib/right_chimp/exec/executor.rb, line 20
#
# Build an executor from an options hash.
#
# Recognised keys in +h+ (all optional):
#   :server, :array, :template      - stored as given (nil when absent)
#   :job_id, :job_uuid, :job_notes  - stored as given (nil when absent)
#   :group, :exec, :inputs          - stored as given (nil when absent)
#   :verbose                        - defaults to false
#   :retry_count                    - coerced with to_i, defaults to 0
#   :retry_sleep                    - coerced with to_i, defaults to 30
#   :timeout                        - coerced with to_i, defaults to 3600
#   :delay                          - coerced with to_i, defaults to 0
#   :concurrency                    - coerced with to_i (0 when absent)
#
# The remaining state (error, status, owner, dry_run, quiet, timestamps)
# always starts from fixed initial values and is set later via accessors.
#
def initialize(h={})
  @server = h[:server]            || nil
  @array = h[:array]              || nil
  @template = h[:template]        || nil

  @job_id = h[:job_id]            || nil
  @job_uuid = h[:job_uuid]        || nil
  @job_notes = h[:job_notes]      || nil

  @group = h[:group]              || nil
  @exec = h[:exec]                || nil
  @inputs = h[:inputs]            || nil

  @verbose = h[:verbose]          || false

  # Apply the default BEFORE to_i: nil.to_i is 0 (truthy in Ruby), so
  # "h[:key].to_i || default" can never reach its default.
  @retry_count = (h[:retry_count] || 0).to_i
  @retry_sleep = (h[:retry_sleep] || 30).to_i
  @timeout = (h[:timeout] || 3600).to_i

  @delay = (h[:delay] || 0).to_i
  @concurrency = h[:concurrency].to_i

  @error = nil
  @status = STATUS_NONE
  @owner = nil
  @dry_run = false
  @quiet = false

  @time_start = nil
  @time_end = nil
end

Public Instance Methods

cancel() click to toggle source

Convenience method to cancel

# File lib/right_chimp/exec/executor.rb, line 82
# Ask the owning group to cancel this job, identified by its job_id.
# Returns whatever the group's cancel call returns.
def cancel
  id = job_id
  @group.cancel(id)
end
get_total_exec_time() click to toggle source

Return total execution time (real) of a job

# File lib/right_chimp/exec/executor.rb, line 55
# Elapsed real time of the job in whole seconds.
#
# Returns 0 when the job has no start time, the time elapsed so far
# when it has started but has no end time, and end minus start otherwise.
def get_total_exec_time
  return 0 if @time_start.nil?

  finished_at = @time_end.nil? ? Time.now : @time_end
  finished_at.to_i - @time_start.to_i
end
info() click to toggle source

Return info on what this executor does, e.g. the name of a script or command

# File lib/right_chimp/exec/executor.rb, line 93
# Placeholder in the base class: always raises a RuntimeError with the
# message "unimplemented".
def info
  raise RuntimeError, "unimplemented"
end
queue() click to toggle source

Convenience method to queue a held job

# File lib/right_chimp/exec/executor.rb, line 68
# Ask the owning group to queue this job, identified by its job_id.
# Returns whatever the group's queue call returns.
def queue
  id = job_id
  @group.queue(id)
end
requeue() click to toggle source

Convenience method to requeue

# File lib/right_chimp/exec/executor.rb, line 75
# Ask the owning group to requeue this job, identified by its job_id.
# Returns whatever the group's requeue call returns.
def requeue
  id = job_id
  @group.requeue(id)
end
run() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 86
# Base-class stub: always raises a RuntimeError telling the caller that
# run has to be overridden.
def run
  raise RuntimeError, "run method must be overridden"
end
target() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 97
# Name of the job's target; the base class has none and returns the
# literal string "UNKNOWN".
def target
  "UNKNOWN"
end

Protected Instance Methods

describe_work() click to toggle source

This method should be overridden on Executor subclasses to provide a human readable description of the work being performed.

# File lib/right_chimp/exec/executor.rb, line 186
# Human-readable one-line description of this job: the receiver's class
# name followed by "job_id=<id>".
def describe_work
  klass_name = self.class.name
  "#{klass_name} job_id=#{@job_id}"
end
describe_work_done() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 194
# Completion message: the job description, "status=END", and the elapsed
# time in whole seconds (end time minus start time).
def describe_work_done
  summary = describe_work
  elapsed = @time_end.to_i - @time_start.to_i
  "#{summary} status=END time=#{elapsed}s"
end
describe_work_done_long() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 198
# Verbose completion message: the job description, "status=END", then the
# start time, end time and their difference, each as integer seconds.
def describe_work_done_long
  summary = describe_work
  started = @time_start.to_i
  ended = @time_end.to_i
  "#{summary} status=END time_start=#{started} time_end=#{ended} time_total=#{ended - started}"
end
describe_work_error() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 202
# Failure message: the job description followed by "status=ERROR".
def describe_work_error
  summary = describe_work
  "#{summary} status=ERROR"
end
describe_work_start() click to toggle source
# File lib/right_chimp/exec/executor.rb, line 190
# Start message: the job description followed by "status=START".
def describe_work_start
  summary = describe_work
  "#{summary} status=START"
end
run_with_retry() { || ... } click to toggle source

Run a unit of work with retries. This is called from the subclass with a code block to yield to.

# File lib/right_chimp/exec/executor.rb, line 108
#
# Run a unit of work with retries. Called with a block, which is yielded
# to (unless @dry_run is set) to perform the actual work.
#
# Flow:
#   * Under the ChimpDaemon semaphore, optionally sleep @delay seconds
#     (when the group has already started at least @concurrency jobs and
#     @delay is non-zero), then bump the group's started counter.
#   * Mark the job STATUS_RUNNING and record @time_start.
#   * Yield. On success mark STATUS_DONE and notify the group, provided the
#     job still has an owner; otherwise log a warning.
#   * On a StandardError, retry up to @retry_count times, sleeping
#     @retry_sleep seconds between attempts; once retries are exhausted
#     mark STATUS_ERROR and store the exception in @error.
#   * SystemExit and Interrupt are re-raised to the caller.
#   * @time_end is always recorded when the inner block is left.
#
def run_with_retry(&block)
  Log.debug "Running job '#{@job_id}' with status '#{@status}'"
  # If we are not the first job in this group, wait @delay
  ChimpDaemon.instance.semaphore.synchronize do
    if @group.started >= @concurrency && @delay.nonzero?
      Log.info "[#{@job_uuid}] Sleeping #{@delay} seconds between tasks"
      sleep @delay
    end
    @group.started += 1
  end

  @status = STATUS_RUNNING
  @time_start = Time.now

  Log.info self.describe_work_start unless @quiet

  #
  # The inner level of exception handling here tries to catch anything
  # that can be easily retried or failed-- normal exceptions.
  #
  # The outer level of exception handling handles weird stuff; for example,
  # sometimes rest_connection raises RuntimeError exceptions...
  #
  # This fixes acu75562.
  #
  begin
    begin
      yield unless @dry_run

      if @owner.nil?
        Log.warn "[#{@job_uuid}][#{@job_id}] Ownership of job_id #{job_id} lost. User cancelled operation?"
      else
        @status = STATUS_DONE
        @group.job_completed
      end

    rescue SystemExit, Interrupt
      # Shutdown requests must never be retried or swallowed.
      $stderr.puts 'Exiting!'
      raise

    rescue StandardError => ex
      # This clause previously named Interrupt, which the clause above
      # already handles, so the retry logic below could never run.
      name = @array['name'] if @array
      name = @server['name'] || @server['nickname'] if @server
      Log.error self.describe_work_error

      if @retry_count > 0
        @status = STATUS_RETRYING
        Log.error "[#{@job_uuid}][#{@job_id}] Error executing on \"#{name}\". Retrying in #{@retry_sleep} seconds..."
        @retry_count -= 1
        sleep @retry_sleep
        retry
      end

      @status = STATUS_ERROR
      @error = ex
      Log.error "[#{@job_uuid}][#{@job_id}] Error executing on \"#{name}\": #{ex}"

    ensure
      @time_end = Time.now
      Log.info self.describe_work_done unless @quiet
    end

  rescue RuntimeError => ex
    # Start from a mutable copy of the message so each optional detail can
    # be appended independently; a missing server or IP address must not
    # make this handler itself blow up.
    err = ex.message.dup
    ip_address = @server.params['ip_address'] if @server
    err << "IP: #{ip_address}\n" if ip_address
    err << " Group: #{@group.group_id}\n" if @group.group_id
    err << " Notes: #{@job_notes}\n" if @job_notes
    Log.error "[#{@job_uuid}][#{@job_id}] Caught RuntimeError: #{err} Job failed.\n"
    @status = STATUS_ERROR
    @error = ex
  end
end