class Smith::AgentMonitoring

Public Class Methods

new(agent_processes)
# File lib/smith/agent_monitoring.rb, line 6
# Builds a monitor over the given agent processes.
#
# agent_processes - the collection that start_monitoring later walks with
#                   +each+. It is stored as-is (not copied).
def initialize(agent_processes)
  @agent_processes = agent_processes
end

Public Instance Methods

start_monitoring()
# File lib/smith/agent_monitoring.rb, line 10
# Registers a one-second periodic EventMachine timer. On every tick each
# agent process whose +monitor+ value is truthy is inspected and handled
# according to its +state+:
#
# 'running'  - when a keep-alive newer than +started_at+ is more than 10
#              seconds old, logs a fatal message and calls
#              +no_process_running+. A keep-alive that is not newer than
#              +started_at+ is only reported with a warning. With no
#              keep-alive recorded, nothing happens.
# 'starting' - when more than 10 seconds have passed since +started_at+, logs
#              an error (naming the agent) and calls +not_responding+;
#              otherwise logs a debug message.
# 'stopping' - logged only.
# 'dead'     - publishes a 'start' AgencyCommand for the agent on the agency
#              control queue.
# 'unknown'  - logged only.
#
# Any other state is ignored. +started_at+ and +last_keep_alive+ are treated
# as numeric epoch seconds (they are compared against Time.now.to_i).
#
# Returns whatever EventMachine.add_periodic_timer returns.
def start_monitoring
  # Seconds of silence tolerated before an agent is considered failed; used
  # for both the keep-alive check and the start-up check.
  timeout = 10

  EventMachine.add_periodic_timer(1) do
    @agent_processes.each do |agent_process|
      next unless agent_process.monitor

      logger.verbose { "Agent state for #{agent_process.name}: #{agent_process.state}" }

      case agent_process.state
      when 'running'
        last_keep_alive = agent_process.last_keep_alive
        if last_keep_alive
          started_at = agent_process.started_at
          # NOTE(review): a keep-alive stamped in the same second as the start
          # is also discarded here; confirm whether that is intended.
          if last_keep_alive <= started_at
            logger.warn { "Discarding keepalives with timestamp before agent started: #{Time.at(started_at)} > #{Time.at(last_keep_alive)}" }
          elsif (Time.now.to_i - last_keep_alive) > timeout
            logger.fatal { "Agent not responding: #{agent_process.name}" }
            agent_process.no_process_running
          end
        end
      when 'starting'
        if (Time.now.to_i - agent_process.started_at) > timeout
          # Name the agent so the failure can be attributed when several
          # agents are being monitored.
          logger.error { "No response from agent for > #{timeout} seconds. Agent probably didn't start: #{agent_process.name}" }
          agent_process.not_responding
        else
          logger.debug { "no keep alive from #{agent_process.name}" }
        end
      when 'stopping'
        logger.info { "Agent is shutting down: #{agent_process.name}" }
      when 'dead'
        logger.info { "Restarting dead agent: #{agent_process.name}" }
        Messaging::Sender.new(QueueDefinitions::Agency_control.call) do |sender|
          sender.on_reply { |_p, _r| logger.debug { "Agent restart message acknowledged: #{agent_process.name}" } }
          sender.publish(ACL::AgencyCommand.new(:command => 'start', :args => [agent_process.name]))
        end
      when 'unknown'
        logger.info { "Agent is in an unknown state: #{agent_process.name}" }
      end
    end
  end
end