class Riemann::Tools::DockerHealth

Public Class Methods

new() click to toggle source
# File bin/riemann-docker, line 31
# Configures the checker from the options hash returned by +opts+.
#
# Sets the Docker endpoint (only when :docker_host is given), the hostname
# used in reported events, the warning/critical limits per resource, the
# per-container caches used by #cpu and #basic_inspection, and one
# "enabled" flag for every recognised entry of opts[:checks].
def initialize
  docker_host = opts[:docker_host]
  Docker.url = docker_host unless docker_host.nil?

  # Use the configured hostname only when it is a non-empty String;
  # otherwise fall back to this machine's hostname.
  configured = opts[:host_hostname]
  @hostname =
    if configured.is_a?(String) && !configured.empty?
      configured
    else
      Socket.gethostname
    end

  # 10^9; #cpu divides usage deltas by this value.
  @cpu_coefficient = 1_000_000_000

  # Thresholds are looked up as opts[:<resource>_critical] / opts[:<resource>_warning].
  @limits = [:cpu, :disk, :memory].each_with_object({}) do |resource, limits|
    limits[resource] = {
      :critical => opts[:"#{resource}_critical"],
      :warning => opts[:"#{resource}_warning"]
    }
  end

  # Previous samples, keyed by container id.
  @last_cpu_reads = {}
  @last_uptime_reads = {}

  # Unknown check names are ignored; flags of absent checks stay unset.
  opts[:checks].each do |check|
    case check
    when 'disk'   then @disk_enabled = true
    when 'cpu'    then @cpu_enabled = true
    when 'memory' then @memory_enabled = true
    when 'basic'  then @basic_inspection_enabled = true
    end
  end
end

Public Instance Methods

alert(container, service, state, metric, description) click to toggle source
# File bin/riemann-docker, line 67
# Builds an event hash and hands it to +report+.
#
# container   - container name appended to the hostname as "<hostname>-<container>",
#               or nil to report against the bare hostname.
# service     - event service; converted with #to_s.
# state       - event state; converted with #to_s.
# metric      - event metric; converted with #to_f (so nil becomes 0.0).
# description - passed through unchanged.
#
# Returns whatever +report+ returns.
def alert(container, service, state, metric, description)
  event_host = container.nil? ? @hostname : "#{@hostname}-#{container}"

  event = {
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
    host: event_host
  }

  report(event)
end
basic_inspection(id, name, inspection) click to toggle source
# File bin/riemann-docker, line 142
# Reports the running status of a container and, while it is running,
# its uptime.
#
# id         - key under which the last seen start time is remembered.
# name       - container name passed to #alert.
# inspection - Hash with a 'State' entry that provides 'Running' and, for
#              running containers, an RFC 3339 'StartedAt' timestamp.
#
# A "status" alert is always sent (ok/1 when running, critical/0 otherwise)
# with the JSON-encoded state as description. An "uptime" alert is sent only
# from the second observation of a running container onwards; it is critical
# when 'StartedAt' differs from the previously remembered value.
def basic_inspection(id, name, inspection)
  container_state = inspection['State']
  state_as_json = JSON.generate(container_state)
  is_running = container_state['Running']

  status, status_metric = is_running ? ["ok", 1] : ["critical", 0]
  alert(name, "status", status, status_metric, state_as_json)

  return unless is_running

  started_at = DateTime.rfc3339(container_state['StartedAt']).to_time.utc.to_i
  uptime = DateTime.now.to_time.utc.to_i - started_at

  previous_start = @last_uptime_reads[id]
  unless previous_start.nil?
    # A changed start time means the container was restarted since the last check.
    uptime_state = started_at == previous_start ? "ok" : "critical"
    alert(name, "uptime", uptime_state, uptime,
          "last 'StartedAt' measure was #{previous_start} (#{Time.at(previous_start).utc}), " \
          "now it's #{started_at} (#{Time.at(started_at).utc})")
  end

  @last_uptime_reads[id] = started_at
end
cpu(id, name, stats) click to toggle source
# File bin/riemann-docker, line 101
# Reports the CPU usage of one container, derived from two consecutive samples.
#
# id    - key under which the previous sample is remembered.
# name  - container name passed to #report_pct / #alert.
# stats - Hash from the Docker stats endpoint; reads 'read' and
#         'precpu_stats' ('cpu_usage' => 'total_usage' / 'percpu_usage',
#         with 'online_cpus' as a fallback for the CPU count).
#
# The sampled value is total_usage divided by the number of CPUs. From the
# second sample on, (value delta) / (elapsed seconds) / @cpu_coefficient is
# sent through #report_pct.
#
# Returns false (after sending an :unknown alert) when the stats carry no
# total usage or no usable CPU count; otherwise returns the stored sample.
#
# NOTE(review): this reads 'precpu_stats' but timestamps it with 'read';
# confirm whether 'cpu_stats' (or 'preread') was intended.
def cpu(id, name, stats)
  precpu = stats['precpu_stats'] || {}
  usage = precpu['cpu_usage'] || {}
  total = usage['total_usage']

  unless total
    alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
    return false
  end

  # 'percpu_usage' can be absent or empty; use 'online_cpus' in that case
  # instead of raising NoMethodError / ZeroDivisionError.
  per_cpu = usage['percpu_usage']
  cores =
    if per_cpu && !per_cpu.empty?
      per_cpu.count
    else
      precpu['online_cpus'].to_i
    end

  unless cores > 0
    alert name, :cpu, :unknown, nil, 'no cpu count found in docker remote api stats'
    return false
  end

  current = total / cores
  current_time = Time.parse(stats['read'])
  last = @last_cpu_reads[id]

  if last
    elapsed = current_time - last[:t]
    # Two samples with the same (or an older) timestamp would produce
    # Infinity/NaN, so nothing is reported for them.
    if elapsed > 0
      used = (current - last[:v]) / elapsed / @cpu_coefficient
      report_pct name, :cpu, used
    end
  end

  @last_cpu_reads[id] = { v: current, t: current_time }
end
disk() click to toggle source
# File bin/riemann-docker, line 130
# Reports the usage of every filesystem listed by `df -P`.
#
# The header row and every row whose first column contains no slash are
# skipped. For the remaining rows the fifth column is read as a percentage
# and sent through #report_pct under the service name "disk <sixth column>",
# with the fourth column divided by 1024 in the "mb left" text.
# (Column meanings are assumed to follow the usual `df -P` layout:
# filesystem, blocks, used, available, capacity, mount point.)
def disk
  `df -P`.split(/\n/).each do |row|
    filesystem, _blocks, _used, available, capacity, mount_point = row.split(/\s+/)

    next if filesystem == 'Filesystem'
    next unless filesystem =~ /\//

    used_fraction = capacity.to_f / 100
    report_pct(nil, :disk, used_fraction, "#{available.to_i / 1024} mb left", "disk #{mount_point}")
  end
end
get_container_name(container) click to toggle source
# File bin/riemann-docker, line 27
# Returns the 'Name' entry of container.json without its first character.
# (Presumably this strips the leading "/" Docker puts in front of container
# names; confirm against the Docker API.)
def get_container_name(container)
  full_name = container.json['Name']
  full_name[1..-1]
end
get_containers() click to toggle source
# File bin/riemann-docker, line 23
# Returns the result of Docker::Container.all; #tick iterates over it and
# starts one thread per element.
def get_containers
  Docker::Container.all
end
memory(id, name, stats) click to toggle source
# File bin/riemann-docker, line 121
# Reports the memory usage of one container as usage / limit.
#
# id    - unused; kept so the signature matches #cpu.
# name  - container name passed to #report_pct / #alert.
# stats - Hash from the Docker stats endpoint; reads 'memory_stats'
#         ('usage' and 'limit').
#
# Returns false (after sending an :unknown alert) when no positive limit is
# available; otherwise returns the result of #report_pct.
def memory(id, name, stats)
  memory_stats = stats['memory_stats'] || {}
  usage = memory_stats['usage'].to_f
  total = memory_stats['limit'].to_f

  # A missing or zero limit would make the fraction NaN or Infinity, which
  # #report_pct would then classify as :ok or :critical; flag it explicitly.
  unless total > 0
    alert name, :memory, :unknown, nil, 'no memory limit found in docker remote api stats'
    return false
  end

  report_pct name, :memory, usage / total, "#{usage} / #{total}"
end
report_pct(container, service, fraction, report = '', name = nil) click to toggle source
# File bin/riemann-docker, line 83
# Classifies a fraction against the configured limits and sends one alert.
#
# container - container name forwarded to #alert (nil for the host itself).
# service   - key into @limits (:cpu, :disk or :memory).
# fraction  - value to classify; nothing happens when it is nil or false.
# report    - free text appended after the percentage in the description.
# name      - service name used in the alert; defaults to +service+.
#
# The state is :critical above the critical limit, :warning above the warning
# limit and :ok otherwise. The description is the fraction as a percentage
# with two decimals, followed by "% " and +report+.
#
# Returns the result of #alert, or nil when +fraction+ is falsy.
def report_pct(container, service, fraction, report = '', name = nil)
  return unless fraction

  name = service if name.nil?
  thresholds = @limits[service]

  state =
    if fraction > thresholds[:critical]
      :critical
    elsif fraction > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert container, name, state, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
end
tick() click to toggle source
# File bin/riemann-docker, line 173
# Runs one round of checks.
#
# The disk check runs once (it is not per container). Then one thread is
# started per container returned by #get_containers; each thread fetches the
# container's stats and runs the enabled basic/cpu/memory checks. All threads
# are joined before returning; an error raised inside a thread is written to
# $stderr instead of propagating.
#
# Returns the Array of joined threads.
def tick
  disk if @disk_enabled

  workers = get_containers.map do |ctr|
    Thread.new(ctr) do |container|
      id = container.id
      name = get_container_name(container)

      # Stats are fetched unconditionally, as a single non-streaming read.
      raw_stats = container.connection.get("/containers/#{id}/stats", { stream: false })
      stats = Docker::Util.parse_json(raw_stats)

      if @basic_inspection_enabled
        raw_inspection = container.connection.get("/containers/#{id}/json")
        basic_inspection(id, name, Docker::Util.parse_json(raw_inspection))
      end

      cpu(id, name, stats) if @cpu_enabled
      memory(id, name, stats) if @memory_enabled
    end
  end

  workers.each do |worker|
    begin
      worker.join
    rescue => e
      $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join("\n")}"
    end
  end
end