class Object

Public Instance Methods

cpu() click to toggle source
# File lib/remon/checks/redis.rb, line 8
     # Emits a single "cpu" event built from a randomly generated metric.
     #
     # The metric is a random integer in 1..100; it is classified by `state`
     # using the warn/critical thresholds read from `o`.
     #
     # NOTE(review): the metric comes from `rand`, so this looks like a
     # placeholder rather than a real measurement -- confirm.
     #
     # Returns whatever `event` returns.
     def cpu
       metric = rand(1..100)
       level = state(metric, warn: o[:cpu_warn], critical: o[:cpu_critical])

       # `event` is invoked exactly once, with a hash. `state` needs the
       # metric and thresholds, so only the computed `level` is passed on.
       event({
         service: "cpu",
         description: "cpu",
         metric: metric,
         state: level
       })
     end
cpu_and_iowait() click to toggle source
# File lib/remon/checks/system.rb, line 26
# Samples CPU counters and reports usage relative to the previous sample.
#
# Returns one of:
# - the result of `e(...)` (an :unknown "cpu" report) when `@sys.cpu_stat`
#   yields nothing;
# - nil when there is no earlier sample stored in `@old_cpu` to diff against;
# - otherwise `[cpu_event(used), iowait_event(iowait)]`.
def cpu_and_iowait
  previous = @old_cpu
  current = @sys.cpu_stat

  unless current
    # NOTE(review): `e` is not defined in the visible source -- confirm the
    # helper exists where this check is loaded.
    return e('cpu', :unknown, nil, "/proc/stat doesn't include a CPU line")
  end

  # Remember this sample so the next call can compute a delta against it.
  @old_cpu = current
  return nil unless previous

  used, iowait = @sys.cpu_usage(previous, current)
  [cpu_event(used), iowait_event(iowait)]
end
cpu_event(metric) click to toggle source
# File lib/remon/checks/system.rb, line 80
# Builds the "cpu" event for a usage value.
#
# metric - CPU usage; it is multiplied by 100 for both the description and
#          the state lookup (presumably a 0..1 fraction -- confirm against
#          the caller).
#
# The description is the percentage (rounded to 2 places) followed by the
# output of a `ps | sort | head -10` pipeline listing processes by %CPU.
#
# Returns whatever `event` returns.
def cpu_event(metric)
  percent = metric * 100
  summary = "#{percent.round(2)}% user+nice+system"
  top_processes = `ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp

  event service: "cpu",
        description: "#{summary}\n\n#{top_processes}",
        metric: metric,
        state: service_state("cpu", percent)
end
description(failed_nodes) click to toggle source
# File lib/remon/checks/consul.rb, line 26
# Renders a human-readable summary of failed nodes.
#
# failed_nodes - a collection responding to `size` and `map` that yields
#                name/status pairs (e.g. a Hash).
#
# Returns "<n> failed nodes", followed by one "name: status" line per entry
# when there is at least one entry.
def description(failed_nodes)
  count = failed_nodes.size
  headline = "#{count} failed nodes"
  return headline unless count > 0

  lines = failed_nodes.map { |node, status| "#{node}: #{status}" }
  [headline, *lines].join("\n")
end
disk() click to toggle source
# File lib/remon/checks/disk.rb, line 19
# Emits a "disk" event describing the disk with the highest usage.
#
# Reads per-disk usage from `@disk.disks_usage`; each entry is indexed with
# `:percent`. The largest `:percent` becomes the metric, and that value
# times 100 is passed to `service_state`.
#
# Returns whatever `event` returns.
def disk
  usages = @disk.disks_usage
  # The fullest disk drives both the metric and the state.
  fullest = usages.max_by { |usage| usage[:percent] }
  highest = fullest[:percent]
  level = service_state(highest * 100)

  event({
    service: "disk",
    description: description(usages),
    state: level,
    metric: highest
  })
end
drift_status() click to toggle source
# File lib/remon/checks/salt.rb, line 15
# Emits a "salt" event from `@salt.status`.
#
# The status is indexed with :state, :ok and :total. A :state of "ok" maps
# to state "ok" / metric 0; anything else maps to "warning" / metric 1.
#
# Returns whatever `event` returns.
def drift_status
  report = @salt.status
  healthy = report[:state] == "ok"

  event({
    service: "salt",
    description: "#{report[:state]}: #{report[:ok]}/#{report[:total]}",
    state: healthy ? "ok" : "warning",
    metric: healthy ? 0 : 1
  })
end
http_status() click to toggle source
# File lib/remon/checks/http.rb, line 22
# Probes the HTTP endpoint and emits an "http <url>" event.
#
# `@http.status` is called with the read/open timeouts from `opts` and is
# expected to return a [time, status] pair. `time` is multiplied by 1000
# and reported as "ms" in the description.
#
# Returns whatever `event` returns.
def http_status
  timeouts = {
    read_timeout: opts[:read_timeout],
    open_timeout: opts[:open_timeout]
  }
  elapsed, code = @http.status(**timeouts)
  level = state(code)
  elapsed_ms = (elapsed * 1000).round(2)

  event({
    service: "http #{@url}",
    description: "#{code} in #{elapsed_ms} ms",
    state: level,
    metric: metric(level)
  })
end
init(host: "127.0.0.1", port: 8500) click to toggle source
# File lib/remon/checks/consul.rb, line 5
# Prepares the consul metrics client used by this check.
#
# host - address passed to Metrics::Consul (default "127.0.0.1").
# port - port passed to Metrics::Consul (default 8500).
#
# Stores the client in `@consul` and returns it.
def init(host: "127.0.0.1", port: 8500)
  endpoint = { host: host, port: port }
  @consul = Metrics::Consul.new(**endpoint)
end
iowait_event(metric) click to toggle source
# File lib/remon/checks/system.rb, line 88
# Builds the "iowait" event for an iowait value.
#
# metric - iowait share; it is multiplied by 100 for both the description
#          and the state lookup (presumably a 0..1 fraction -- confirm
#          against the caller).
#
# Returns whatever `event` returns.
def iowait_event(metric)
  percent = metric * 100

  # Round for display only, matching cpu_event: float multiplication can
  # otherwise leak noise such as "7.000000000000001% iowait".
  event service: "iowait",
        description: "#{percent.round(2)}% iowait",
        metric: metric,
        state: service_state("iowait", percent)
end
loadavg() click to toggle source
# File lib/remon/checks/system.rb, line 39
# Emits a "load" event from `@sys.loadavg_normalized`.
#
# The value is used unchanged as the metric, in the description, and as the
# input to `service_state("load", ...)`.
#
# Returns whatever `event` returns.
def loadavg
  load_per_core = @sys.loadavg_normalized
  summary = "1-minute load average/core is #{load_per_core}"
  level = service_state("load", load_per_core)

  event({
    service: "load",
    metric: load_per_core,
    description: summary,
    state: level
  })
end
members_status() click to toggle source
# File lib/remon/checks/consul.rb, line 15
# Emits a "consul members" event based on `@consul.failed_nodes`.
#
# Any failed node makes the state "critical"; otherwise it is "ok". The
# description and metric come from the `description` and `metric` helpers.
#
# Returns whatever `event` returns.
def members_status
  failures = @consul.failed_nodes
  level = failures.size.positive? ? "critical" : "ok"

  event({
    service: "consul members",
    description: description(failures),
    state: level,
    metric: metric(level)
  })
end
memory() click to toggle source
# File lib/remon/checks/system.rb, line 49
# Emits a "memory" event from `@sys.memory`.
#
# The description shows the value times 100 (rounded to 2 places) as
# "% used", followed by the output of a `ps | sort | head -10` pipeline
# listing processes by %MEM.
#
# NOTE(review): unlike cpu_event/iowait_event, the raw value (not value
# times 100) is handed to `service_state` -- confirm the thresholds for
# "memory" are expressed on the same scale.
#
# Returns whatever `event` returns.
def memory
  used = @sys.memory
  summary = "#{(used * 100).round(2)}% used"
  top_processes = `ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp
  details = "#{summary}\n\n#{top_processes}"

  event({
    service: "memory",
    metric: used,
    description: details,
    state: service_state("memory", used)
  })
end
metric(state) click to toggle source
# File lib/remon/checks/consul.rb, line 37
# Maps a state string to a numeric metric.
#
# Returns 0 when state is "ok", and 1 for any other value.
def metric(state)
  return 0 if state == "ok"

  1
end
oom_event(count, tag:) click to toggle source
# File lib/remon/checks/oom.rb, line 16
# Emits an "oom log <tag>" event for a number of OOM occurrences.
#
# count - number of occurrences; also used as the event metric.
# tag   - label interpolated into the service name.
#
# The state is "warning" when count is above zero, otherwise "ok".
#
# Returns whatever `event` returns.
def oom_event(count, tag:)
  level = count > 0 ? "warning" : "ok"
  payload = {
    service: "oom log #{tag}",
    description: "#{count} times oom",
    state: level,
    metric: count
  }
  event(payload)
end
run() click to toggle source
# File lib/remon/checks/consul.rb, line 9
# Entry point of the check: delegates to `members_status` and returns its
# result.
def run
  members_status
end
salt_state(output) click to toggle source
# File lib/remon/scripts/salt-status, line 4
# Summarizes salt JSON output as "<state>:<num_ok>:<total>".
#
# output - JSON text whose "local" key maps to an object; each of its values
#          is indexed with "result".
#
# An entry counts as ok when its "result" is truthy. The state is "ok" when
# every entry is ok, otherwise "warning".
#
# Raises whatever JSON.parse raises on malformed input.
def salt_state(output)
  results = JSON.parse(output)["local"].values
  total = results.count
  num_ok = results.count { |entry| entry["result"] }
  label = num_ok == total ? "ok" : "warning"
  "#{label}:#{num_ok}:#{total}"
end
state(status) click to toggle source
# File lib/remon/checks/http.rb, line 44
# Classifies an HTTP status code.
#
# Returns "critical" for any status of 500 or above and for 444; every
# other status yields "ok".
#
# NOTE(review): 444 is presumably nginx's non-standard "no response" code --
# confirm why it is singled out.
def state(status)
  failing = status >= 500 || status == 444
  failing ? "critical" : "ok"
end
updates_available() click to toggle source
# File lib/remon/checks/yum.rb, line 15
# Emits a "yum updates" event from `@yum.updates_available`.
#
# A count above zero yields state "warning" / metric 1; otherwise state
# "ok" / metric 0.
#
# Any StandardError raised along the way is logged as "<class>: <message>"
# and the result of `warning_event("yum updates")` is returned instead.
def updates_available
  service = "yum updates"
  pending = @yum.updates_available
  outdated = pending > 0

  event({
    service: service,
    description: "#{pending} updates available",
    state: outdated ? "warning" : "ok",
    metric: outdated ? 1 : 0
  })
rescue => err
  logger.error "#{err.class}: #{err.message}"
  warning_event service
end
uptime() click to toggle source
# File lib/remon/checks/system.rb, line 61
  # Emits an "uptime" event whose metric is the uptime in days, rounded to
  # two decimal places.
  #
  # `@sys.uptime` is treated as a number of seconds. The description lists
  # the host IPs (joined once and cached in `@ips`) and
  # `Sysinfo.instance_type`. The state is always "ok".
  #
  # Returns whatever `event` returns.
  def uptime
    up_seconds = @sys.uptime
    # Convert to Float first: with an Integer uptime, `/` would truncate to
    # whole days and make the two-decimal rounding meaningless.
    metric = (up_seconds.to_f / 24 / 3600).round(2)
    @ips ||= Sysinfo.ips.join(", ")
    description = <<~HEREDOC
      ip: "#{@ips}"
      instance_type: "#{Sysinfo.instance_type}"
    HEREDOC

    event({
      service: "uptime",
      metric: metric,
      description: description,
      state: "ok"
    })
  end