class Object
Public Instance Methods
cpu()
click to toggle source
# File lib/remon/checks/redis.rb, line 8
# Emits a "cpu" event whose metric is a random integer between 1 and 100.
#
# NOTE(review): the first `event` call below invokes `state` and
# `description` with no arguments, unlike the `state(metric, ...)` call on
# the line before it. It looks like a leftover line -- confirm whether it
# should be removed.
def cpu
  metric = rand(1..100)
  level = state(metric, warn: o[:cpu_warn], critical: o[:cpu_critical])
  event "cpu", state, metric, description
  payload = { service: "cpu", description: "cpu", metric: metric, state: level }
  event(payload)
end
cpu_and_iowait()
click to toggle source
# File lib/remon/checks/system.rb, line 26
# Builds the cpu and iowait events from two consecutive CPU samples.
#
# The sample returned by @sys.cpu_stat is remembered in @old_cpu so that the
# next call can compare against it. Returns nil when no earlier sample is
# stored yet, and the result of the `e` helper (an :unknown "cpu" report)
# when no current sample is available.
def cpu_and_iowait
  previous = @old_cpu
  current = @sys.cpu_stat
  unless current
    return e('cpu', :unknown, nil, "/proc/stat doesn't include a CPU line")
  end

  @old_cpu = current
  # Nothing to compare against until a previous sample has been stored.
  return unless previous

  used, iowait = @sys.cpu_usage(previous, current)
  [cpu_event(used), iowait_event(iowait)]
end
cpu_event(metric)
click to toggle source
# File lib/remon/checks/system.rb, line 80
# Emits the "cpu" event for the given usage metric.
#
# The description shows the metric as a percentage rounded to two decimals,
# followed by the first ten lines of `ps` output sorted by CPU share.
# The metric itself is forwarded unchanged; the state is derived from
# metric * 100 (presumably metric is a 0..1 fraction -- confirm with caller).
def cpu_event(metric)
  percent = (metric * 100).round(2)
  top_processes = `ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp
  description = "#{percent}% user+nice+system\n\n#{top_processes}"
  event service: "cpu",
        description: description,
        metric: metric,
        state: service_state("cpu", metric * 100)
end
description(failed_nodes)
click to toggle source
# File lib/remon/checks/consul.rb, line 26
# Renders a text summary of failed nodes.
#
# failed_nodes is any collection of key/value pairs. The result is
# "<n> failed nodes", followed by one "key: value" line per pair when the
# collection is not empty.
def description(failed_nodes)
  count = failed_nodes.size
  summary = "#{count} failed nodes"
  return summary unless count > 0

  details = failed_nodes.map { |name, status| "#{name}: #{status}" }.join("\n")
  "#{summary}\n#{details}"
end
disk()
click to toggle source
# File lib/remon/checks/disk.rb, line 19
# Emits a "disk" event based on the entry with the highest :percent value
# among those returned by @disk.disks_usage.
#
# NOTE(review): if disks_usage returns an empty list, max_by yields nil and
# the [:percent] lookup raises -- confirm that cannot happen.
def disk
  usages = @disk.disks_usage
  fullest = usages.max_by { |entry| entry[:percent] }
  metric = fullest[:percent]
  level = service_state(metric * 100)
  event({
    service: "disk",
    description: description(usages),
    state: level,
    metric: metric
  })
end
drift_status()
click to toggle source
# File lib/remon/checks/salt.rb, line 15
# Emits a "salt" event from @salt.status.
#
# The event is "ok" with metric 0 when status[:state] is "ok"; anything else
# is reported as "warning" with metric 1. The description shows the raw
# state and the ok/total counts.
def drift_status
  status = @salt.status
  in_sync = status[:state] == "ok"
  event({
    service: "salt",
    description: "#{status[:state]}: #{status[:ok]}/#{status[:total]}",
    state: in_sync ? "ok" : "warning",
    metric: in_sync ? 0 : 1
  })
end
http_status()
click to toggle source
# File lib/remon/checks/http.rb, line 22
# Queries @http.status with the configured timeouts and emits an
# "http <url>" event describing the returned status and the elapsed time in
# milliseconds (rounded to two decimals).
def http_status
  time, status = @http.status(
    read_timeout: opts[:read_timeout],
    open_timeout: opts[:open_timeout]
  )
  # Named differently from the `state` method to keep the call readable.
  current_state = state(status)
  elapsed_ms = (time * 1000).round(2)
  event({
    service: "http #{@url}",
    description: "#{status} in #{elapsed_ms} ms",
    state: current_state,
    metric: metric(current_state)
  })
end
init(host: "127.0.0.1", port: 8500)
click to toggle source
# File lib/remon/checks/consul.rb, line 5
# Creates the Metrics::Consul client used by this check and stores it in
# @consul. Defaults to 127.0.0.1:8500.
def init(host: "127.0.0.1", port: 8500)
  connection = { host: host, port: port }
  @consul = Metrics::Consul.new(**connection)
end
iowait_event(metric)
click to toggle source
# File lib/remon/checks/system.rb, line 88
# Emits the "iowait" event for the given metric.
#
# The description shows the metric as a percentage rounded to two decimals,
# matching cpu_event; without rounding, float noise such as
# "7.000000000000001% iowait" leaks into the text. The metric is forwarded
# unchanged and the state is still derived from the unrounded metric * 100.
def iowait_event(metric)
  percent = (metric * 100).round(2)
  description = "#{percent}% iowait"
  event service: "iowait",
        description: description,
        metric: metric,
        state: service_state("iowait", metric * 100)
end
loadavg()
click to toggle source
# File lib/remon/checks/system.rb, line 39
# Emits a "load" event using the value of @sys.loadavg_normalized as both
# the metric and the input to service_state.
def loadavg
  load_per_core = @sys.loadavg_normalized
  event({
    service: "load",
    metric: load_per_core,
    description: "1-minute load average/core is #{load_per_core}",
    state: service_state("load", load_per_core)
  })
end
members_status()
click to toggle source
# File lib/remon/checks/consul.rb, line 15
# Emits a "consul members" event: "critical" when @consul.failed_nodes is
# non-empty, "ok" otherwise. The metric is derived from that state.
def members_status
  failed = @consul.failed_nodes
  status = failed.size.zero? ? "ok" : "critical"
  event({
    service: "consul members",
    description: description(failed),
    state: status,
    metric: metric(status)
  })
end
memory()
click to toggle source
# File lib/remon/checks/system.rb, line 49
# Emits a "memory" event from @sys.memory.
#
# The description shows the metric as a percentage rounded to two decimals,
# followed by the first ten lines of `ps` output sorted by memory share.
#
# NOTE(review): service_state receives the raw metric here, whereas
# cpu_event passes metric * 100 -- confirm which scale the thresholds use.
def memory
  metric = @sys.memory
  percent = (metric * 100).round(2)
  top_processes = `ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp
  description = "#{percent}% used\n\n#{top_processes}"
  event({
    service: "memory",
    metric: metric,
    description: description,
    state: service_state("memory", metric)
  })
end
metric(state)
click to toggle source
# File lib/remon/checks/consul.rb, line 37
# Maps a state string to a numeric metric: 0 for "ok", 1 for anything else.
def metric(state)
  return 0 if state == "ok"

  1
end
oom_event(count, tag:)
click to toggle source
# File lib/remon/checks/oom.rb, line 16
# Emits an "oom log <tag>" event whose metric is the given count; the state
# is "warning" when count is greater than zero and "ok" otherwise.
def oom_event(count, tag:)
  level =
    if count > 0
      "warning"
    else
      "ok"
    end
  event({
    service: "oom log #{tag}",
    description: "#{count} times oom",
    state: level,
    metric: count
  })
end
run()
click to toggle source
# File lib/remon/checks/consul.rb, line 9
# Entry point of the check: delegates to members_status and returns its
# result.
def run
  members_status
end
salt_state(output)
click to toggle source
# File lib/remon/scripts/salt-status, line 4
# Summarises JSON state output as "<state>:<ok count>:<total>".
#
# output is a JSON document whose "local" key maps to an object of entries;
# an entry counts as ok when its "result" is truthy. The state is "ok" when
# every entry is ok and "warning" otherwise.
def salt_state(output)
  entries = JSON.parse(output)["local"].values
  total = entries.count
  num_ok = entries.count { |entry| entry["result"] }
  label = num_ok == total ? "ok" : "warning"
  "#{label}:#{num_ok}:#{total}"
end
state(status)
click to toggle source
# File lib/remon/checks/http.rb, line 44
# Maps a numeric HTTP status to a state string: "critical" for 444 and for
# anything at or above 500, "ok" for everything else.
def state(status)
  failing = status >= 500 || status == 444
  failing ? "critical" : "ok"
end
updates_available()
click to toggle source
# File lib/remon/checks/yum.rb, line 15
# Emits a "yum updates" event from @yum.updates_available: "warning" with
# metric 1 when the count is above zero, "ok" with metric 0 otherwise.
#
# Any StandardError raised along the way is logged through `logger` and
# reported via warning_event for the same service name.
def updates_available
  service = "yum updates"
  pending = @yum.updates_available
  has_updates = pending > 0
  event({
    service: service,
    description: "#{pending} updates available",
    state: has_updates ? "warning" : "ok",
    metric: has_updates ? 1 : 0
  })
rescue => error
  logger.error "#{error.class}: #{error.message}"
  warning_event service
end
uptime()
click to toggle source
# File lib/remon/checks/system.rb, line 61
# Emits an always-"ok" "uptime" event whose metric is @sys.uptime divided by
# 24 and then by 3600. The description lists the host IPs (computed once
# and cached in @ips) and Sysinfo.instance_type.
#
# NOTE(review): if @sys.uptime returns an Integer the divisions truncate and
# round(2) has no effect -- confirm it returns a Float.
def uptime
  up_seconds = @sys.uptime
  days = (up_seconds / 24 / 3600).round(2)
  @ips ||= Sysinfo.ips.join(", ")
  description = <<~HEREDOC
    ip: "#{@ips}"
    instance_type: "#{Sysinfo.instance_type}"
  HEREDOC
  event({
    service: "uptime",
    metric: days,
    description: description,
    state: "ok"
  })
end