class GitLab::Exporter::SidekiqProber
A prober for Sidekiq queues.
It takes the Redis URL that Sidekiq is connected to.
Constants
- POOL_SIZE
- POOL_TIMEOUT
This timeout is configured to a higher interval than the Prometheus scrape interval, to ensure that the connection is kept open instead of needing to be re-initialized
- PROBE_JOBS_LIMIT
The maximum depth (from the head) of each queue to probe. Probing the entirety of a very large queue will take longer and run the risk of timing out. But when we have a very large queue, we are most in need of reliable metrics. This trades off completeness for predictability by only taking a limited amount of items from the head of the queue.
Public Class Methods
# File lib/gitlab_exporter/sidekiq.rb, line 28
#
# Lazily builds and memoizes one Sidekiq Redis connection pool per distinct
# connection-options hash, so repeated probes reuse live connections instead
# of re-establishing them on every scrape.
def self.connection_pool
  @@connection_pool ||= Hash.new do |pools, options| # rubocop:disable Style/ClassVars
    pool_config = options.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE)
    pools[options] = Sidekiq::RedisConnection.create(pool_config)
  end
end
# File lib/gitlab_exporter/sidekiq.rb, line 36
#
# @param metrics [PrometheusMetrics] collector that probe results are added to
# @param logger  [Logger, nil] optional logger for connection errors
# @param opts    [Hash] remaining options (e.g. :redis_url, :redis_enable_client)
def initialize(metrics: PrometheusMetrics.new, logger: nil, **opts)
  @metrics = metrics
  @logger  = logger
  @opts    = opts
end
Public Instance Methods
# File lib/gitlab_exporter/sidekiq.rb, line 163
#
# Deprecated: emits the dead-set size as sidekiq_dead_jobs.
# Prefer probe_stats, which reports the same value as sidekiq_jobs_dead_size.
# Returns self so probes can be chained.
def probe_dead
  puts "[DEPRECATED] probe_dead is now considered obsolete and will be removed in future major versions,"\
    " please use probe_stats instead"

  with_sidekiq { @metrics.add("sidekiq_dead_jobs", Sidekiq::Stats.new.dead_size) }

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 80
#
# For each of Sidekiq's future-job sorted sets (schedule/retry), records:
# - sidekiq_<set>_set_processing_delay_seconds: how far past-due the oldest
#   due job is (0 when nothing is due yet)
# - sidekiq_<set>_set_backlog_count: how many jobs are already due
# Returns self so probes can be chained, consistent with the other probe_*
# methods (the original fell through and returned the with_sidekiq result).
def probe_future_sets
  now = Time.now.to_f
  with_sidekiq do
    Sidekiq.redis do |conn|
      Sidekiq::Scheduled::SETS.each do |set|
        # Default to 0; if all jobs are due in the future, there is no "negative" delay.
        delay = 0

        _job, timestamp = conn.zrangebyscore(set, "-inf", now.to_s, limit: [0, 1], withscores: true).first
        delay = now - timestamp if timestamp

        @metrics.add("sidekiq_#{set}_set_processing_delay_seconds", delay)

        # zcount is O(log(N)) (prob. binary search), so is still quick even with large sets
        @metrics.add("sidekiq_#{set}_set_backlog_count", conn.zcount(set, "-inf", now.to_s))
      end
    end
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 73
#
# Removed probe kept only as a stub: prints a removal notice and emits no
# metrics. Returns self so existing probe chains keep working.
def probe_jobs
  removal_notice = "[REMOVED] probe_jobs is now considered obsolete and does not emit any metrics,"\
    " please use probe_jobs_limit instead"
  puts removal_notice

  self
end
Count worker classes present in Sidekiq queues. This only looks at the first PROBE_JOBS_LIMIT
jobs in each queue. This means that we run a single LRANGE command for each queue, which does not block other commands. For queues over PROBE_JOBS_LIMIT
in size, this means that we will not have completely accurate statistics, but the probe performance will also not degrade as the queue gets larger.
# File lib/gitlab_exporter/sidekiq.rb, line 107
#
# Counts enqueued jobs per worker class, emitting sidekiq_enqueued_jobs with a
# name label per class. Inspects at most PROBE_JOBS_LIMIT jobs from the head
# of each queue so that very large queues cannot stall the probe.
# Returns self so probes can be chained.
def probe_jobs_limit
  with_sidekiq do
    job_stats = Hash.new(0)

    Sidekiq::Queue.all.each do |queue|
      Sidekiq.redis do |conn|
        # LRANGE's stop index is inclusive: 0..(LIMIT - 1) yields exactly
        # PROBE_JOBS_LIMIT items, whereas 0..LIMIT would fetch one extra job.
        conn.lrange("queue:#{queue.name}", 0, PROBE_JOBS_LIMIT - 1).each do |job|
          job_class = Sidekiq.load_json(job)["class"]
          job_stats[job_class] += 1
        end
      end
    end

    job_stats.each do |class_name, count|
      @metrics.add("sidekiq_enqueued_jobs", count, name: class_name)
    end
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 61
#
# Emits size, latency, and paused state for every Sidekiq queue, labelled by
# queue name. Returns self so probes can be chained.
def probe_queues
  with_sidekiq do
    Sidekiq::Queue.all.each do |q|
      labels = { name: q.name }
      @metrics.add("sidekiq_queue_size", q.size, **labels)
      @metrics.add("sidekiq_queue_latency_seconds", q.latency, **labels)
      @metrics.add("sidekiq_queue_paused", q.paused? ? 1 : 0, **labels)
    end
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 147
#
# Counts jobs waiting in the retry set per worker class and emits
# sidekiq_to_be_retried_jobs with a name label per class.
# Returns self so probes can be chained.
def probe_retries
  with_sidekiq do
    # each_with_object instead of map: the original used map purely for its
    # side effect, allocating and discarding an intermediate array.
    retry_stats = Sidekiq::RetrySet.new.each_with_object(Hash.new(0)) do |job, counts|
      counts[job.klass] += 1
    end

    retry_stats.each do |class_name, count|
      @metrics.add("sidekiq_to_be_retried_jobs", count, name: class_name)
    end
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 42
#
# Emits Sidekiq's global counters (processed/failed totals, queue sizes,
# default-queue latency, process and worker counts) as gauge metrics.
# Returns self so probes can be chained.
def probe_stats
  # Metric name => Sidekiq::Stats reader; insertion order preserves the
  # original emission order.
  stat_readers = {
    "sidekiq_jobs_processed_total" => :processed,
    "sidekiq_jobs_failed_total" => :failed,
    "sidekiq_jobs_enqueued_size" => :enqueued,
    "sidekiq_jobs_scheduled_size" => :scheduled_size,
    "sidekiq_jobs_retry_size" => :retry_size,
    "sidekiq_jobs_dead_size" => :dead_size,
    "sidekiq_default_queue_latency_seconds" => :default_queue_latency,
    "sidekiq_processes_size" => :processes_size,
    "sidekiq_workers_size" => :workers_size
  }

  with_sidekiq do
    stats = Sidekiq::Stats.new
    stat_readers.each { |metric, reader| @metrics.add(metric, stats.public_send(reader)) }
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 129
#
# Counts currently-executing jobs per worker class (from Sidekiq's Workers
# API) and emits sidekiq_running_jobs with a name label per class.
# Returns self so probes can be chained.
def probe_workers
  with_sidekiq do
    worker_stats = Hash.new(0)

    # each instead of map: the original used map purely for its side effect,
    # allocating and discarding an intermediate array.
    Sidekiq::Workers.new.each do |_pid, _tid, work|
      job_klass = work["payload"]["class"]
      worker_stats[job_klass] += 1
    end

    worker_stats.each do |class_name, count|
      @metrics.add("sidekiq_running_jobs", count, name: class_name)
    end
  end

  self
end
# File lib/gitlab_exporter/sidekiq.rb, line 174
#
# Renders the collected metrics and writes them to the given IO-like target
# (anything responding to #write).
def write_to(target)
  rendered = @metrics.to_s
  target.write(rendered)
end
Private Instance Methods
# File lib/gitlab_exporter/sidekiq.rb, line 211
#
# Memoized Redis reachability check: pings once, caches the boolean result in
# @connected, and logs (instead of raising) on connection errors.
def connected?
  return @connected unless @connected.nil?

  Sidekiq.redis { |conn| @connected = conn.ping == "PONG" }
rescue Redis::BaseConnectionError => e
  @logger&.error "Error connecting to the Redis: #{e}"
  @connected = false
end
# File lib/gitlab_exporter/sidekiq.rb, line 205
#
# Whether the Redis CLIENT command may be used when connecting.
# Defaults to true when the :redis_enable_client option is not set.
def redis_enable_client?
  configured = @opts[:redis_enable_client]
  configured.nil? ? true : configured
end
# File lib/gitlab_exporter/sidekiq.rb, line 193
#
# Builds the Redis connection options hash from @opts. Setting :id to nil
# disables the CLIENT SETNAME call for servers where CLIENT is restricted.
def redis_options
  {
    url: @opts[:redis_url],
    namespace: "resque:gitlab",
    connect_timeout: 1,
    reconnect_attempts: 0
  }.tap { |opts| opts[:id] = nil unless redis_enable_client? }
end
# File lib/gitlab_exporter/sidekiq.rb, line 180
#
# Points the global Sidekiq client at our pooled Redis connection, then yields
# only if Redis is actually reachable; otherwise returns nil silently.
def with_sidekiq
  # TODO: this is not concurrent safe as we change global context
  # It means that we are unable to use many different sidekiq's
  # which is not a problem as of now
  Sidekiq.configure_client do |config|
    config.redis = self.class.connection_pool[redis_options]
  end

  yield if connected?
end