class LogStash::Inputs::GoogleAnalyticsDaily

Pull daily reports from Google Analytics using the v3 Core Reporting API. This plugin will generate one Logstash event per date, with each event containing all the data for that date. The plugin will try to maintain a single event per date and list of metrics.

Public Instance Methods

register() click to toggle source
# File lib/logstash/inputs/google_analytics_daily.rb, line 91
# Intentionally empty: no setup work is performed here. Returns nil.
def register; end
run(queue) click to toggle source
# File lib/logstash/inputs/google_analytics_daily.rb, line 94
# Main input loop: queries Google Analytics once per date in the configured
# range and pushes one Logstash event per (result row, metric) pair to +queue+.
#
# Every pass of the loop:
# 1. builds an API client via get_service,
# 2. resolves the date range (@start_date / @end_date, each defaulting to
#    yesterday) and stores it in @dates,
# 3. fetches a report for every date and emits the events,
# 4. stops if @interval is nil, otherwise sleeps for whatever is left of
#    @interval (interruptible through stop?).
#
# @param queue [#<<] receives the generated LogStash::Event objects
def run(queue)
  # we abort the loop if stop? becomes true
  until stop?
    plugin_start_time = Time.now

    analytics = get_service

    # Setting this here, not in the config at the top,
    # because we need to reset the date for each new loop (new day)
    start_date = @start_date || (Date.today - 1).to_s
    end_date = @end_date || (Date.today - 1).to_s

    @dates = (Date.parse(start_date)..Date.parse(end_date))

    @dates.each do |date|
      date = date.to_s
      options = get_request_parameters(date)

      results = analytics.get_ga_data(
        options[:view_id],
        options[:start_date],
        options[:end_date],
        options[:metrics],
        dimensions: options[:dimensions],
        filters: options[:filters],
        include_empty_rows: options[:include_empty_rows],
        sampling_level: options[:sampling_level],
        segment: options[:segment],
        sort: options[:sort]
      )

      # No rows for this date means there is nothing to emit
      next unless results.rows && results.rows.first

      column_headers = results.column_headers.map(&:name)

      # Example with dimensions, multiple metrics:
      # rows: [[Chrome, Cape Town, 6, 8], [Chrome, Paris, 1, 5], [Safari, Paris, 1, 3]], column_headers: ['ga:browser', 'ga:city', 'ga:user', 'ga:sessions']
      # Example with dimension, single metric:
      # rows: [[Chrome, 6]], column_headers: ['ga:browser', 'ga:user']
      # Example with no dimension, single metric:
      # rows: [[6]], column_headers: ['ga:user']
      # Dimensions always appear before values
      rows = results.rows.map do |row|
        dimensions = []
        metrics = []

        # Combine GA column headers with values from row
        column_headers.zip(row) do |header, value|
          if is_num(value)
            float_value = Float(value)
            # Sometimes GA returns infinity. If so, the number is invalid,
            # so set it to zero. finite? also rejects -Infinity and NaN.
            value = float_value.finite? ? float_value : 0.0
          end

          entry = { name: header, value: value }
          # A column is a metric only if it was requested as one;
          # everything else is treated as a dimension.
          if @metrics.include?(header)
            metrics << entry
          else
            dimensions << entry
          end
        end

        { metrics: metrics, dimensions: dimensions }
      end

      query = results.query.to_h
      profile_info = results.profile_info.to_h

      # Emit one event per metric per row
      @metrics.each do |metric|
        rows.each do |row|
          metric_entry = row[:metrics].find { |m| m[:name] == metric }
          if metric_entry.nil?
            # No column header matched this configured metric; skip the row
            # instead of failing on a nil lookup.
            @logger.warn(
              "Metric not found in Google Analytics response. Skipping row.",
              :metric => metric,
              :date => date
            )
            next
          end

          event = LogStash::Event.new
          decorate(event)
          # Populate Logstash event fields
          event.set('ga.contains_sampled_data', results.contains_sampled_data?)
          event.set('ga.query', query.to_json) if @store_query
          event.set('ga.profile_info', profile_info) if @store_profile
          event.set('ga.date', date)

          event.set("ga.metric.name", metric)
          event.set("ga.metric.value", metric_entry[:value])

          # Remap dimensions into key: value
          # Might lead to "mapping explosion", but otherwise aggregations are tough
          row[:dimensions].each do |d|
            dimension_name = d[:name].sub("ga:", '')
            event.set("ga.dimensions.#{dimension_name}", d[:value])
          end

          queue << event
        end
      end
    end

    # If no interval was set, we're done
    break if @interval.nil?

    # Otherwise we sleep till the next run
    time_lapsed = Time.now - plugin_start_time
    # Sleep for the remainder of the interval, or 0 if the duration ran
    # longer than the interval.
    time_to_sleep_for = [0, @interval - time_lapsed].max
    if time_to_sleep_for == 0
      @logger.warn(
        "Execution ran longer than the interval. Skipping sleep.",
        :duration => time_lapsed,
        :interval => @interval
      )
    else
      # Report the time actually slept, not the configured interval
      @logger.info(
        "Sleeping for #{time_to_sleep_for} seconds"
      )
      Stud.stoppable_sleep(time_to_sleep_for) { stop? }
    end
  end # loop
end

Private Instance Methods

get_request_parameters(date) click to toggle source
# File lib/logstash/inputs/google_analytics_daily.rb, line 233
# Builds the request options for a single-day query.
#
# :view_id, :start_date, :end_date, :metrics and :output are always present.
# The optional keys (:dimensions, :filters, :sort, :segment, :sampling_level,
# :include_empty_rows) are added only when the matching setting is configured.
#
# @param date [String] used as both the start and the end date of the query
# @return [Hash{Symbol => Object}] the assembled options
def get_request_parameters(date)
  options = {
    view_id: @view_id,
    start_date: date,
    end_date: date,
    metrics: @metrics.join(','),
    output: 'json'
  }
  # An empty dimension list must be omitted: 0 is truthy in Ruby, so testing
  # `size` alone would let an empty string through as the dimensions value.
  options[:dimensions] = @dimensions.join(',') if @dimensions && !@dimensions.empty?
  options[:filters] = @filters if @filters
  options[:sort] = @sort if @sort
  options[:segment] = @segment if @segment
  options[:sampling_level] = @sampling_level if @sampling_level
  # Checked against nil (not truthiness) so an explicit false is still sent
  options[:include_empty_rows] = @include_empty_rows unless @include_empty_rows.nil?
  options
end
get_service() click to toggle source
# File lib/logstash/inputs/google_analytics_daily.rb, line 250
# Builds an Analytics v3 service client authorized with the service-account
# key stored at @key_file_path, using the read-only Analytics scope.
#
# @return [Google::Apis::AnalyticsV3::AnalyticsService] client with its
#   authorization set
def get_service
  scope = 'https://www.googleapis.com/auth/analytics.readonly'
  # Block form closes the key file as soon as the credentials are built;
  # this method is called on every pass of the run loop, so an unclosed
  # handle would accumulate.
  authorizer = File.open(@key_file_path) do |key_io|
    Google::Auth::ServiceAccountCredentials.make_creds(
      json_key_io: key_io,
      scope: scope
    )
  end

  analytics = Google::Apis::AnalyticsV3::AnalyticsService.new
  analytics.authorization = authorizer
  analytics
end
is_num(a) click to toggle source
# File lib/logstash/inputs/google_analytics_daily.rb, line 264
# Tells whether +a+ can be converted by Kernel#Float.
#
# @param a [Object] candidate value
# @return [Boolean] true when Float(a) succeeds, false when it raises
def is_num(a)
  Float(a)
  true
rescue StandardError
  false
end