class Embulk::Input::GoogleAnalytics::Client
Attributes
task[R]
Public Class Methods
new(task, is_preview = false)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 12
# Builds a client around the plugin task configuration.
#
# @param task [Hash] task settings; stored as-is and read by the other
#   methods through the `task` reader.
# @param is_preview [Boolean] stored as-is and reported by `preview?`.
def initialize(task, is_preview = false)
  @task, @is_preview = task, is_preview
end
Public Instance Methods
auth()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 205
# Builds the Google credentials object for the authentication method
# selected by task['auth_method'].
#
# * Plugin::AUTH_TYPE_JSON_KEY      -> service-account credentials created from
#   task["json_key_content"] with the read-only Analytics scope.
# * Plugin::AUTH_TYPE_REFRESH_TOKEN -> user refresh credentials created from
#   task['client_id'], task['client_secret'] and task['refresh_token'].
#
# The construction runs inside retryer.with_retry.
#
# @return the credentials object returned by the Google auth library.
# @raise [Embulk::ConfigError] when the auth method is unknown, or when a
#   Google::Apis::AuthorizationError is raised while building credentials.
def auth
  retryer.with_retry do
    case task['auth_method']
    when Plugin::AUTH_TYPE_JSON_KEY
      Google::Auth::ServiceAccountCredentials.make_creds(
        json_key_io: StringIO.new(task["json_key_content"]),
        scope: "https://www.googleapis.com/auth/analytics.readonly"
      )
    when Plugin::AUTH_TYPE_REFRESH_TOKEN
      Google::Auth::UserRefreshCredentials.new(
        token_credential_uri: Google::Auth::UserRefreshCredentials::TOKEN_CRED_URI,
        client_id: task['client_id'],
        client_secret: task['client_secret'],
        refresh_token: task['refresh_token']
      )
    else
      raise Embulk::ConfigError.new("Unknown Authentication method: '#{task['auth_method']}'.")
    end
  end
rescue Google::Apis::AuthorizationError => e
  # Fully qualified so the error class resolves regardless of how the
  # enclosing class is lexically nested, matching the raise above.
  raise Embulk::ConfigError.new(e.message)
end
build_report_request(page_token = nil)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 178
# Assembles the report request payload from the task settings.
#
# The time-series column is always the first dimension, followed by the
# configured dimensions. A date range is included only when a start or end
# date is configured. Preview mode asks for 10 rows per page, otherwise 10000.
#
# @param page_token [String, nil] token of the page to fetch; omitted from
#   the request when nil.
# @return [Array<Hash>] a one-element array holding the request hash.
def build_report_request(page_token = nil)
  request = {}
  request[:view_id] = view_id
  request[:dimensions] =
    [{ name: task["time_series"] }].concat(task["dimensions"].map { |dimension| { name: dimension } })
  request[:metrics] = task["metrics"].map { |metric| { expression: metric } }
  request[:include_empty_rows] = true
  request[:page_size] = preview? ? 10 : 10000

  start_date = task["start_date"]
  end_date = task["end_date"]
  if start_date || end_date
    request[:date_ranges] = [{ start_date: start_date, end_date: end_date }]
  end

  request[:page_token] = page_token if page_token
  [request]
end
canonical_column_names(columns)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 133
# Expands templated column ids (ids containing "XX") into concrete columns.
#
# For such columns:
# https://developers.google.com/analytics/devguides/reporting/core/dimsmets#view=detail&group=content_grouping
# https://developers.google.com/analytics/devguides/reporting/metadata/v3/devguide#attributes
#
# The index range runs from the smallest of minTemplateIndex /
# premiumMinTemplateIndex to the largest of maxTemplateIndex /
# premiumMaxTemplateIndex (missing values are ignored). One copy of the
# column is produced per index with "XX" replaced by that index. Columns
# without "XX" pass through unchanged.
#
# @param columns [Array<Hash>] column hashes with :id and :attributes keys.
# @return [Array<Hash>] the expanded column list, in input order.
def canonical_column_names(columns)
  columns.flat_map do |column|
    next [column] unless column[:id].match(/XX/)

    attrs = column[:attributes]
    lowest = [attrs[:minTemplateIndex], attrs[:premiumMinTemplateIndex]].compact.min
    highest = [attrs[:maxTemplateIndex], attrs[:premiumMaxTemplateIndex]].compact.max

    lowest.upto(highest).map do |index|
      column.merge(id: column[:id].gsub(/XX/, index.to_s))
    end
  end
end
each_report_row(&block)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 21
# Fetches the report page by page and passes each row to the block as a Hash.
#
# Each hash maps dimension names and metric names to the row's values. The
# time-series entry is replaced by the value returned from
# time_parse_with_profile_timezone, and a "view_id" entry is added.
#
# Rows for which too_early_data? is truthy are skipped. Iteration stops when
# a page has no rows, when there is no next page token, or after the first
# page in preview mode.
#
# @yieldparam row [Hash] one formatted report row.
def each_report_row(&block)
  Embulk.logger.info "view_id:#{view_id} timezone has been set as '#{get_profile[:timezone]}'"

  page_token = nil
  loop do
    report = get_reports(page_token).to_h[:reports].first
    data = report[:data]

    unless data.has_key?(:rows)
      Embulk.logger.warn "Result doesn't contain rows."
      break
    end

    rows = data[:rows]
    if rows.empty?
      Embulk.logger.warn "Result has 0 rows."
      break
    end

    header = report[:column_header]
    dimension_names = header[:dimensions]
    metric_names = header[:metric_header][:metric_header_entries].map { |entry| entry[:name] }

    rows.each do |row|
      record = dimension_names.zip(row[:dimensions]).to_h
      record = record.merge(metric_names.zip(row[:metrics].first[:values]).to_h)

      raw_time = record[task["time_series"]]
      # Raises when the row is the rolled-up "(other)" bucket.
      optimize_value_by_query_limit?(raw_time)
      next if too_early_data?(raw_time)

      record[task["time_series"]] = time_parse_with_profile_timezone(raw_time)
      record["view_id"] = view_id
      block.call record
    end

    # A preview only needs the first page.
    break if preview?

    page_token = report[:next_page_token]
    break unless page_token

    Embulk.logger.info "Fetching report with page_token: #{page_token}"
  end
end
get_all_profiles()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 77
# Lists the profiles (views) across all accounts and web properties
# ("~all", "~all") using the Analytics v3 service, inside retryer.with_retry.
#
# @return the response object returned by list_profiles.
def get_all_profiles
  analytics = Google::Apis::AnalyticsV3::AnalyticsService.new.tap do |svc|
    svc.authorization = auth
  end

  Embulk.logger.debug "Fetching profile from API"
  retryer.with_retry { analytics.list_profiles("~all", "~all") }
end
get_columns_list()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 128
# Returns every selectable column: the custom dimensions followed by the
# metadata columns, with templated ids expanded by canonical_column_names.
#
# Either listing may come back as nil when its response carries no :items
# entry. A nil listing is treated as empty rather than failing on `nil + ...`.
#
# @return [Array<Hash>] the value returned by canonical_column_names.
def get_columns_list
  columns = Array(get_custom_dimensions) + Array(get_metadata_columns)
  canonical_column_names(columns)
end
get_custom_dimensions()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 169
# Lists the custom dimensions of the account / web property that the
# configured profile belongs to, inside retryer.with_retry.
#
# https://developers.google.com/analytics/devguides/config/mgmt/v3/mgmtReference/management/customDimensions/list
#
# @return the :items entry of the response converted with to_h.
def get_custom_dimensions
  analytics = Google::Apis::AnalyticsV3::AnalyticsService.new
  analytics.authorization = auth

  retryer.with_retry do
    profile = get_profile
    listing = analytics.list_custom_dimensions(profile[:account_id], profile[:web_property_id])
    listing.to_h[:items]
  end
end
get_metadata_columns()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 160
# Lists the metadata columns for report type "ga", inside retryer.with_retry.
#
# https://developers.google.com/analytics/devguides/reporting/metadata/v3/reference/metadata/columns/list
#
# @return the :items entry of the response converted with to_h.
def get_metadata_columns
  analytics = Google::Apis::AnalyticsV3::AnalyticsService.new
  analytics.authorization = auth

  retryer.with_retry do
    listing = analytics.list_metadata_columns("ga")
    listing.to_h[:items]
  end
end
get_profile()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 62
# Returns the profile whose :id equals the configured view_id.
#
# The result is memoized in @profile, so the profile listing is fetched at
# most once per successful lookup.
#
# @return [Hash] the matching profile entry.
# @raise [Embulk::ConfigError] when no profile matches, including when the
#   listing has no :items entry at all.
def get_profile
  @profile ||= begin
    # A listing without :items is treated as empty so the caller gets the
    # ConfigError below instead of a NoMethodError on nil.
    profiles = get_all_profiles.to_h[:items] || []
    profile = profiles.find { |prof| prof[:id] == view_id }
    unless profile
      raise Embulk::ConfigError.new("Can't find view_id:#{view_id} profile via Google Analytics API.")
    end
    profile
  end
end
get_reports(page_token = nil)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 114
# Sends one batchGet request to the Reporting API v4 for the given page,
# inside retryer.with_retry. The request is logged as JSON before sending.
#
# https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet
#
# @param page_token [String, nil] passed through to build_report_request.
# @return the response object returned by batch_get_reports.
def get_reports(page_token = nil)
  reporting = Google::Apis::AnalyticsreportingV4::AnalyticsReportingService.new.tap do |svc|
    svc.authorization = auth
  end
  batch = Google::Apis::AnalyticsreportingV4::GetReportsRequest.new.tap do |req|
    req.report_requests = build_report_request(page_token)
  end

  Embulk.logger.info "Query to Core Report API: #{batch.to_json}"
  retryer.with_retry { reporting.batch_get_reports(batch) }
end
optimize_value_by_query_limit?(data)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 87
# Aborts the fetch when the given value is the rolled-up "(other)" bucket.
#
# For any date range, Analytics returns a maximum of 1 million rows for the
# report. Rows in excess of 1 million are rolled up into an (other) row.
# See more details: https://support.google.com/analytics/answer/1009671
#
# @param data [Object] value to inspect; compared through to_s.
# @return [nil] when the value is not "(other)".
# @raise [Embulk::DataError] when the value is "(other)".
def optimize_value_by_query_limit?(data)
  return unless data.to_s == "(other)"

  raise Embulk::DataError.new('Stop fetching data from Analytics because over 1M data fetching was limited. Please reduce data range to fetch data according to this article: https://support.google.com/analytics/answer/1009671.')
end
preview?()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 17
# Reports the preview flag that was stored by the constructor.
#
# @return the stored @is_preview value.
def preview?
  @is_preview
end
retryer()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 249
# Builds a new PerfectRetry instance configured from the task settings.
#
# The wait before retry n is task["retry_initial_wait_sec"] * 2**(n-1), so it
# doubles each time (assuming PerfectRetry passes a 1-based attempt count —
# TODO confirm). Embulk::DataError and Embulk::ConfigError are never retried.
#
# @return [PerfectRetry] a freshly configured retry helper.
def retryer
  PerfectRetry.new do |config|
    config.limit = task["retry_limit"]
    config.logger = Embulk.logger
    config.log_level = nil

    # https://developers.google.com/analytics/devguides/reporting/core/v4/errors
    # https://developers.google.com/analytics/devguides/reporting/core/v4/limits-quotas#additional_quota
    # https://github.com/google/google-api-ruby-client/blob/master/lib/google/apis/errors.rb
    # https://github.com/google/google-api-ruby-client/blob/0.9.11/lib/google/apis/core/http_command.rb#L33
    config.rescues = Google::Apis::Core::HttpCommand::RETRIABLE_ERRORS
    config.dont_rescues = [Embulk::DataError, Embulk::ConfigError]
    config.sleep = ->(attempt) { task["retry_initial_wait_sec"] * (2 ** (attempt - 1)) }
    config.raise_original_error = true
  end
end
swap_time_zone() { || ... }
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 228
# Runs the block with Time.zone set to the profile's timezone, then puts the
# previous Time.zone back, even if the block raises.
#
# @yield executed while Time.zone is the profile's timezone.
# @return the value of the block.
def swap_time_zone(&block)
  previous_zone = Time.zone
  Time.zone = get_profile[:timezone]
  yield
ensure
  # Runs on every exit path so the caller's zone is always reinstated.
  Time.zone = previous_zone
end
time_parse_with_profile_timezone(time_string)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 95
# Parses a time-series value from the report into a Time in the profile's
# timezone.
#
# The expected format depends on task["time_series"]: "%Y%m%d%H" for
# "ga:dateHour" and "%Y%m%d" for "ga:date".
#
# @param time_string [String] raw value of the time-series column.
# @return [Time] the parsed time, built with Time.zone.local inside
#   swap_time_zone.
# @raise [Embulk::DataError] when the value does not match the format.
def time_parse_with_profile_timezone(time_string)
  series = task["time_series"]
  date_format =
    case series
    when "ga:dateHour" then "%Y%m%d%H"
    when "ga:date" then "%Y%m%d"
    end

  parts = Date._strptime(time_string, date_format)
  # strptime was failed. Google API returns unexpected date string.
  raise Embulk::DataError.new("Failed to parse #{series} data. The value is '#{time_string}'(#{time_string.class}) and it doesn't match with '#{date_format}'.") unless parts

  # :hour is absent (nil) for the "ga:date" format.
  year, month, day, hour = parts.values_at(:year, :mon, :mday, :hour)
  swap_time_zone { Time.zone.local(year, month, day, hour).to_time }
end
too_early_data?(time_str)
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 236
# Tells whether a time-series value is at or after the current period in the
# profile's timezone.
#
# fetching 20160720 data on 2016-07-20, it is too early fetching
#
# The comparison is numeric: the value and "now" (formatted as "%Y%m%d%H" for
# "ga:dateHour" or "%Y%m%d" for "ga:date") are both converted with to_i.
#
# @param time_str [String] raw value of the time-series column.
# @return [Boolean, nil] the comparison result, or nil when
#   task["time_series"] is neither "ga:dateHour" nor "ga:date".
def too_early_data?(time_str)
  swap_time_zone do
    current = Time.zone.now
    stamp_format =
      case task["time_series"]
      when "ga:dateHour" then "%Y%m%d%H"
      when "ga:date" then "%Y%m%d"
      end

    stamp_format && time_str.to_i >= current.strftime(stamp_format).to_i
  end
end
view_id()
click to toggle source
# File lib/embulk/input/google_analytics/client.rb, line 201
# Reads the configured view id from the task settings.
#
# @return the value stored under task["view_id"].
def view_id
  task["view_id"]
end