class Toccatore::Base
Constants
- ICON_URL
icon for Slack messages
Public Instance Methods
get_data(options={})
click to toggle source
# File lib/toccatore/base.rb, line 113
# Fetches one page of results.
#
# Builds the query URL from +options+ via #get_query_url and requests it
# with Maremma.get, passing the same +options+ hash to the HTTP call.
#
# @param options [Hash] query and request options
# @return the value returned by Maremma.get
def get_data(options={})
  Maremma.get(get_query_url(options), options)
end
get_doi_ra(prefix)
click to toggle source
# File lib/toccatore/base.rb, line 163
# Looks up the registration agency for a DOI prefix.
#
# Queries https://api.datacite.org/prefixes/<prefix> and reads
# data -> attributes -> registration-agency from the response body.
#
# @param prefix [String] DOI prefix to look up
# @return [nil] when +prefix+ is blank
# @return the "errors" entry of the response body when it is present
# @return the registration agency, or nil when the response does not carry one
def get_doi_ra(prefix)
  return nil if prefix.blank?

  response = Maremma.get("https://api.datacite.org/prefixes/#{prefix}")
  body = response.body

  # surface API errors to the caller instead of a registration agency
  return body.fetch("errors") if body.fetch("errors", nil).present?

  attributes = body.fetch("data", {}).fetch('attributes', {})
  attributes.fetch('registration-agency', nil)
end
get_name_identifier(author)
click to toggle source
# File lib/toccatore/base.rb, line 258
# Extracts an ORCID URL from an author hash.
#
# The scheme is read from "nameIdentifierScheme" and compared
# case-insensitively with "orcid". A missing key and an explicit nil value
# are both treated as "orcid" (previously a nil value raised NoMethodError
# on +downcase+). The identifier itself must pass #validate_orcid.
#
# @param author [Hash] author metadata with string keys
# @return [String, nil] "http://orcid.org/<id>" or nil when the scheme is not
#   ORCID or the identifier does not validate
def get_name_identifier(author)
  # fetch's default only covers a missing key, so guard against nil as well
  scheme = (author.fetch("nameIdentifierScheme", nil) || "orcid").to_s.downcase
  return nil unless scheme == "orcid"

  orcid = validate_orcid(author.fetch("nameIdentifier", nil))
  "http://orcid.org/#{orcid}" if orcid
end
get_query_url(options={})
click to toggle source
# File lib/toccatore/base.rb, line 28
# Builds the search URL for one page of results.
#
# The query term is chosen from the first option that is present, in this
# order: :doi, :orcid, :related_identifier, :query; otherwise the value of
# the +query+ method is used. Results are always filtered by the
# :from_date/:until_date update window plus has_metadata and is_active.
#
# @param options [Hash] :from_date, :until_date, :offset, :rows and one of
#   the query selectors listed above
# @return [String] the value of #url followed by the form-encoded parameters
def get_query_url(options={})
  date_range = "updated:[#{options[:from_date]}T00:00:00Z TO #{options[:until_date]}T23:59:59Z]"

  q = if options[:doi].present?
        "doi:#{options[:doi]}"
      elsif options[:orcid].present?
        "nameIdentifier:ORCID\\:#{options[:orcid]}"
      elsif options[:related_identifier].present?
        "relatedIdentifier:DOI\\:#{options[:related_identifier]}"
      elsif options[:query].present?
        options[:query]
      else
        query
      end

  # key order is kept as-is because it determines the parameter order in the URL
  params = {
    q: q,
    start: options[:offset],
    rows: options[:rows],
    fl: "doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated",
    fq: "#{date_range} AND has_metadata:true AND is_active:true",
    wt: "json"
  }

  url + URI.encode_www_form(params)
end
get_total(options={})
click to toggle source
# File lib/toccatore/base.rb, line 53
# Returns the number of records matching the query.
#
# Issues the same query as #get_data but with rows forced to 0, then reads
# data -> response -> numFound from the response body.
#
# @param options [Hash] query and request options
# @return the "numFound" value, or 0 when the response does not contain one
def get_total(options={})
  count_url = get_query_url(options.merge(rows: 0))
  response = Maremma.get(count_url, options)
  solr_response = response.body.fetch("data", {}).fetch("response", {})
  solr_response.fetch("numFound", 0)
end
is_personal_name?(author)
click to toggle source
# File lib/toccatore/base.rb, line 233
# Decides whether an author string is a personal name.
#
# A string containing a comma is accepted immediately; otherwise the first
# whitespace-separated token is looked up as a given name via #name_detector.
#
# @param author [String] author name
# @return true when +author+ contains a comma, otherwise the result of
#   name_detector.name_exists? for the first token
def is_personal_name?(author)
  author.include?(",") || name_detector.name_exists?(author.split.first)
end
job_batch_size()
click to toggle source
# File lib/toccatore/base.rb, line 143
# Default batch size; #queue_jobs uses it as the default :rows value and as
# its pagination step.
#
# @return [Integer] 1000
def job_batch_size
  1_000
end
name_detector()
click to toggle source
# File lib/toccatore/base.rb, line 268
# Returns the GenderDetector used for given-name lookups.
#
# The detector is created on first use and then reused for the lifetime of
# this object, instead of being rebuilt on every call (it is called once per
# author lookup in #is_personal_name?).
# NOTE(review): GenderDetector.new presumably loads its name dictionary on
# construction, which is what makes reuse worthwhile - confirm against the gem.
#
# @return [GenderDetector] the memoized detector instance
def name_detector
  @name_detector ||= GenderDetector.new
end
normalize_doi(doi)
click to toggle source
# File lib/toccatore/base.rb, line 182
# Normalizes a DOI into its https://doi.org/ URL form.
#
# The input is first reduced to the bare DOI by #validate_doi; zero-width
# spaces (U+200B) are stripped and the DOI is downcased before being
# URI-encoded with Addressable.
#
# @param doi [String] DOI in any form accepted by #validate_doi
# @return [String, nil] the DOI as a URL, or nil when validation yields nothing
def normalize_doi(doi)
  bare_doi = validate_doi(doi)
  return nil unless bare_doi.present?

  # remove non-printing whitespace and downcase
  cleaned = bare_doi.delete("\u200B").downcase

  # turn DOI into URL, escape unsafe characters
  "https://doi.org/#{Addressable::URI.encode(cleaned)}"
end
orcid_as_url(orcid)
click to toggle source
# File lib/toccatore/base.rb, line 197
# Turns a bare ORCID identifier into its http://orcid.org/ URL.
#
# @param orcid [String] bare ORCID identifier
# @return [String, nil] the URL, or nil when +orcid+ is not present
def orcid_as_url(orcid)
  return nil unless orcid.present?

  "http://orcid.org/#{orcid}"
end
orcid_from_url(url)
click to toggle source
# File lib/toccatore/base.rb, line 193
# Extracts the identifier part from an ORCID URL.
#
# Both http://orcid.org/ and https://orcid.org/ URLs are recognised
# (previously only the http form matched, so https URLs returned nil).
#
# @param url [String, nil] ORCID URL
# @return [String, nil] everything after "orcid.org/", or nil when +url+ is
#   nil or does not start with an ORCID URL prefix
def orcid_from_url(url)
  matched = %r{\Ahttps?://orcid\.org/(.+)}.match(url)
  matched && matched[1]
end
process_data(options = {})
click to toggle source
# File lib/toccatore/base.rb, line 104
# Fetches, parses and pushes one page of results.
#
# The request is made with +options+ extended by :timeout and :source_id;
# parsing and pushing receive the original +options+.
#
# @param options [Hash] query and request options
# @return [Array<OpenStruct>] a single placeholder response with an empty
#   "data" list when parsing yields nothing
# @return the result of #push_data otherwise
def process_data(options = {})
  request_options = options.merge(timeout: timeout, source_id: source_id)
  parsed = parse_data(get_data(request_options), options)

  if parsed.empty?
    [OpenStruct.new(body: { "data" => [] })]
  else
    push_data(parsed, options)
  end
end
push_data(items, options={})
click to toggle source
method returns number of errors
# File lib/toccatore/base.rb, line 119
# Pushes every item via #push_item and reports how many errors occurred.
#
# @param items [Array] items to push
# @param options [Hash] :access_token is required for pushing; :from_date,
#   :until_date and :total are used for reporting
# @return 0 when +items+ is empty (a message is printed)
# @return options[:total] when the access token is blank (a message is printed)
# @return the sum of the values returned by #push_item otherwise
def push_data(items, options={})
  if items.empty?
    puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
    return 0
  end

  if options[:access_token].blank?
    puts "An error occured: Access token missing."
    return options[:total]
  end

  # each push_item call returns its error count
  Array(items).sum { |item| push_item(item, options) }
end
queue_jobs(options={})
click to toggle source
# File lib/toccatore/base.rb, line 59
# Processes all works in a date range, page by page, and reports the outcome.
#
# Defaults are filled into +options+ in place (the caller's hash is
# modified): :rows defaults to #job_batch_size, :from_date to yesterday and
# :until_date to today. The total is obtained from #get_total and each page
# is handed to #process_data. A summary is printed and, when
# :slack_webhook_url is present, sent to Slack.
#
# Pagination steps by the effective :rows value. Previously it always
# stepped by #job_batch_size, so a smaller :rows skipped records and a
# larger :rows fetched overlapping pages.
#
# @param options [Hash] :rows, :from_date, :until_date, :slack_webhook_url
#   plus whatever the query and push methods read
# @return the total number of works reported by #get_total
def queue_jobs(options={})
  options[:offset] = options[:offset].to_i
  options[:rows] = options[:rows].presence || job_batch_size
  options[:from_date] = options[:from_date].presence || (Time.now.to_date - 1.day).iso8601
  options[:until_date] = options[:until_date].presence || Time.now.to_date.iso8601

  # page by the same size that is requested as `rows`; fall back to the
  # default batch size when :rows is not a positive number
  page_size = options[:rows].to_i
  page_size = job_batch_size unless page_size > 0
  options[:rows] = page_size

  total = get_total(options)
  error_total = 0

  if total > 0
    # walk through paginated results
    total_pages = (total.to_f / page_size).ceil

    (0...total_pages).each do |page|
      options[:offset] = page * page_size
      options[:total] = total
      err = process_data(options)

      # process_data returns an error count, or a non-Integer placeholder
      # which is only logged
      if err.is_a?(Integer)
        error_total += err
      else
        puts err.inspect
      end
    end

    text = "#{total} works processed with #{error_total} errors for date range #{options[:from_date]} - #{options[:until_date]}."
  else
    text = "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
  end

  puts text

  # send slack notification
  options[:level] = if total == 0
                      "warning"
                    elsif error_total > 0
                      "danger"
                    else
                      "good"
                    end

  options[:title] = "Report for #{source_id}"
  send_notification_to_slack(text, options) if options[:slack_webhook_url].present?

  # return number of works queued
  total
end
send_notification_to_slack(text, options={})
click to toggle source
# File lib/toccatore/base.rb, line 147
# Posts a single-attachment message to a Slack webhook.
#
# @param text [String] attachment body
# @param options [Hash] :slack_webhook_url (required), :title (defaults to
#   "Report"), :level used as attachment color (defaults to "good")
# @return [nil] when no webhook URL is present
# @return the first element of the value returned by Slack::Notifier#post
def send_notification_to_slack(text, options={})
  webhook_url = options[:slack_webhook_url]
  return nil unless webhook_url.present?

  attachment = {
    title: options[:title] || "Report",
    text: text,
    color: options[:level] || "good"
  }

  notifier = Slack::Notifier.new(webhook_url,
                                 username: "Event Data Agent",
                                 icon_url: ICON_URL)
  notifier.post(attachments: [attachment]).first
end
timeout()
click to toggle source
# File lib/toccatore/base.rb, line 139
# Timeout value merged into the request options by #process_data.
# NOTE(review): the unit is presumably seconds - confirm against Maremma.
#
# @return [Integer] 120
def timeout
  120
end
unfreeze(hsh)
click to toggle source
# File lib/toccatore/base.rb, line 272
# Copies a hash into a new Hash whose keys are downcased symbols.
#
# The input is not modified; values are carried over as-is. When two keys
# differ only in case, the later pair wins.
#
# @param hsh [#each_pair] source hash; keys must respond to +downcase+
# @return [Hash{Symbol => Object}] a new hash
def unfreeze(hsh)
  symbolized = {}
  hsh.each_pair do |key, value|
    symbolized[key.downcase.to_sym] = value
  end
  symbolized
end
url()
click to toggle source
# File lib/toccatore/base.rb, line 135
# Base URL of the search API; #get_query_url appends the form-encoded
# parameters directly after the trailing "?".
#
# @return [String]
def url
  'https://search.datacite.org/api?'
end
validate_doi(doi)
click to toggle source
# File lib/toccatore/base.rb, line 174
# Extracts the bare DOI from a DOI string or DOI resolver URL.
#
# Accepted forms: a bare DOI ("10.1234/abc"), a "doi:" prefixed DOI, and
# http(s)://doi.org/ or http(s)://dx.doi.org/ URLs. The prefix must be
# "10." followed by 4 or 5 digits.
#
# The dot in "doi.org" is matched literally; previously it was an unescaped
# regex wildcard, so hosts such as "doiXorg" were accepted.
#
# @param doi [String, nil] DOI in any of the accepted forms
# @return [String, nil] the bare DOI, or nil when +doi+ does not match
def validate_doi(doi)
  matched = %r{\A(?:https?://(?:dx\.)?doi\.org/)?(?:doi:)?(10\.\d{4,5}/.+)\z}.match(doi)
  matched && matched[1]
end
validate_orcid(orcid)
click to toggle source
# File lib/toccatore/base.rb, line 201
# Extracts a well-formed ORCID identifier from a bare identifier or ORCID URL.
#
# The identifier must be four dash-separated groups of four characters,
# digits only except that the last character may be "X". Exactly one final
# character is accepted; previously the trailing "[0-9X]+" let over-long
# values through. Both http:// and https:// orcid.org URLs are accepted
# (previously only http).
#
# @param orcid [String, nil] bare ORCID identifier or orcid.org URL
# @return [String, nil] the bare identifier, or nil when +orcid+ does not match
def validate_orcid(orcid)
  matched = %r{\A(?:https?://orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])\z}.match(orcid)
  matched && matched[1]
end
validate_prefix(doi)
click to toggle source
# File lib/toccatore/base.rb, line 178
# Extracts the DOI prefix ("10." plus 4 or 5 digits) from a DOI string or
# DOI resolver URL.
#
# Accepts the same input forms as a full DOI: bare, "doi:" prefixed, or an
# http(s)://doi.org/ or http(s)://dx.doi.org/ URL, and requires a non-empty
# suffix after the slash.
#
# The dot in "doi.org" is matched literally; previously it was an unescaped
# regex wildcard, so hosts such as "doiXorg" were accepted.
#
# @param doi [String, nil] DOI in any of the accepted forms
# @return [String, nil] the prefix, or nil when +doi+ does not match
def validate_prefix(doi)
  matched = %r{\A(?:https?://(?:dx\.)?doi\.org/)?(?:doi:)?(10\.\d{4,5})/.+\z}.match(doi)
  matched && matched[1]
end