module NewspaperWorks::Ingest
Module for Ingest
adapters that import files into model objects
Public Class Methods
assign_administrative_metadata(work, opts = {})
click to toggle source
# File lib/newspaper_works/ingest.rb, line 78 def self.assign_administrative_metadata(work, opts = {}) work.depositor = opts.fetch(:email, User.batch_user.user_key) work.admin_set = find_admin_set(opts.fetch(:admin_set, nil)) work.visibility = opts.fetch(:visibility, 'open') work.resource_type = ['Newspapers'] work.date_modified ||= Hyrax::TimeService.time_in_utc work.date_uploaded ||= work.date_modified work.state = RDF::URI( 'http://fedora.info/definitions/1/0/access/ObjState#active' ) end
find_admin_set(admin_set = nil)
click to toggle source
# File lib/newspaper_works/ingest.rb, line 66 def self.find_admin_set(admin_set = nil) return admin_set if admin_set.class == AdminSet admin_set = AdminSet::DEFAULT_ID if admin_set.nil? begin AdminSet.find(admin_set) rescue # only create if default admin set raise unless admin_set == AdminSet::DEFAULT_ID AdminSet.find(AdminSet.find_or_create_default_admin_set_id) end end
geonames_place_uri(place_name)
click to toggle source
Get Geonames URI for closest place match
Requires Qa::Authorities::Geonames.username is set, likely via `Hyrax.config.geonames_username=` setter in config/initializers/hyrax.rb of consuming app.
@param place_name [String] Name of place as human-readable text @return [String, NilClass] URI to Geonames RDF or nil
# File lib/newspaper_works/ingest.rb, line 35 def self.geonames_place_uri(place_name) username = Qa::Authorities::Geonames.username return if username.nil? || username.empty? place_name = place_name.delete('.').split(/[\[\(]/)[0].strip query = URI.encode(place_name) geo_qs = "q=#{query}&username=#{username}" url = "http://api.geonames.org/search?#{geo_qs}" resp = NewspaperWorks::ResourceFetcher.get url doc = Nokogiri.XML(resp['body']) geonames_id = doc.xpath('//geonames/geoname[1]/geonameId').first return if geonames_id.nil? "http://sws.geonames.org/#{geonames_id.text}/" end
normalize_title(title)
click to toggle source
Normalize publication title from catalog data
Presently strips trailing period
@param title [String] @return [String] normalized title
# File lib/newspaper_works/ingest.rb, line 53 def self.normalize_title(title) title.strip.sub(/[.]+$/, '') end
publication_metadata(lccn)
click to toggle source
Get publication metadata from LC catalog MODS data, if available,
and from ChronAm, as a fallback.
@param lccn [String] Library of Congress Control number for publication @return [NewspaperWorks::Ingest::PublicationInfo] proxy to metadata
source, an object for accessors for publication fields.
# File lib/newspaper_works/ingest.rb, line 62 def self.publication_metadata(lccn) PublicationInfo.new(lccn) end