class NewspaperWorks::Ingest::ChronAmPublicationInfo
Publication info from ChronAm as remote authority for metadata
Constants
- BASE_URL
- XML_NS
Attributes
issn[RW]
language[RW]
place_name[RW]
place_of_publication[RW]
title[RW]
Public Class Methods
new(lccn)
click to toggle source
Calls superclass method
NewspaperWorks::Ingest::BasePublicationInfo::new
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 21
# Builds publication info for the given LCCN.
#
# The empty flag is raised before the superclass constructor is invoked and
# the ISSN is cleared after it; this ordering is kept exactly as written.
# NOTE(review): presumably the superclass constructor triggers #load, which
# is why @empty must already be set — confirm against BasePublicationInfo.
def initialize(lccn)
  # true until loaded
  @empty = true
  # bare super forwards the same lccn argument to the superclass
  super
  # chronam doesn't have this
  @issn = nil
end
Public Instance Methods
empty?()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 28
# Reports the current value of the @empty flag: set to true by the
# constructor and switched to false by #load once a record has been read.
def empty?
  @empty
end
inspect()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 32
# Compact debugging representation showing the class name, a hexadecimal
# rendering of (object_id << 1), and the LCCN.
#
# Returns a String such as "<Klass:0x000000000abc \tlccn: 'sn83045396'>".
def inspect
  # The LCCN is passed as a format argument instead of being interpolated
  # into the template, so a '%' inside the value can never be interpreted as
  # a format directive (which would raise ArgumentError).
  format(
    "<%<klass>s:0x000000000%<oid>x \tlccn: '%<lccn>s'>",
    klass: self.class,
    oid: object_id << 1,
    lccn: @lccn
  )
end
load()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 53
# Fetches the RDF record at #url and populates @doc, @title and @language,
# then clears the empty flag and delegates to #load_place.
#
# Returns early, leaving the object empty, when the fetch reports a 404 or
# when the parsed document carries no dcterms:title element. A missing
# dcterms:language element leaves @language untouched instead of raising.
def load
  resp = NewspaperWorks::ResourceFetcher.get url
  return if resp['status'] == 404
  @doc = Nokogiri.XML(resp['body'])
  title = find('//dcterms:title').first
  # Without a title there is nothing usable to load; stay empty rather than
  # calling #text on nil.
  return if title.nil?
  @title = normalize_title(title.text)
  language = find('//dcterms:language').first
  @language = iso_language_for(language.text) unless language.nil?
  @empty = false
  load_place
end
load_place()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 40
# Reads the first rda:placeOfPublication value from the loaded document into
# @place_name and stores the result of
# NewspaperWorks::Ingest.geonames_place_uri for that name in
# @place_of_publication.
#
# Does nothing when no document is loaded or when the document has no
# rda:placeOfPublication element.
def load_place
  place_match = find('//rda:placeOfPublication')
  # find gives nil without a document and an empty result set when nothing
  # matches; both must be guarded before calling #first.text.
  return if place_match.nil? || place_match.empty?
  @place_name = place_match.first.text
  @place_of_publication = NewspaperWorks::Ingest.geonames_place_uri(
    @place_name
  )
end
oclcnum()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 63
# Looks through the owl:sameAs resources for the first one whose text starts
# with 'info:oclcnum' and passes the segment after the first '/' to
# oclc_prefixed. Returns nil when no such resource is present.
def oclcnum
  prefix = 'info:oclcnum'
  match = sameas_resources.detect { |node| node.text.start_with?(prefix) }
  return nil if match.nil?
  # second '/'-separated segment of the matching resource text
  _scheme, number = match.text.split('/')
  oclc_prefixed(number)
end
preceded_by()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 70
# Passes the text of the first frbr:successorOf rdf:resource attribute in
# the loaded document to normalize_related and returns the result.
# Returns nil when nothing has been loaded or no such attribute exists.
def preceded_by
  return nil if empty?
  predecessor = find('//frbr:successorOf/@rdf:resource').first
  normalize_related(predecessor.text) unless predecessor.nil?
end
succeeded_by()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 77
# Passes the text of the first frbr:successor rdf:resource attribute in the
# loaded document to normalize_related and returns the result.
# Returns nil when nothing has been loaded or no such attribute exists.
def succeeded_by
  return nil if empty?
  successor = find('//frbr:successor/@rdf:resource').first
  normalize_related(successor.text) unless successor.nil?
end
url()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 49
# Address of the RDF document for this publication: BASE_URL, a slash, the
# LCCN, and an '.rdf' suffix.
def url
  [BASE_URL, "#{@lccn}.rdf"].join('/')
end
Private Instance Methods
find(expr, context = nil)
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 113
# Evaluates an XPath expression with the XML_NS namespace bindings.
#
# expr    - XPath expression string.
# context - object to query; falls back to the loaded document (@doc).
#
# Returns the result of #xpath on the context, or nil when there is neither
# a context nor a loaded document.
def find(expr, context = nil)
  node = context || @doc
  return nil if node.nil?
  node.xpath(expr, **XML_NS)
end
iso_language_for(code)
click to toggle source
Returns the ISO 639-2 three-character code for an ISO 639-1 two-character code,
or for the equivalent lingvoj resource URL used by ChronAm; the code is looked up in the HTML language tables maintained by LOC.
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 122
# Looks up a language code in the LOC ISO 639-2 HTML table and returns the
# stripped text of the cell at //table[1]/tr[2]/td[2].
#
# code - language code, or a lingvoj URL whose last path segment is the code.
#
# Returns nil, without issuing a request, when no code can be extracted, and
# nil when the fetched page has no matching table cell.
def iso_language_for(code)
  # handle case where source language code is lingvoj url:
  code = code.to_s.split('/').last
  return if code.nil?
  lookup_url = 'https://www.loc.gov/standards/iso639-2/php/langcodes_name.php' \
               "?iso_639_1=#{code}"
  resp = NewspaperWorks::ResourceFetcher.get lookup_url
  html = Nokogiri::HTML(resp['body'])
  cell = html.xpath('//table[1]/tr[2]/td[2]').first
  # An unknown code yields no result row; report nil instead of raising.
  return if cell.nil?
  cell.text.strip
end
lc_catalog_url(lccn)
click to toggle source
Returns the URL of the LC catalog record for the given LCCN, provided one exists,
as indicated by a non-empty MODS document for that LCCN. Otherwise returns nil.
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 92
# Returns the LC catalog URL for the given LCCN when the MODS document
# fetched from "<catalog url>/mods" has a root element with children.
#
# lccn - LCCN string appended to https://lccn.loc.gov/.
#
# Returns nil when the response parses to a document with no root element
# (for example an empty body) or when the root has no children.
def lc_catalog_url(lccn)
  content_url = "https://lccn.loc.gov/#{lccn}"
  resp = NewspaperWorks::ResourceFetcher.get "#{content_url}/mods"
  root = Nokogiri.XML(resp['body']).root
  # root is nil for an empty or unparseable body; treat that as "no record".
  return if root.nil? || root.children.empty?
  content_url
end
normalize_title(value)
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 86
# Thin wrapper: hands the title to the module-level
# NewspaperWorks::Ingest.normalize_title and returns whatever it returns.
def normalize_title(value)
  ingest = NewspaperWorks::Ingest
  ingest.normalize_title(value)
end
sameas_resources()
click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 109
# rdf:resource attributes of every owl:sameAs element in the loaded document;
# an empty Array when the lookup yields nothing (e.g. no document loaded).
def sameas_resources
  resources = find('//owl:sameAs/@rdf:resource')
  resources || []
end