class NewspaperWorks::Ingest::ChronAmPublicationInfo

Publication info from ChronAm as remote authority for metadata

Constants

BASE_URL
XML_NS

Attributes

issn[RW]
language[RW]
place_name[RW]
place_of_publication[RW]
title[RW]

Public Class Methods

new(lccn) click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 21
# Builds a publication-info object for one LCCN. Nothing is fetched here;
# the instance reports itself as empty until `load` has parsed a response.
#
# @param lccn identifier passed straight to the superclass initializer
#   (presumably stored there as @lccn — confirm against the parent class)
def initialize(lccn)
  # true until loaded
  @empty = true
  super(lccn)
  # Assigned after super, so this wins over whatever the superclass
  # initializer may have put in @issn.
  @issn = nil # chronam doesn't have this
end

Public Instance Methods

empty?() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 28
# Tells whether this instance still lacks remote data.
#
# @return the @empty flag: true as set by the constructor, false once
#   `load` has parsed a response
def empty?
  @empty
end
inspect() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 32
# Compact one-line representation: class name, a hex token derived from
# the object id, and the LCCN.
#
# Every variable part is handed to `format` as a named argument instead of
# being interpolated into the format string. That way a '%' occurring in
# the LCCN (or class name) is printed literally rather than being parsed
# as a format directive, which used to raise ArgumentError.
#
# @return [String]
def inspect
  format(
    "<%<klass>s:0x000000000%<oid>x \tlccn: '%<lccn>s'>",
    klass: self.class,
    oid: object_id << 1,
    lccn: @lccn
  )
end
load() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 53
# Fetches the RDF document at `url` and populates title, language and
# place information from it.
#
# A response whose status is 404 leaves the instance untouched (it stays
# empty). Otherwise the body is parsed into @doc, @title and @language are
# derived from the first dcterms:title / dcterms:language nodes, the empty
# flag is cleared, and place data is loaded last.
#
# @return whatever `load_place` returns, or nil on a 404 response
def load
  response = NewspaperWorks::ResourceFetcher.get(url)
  return if response['status'] == 404
  @doc = Nokogiri.XML(response['body'])
  raw_title = find('//dcterms:title').first.text
  @title = normalize_title(raw_title)
  language_ref = find('//dcterms:language').first.text
  @language = iso_language_for(language_ref)
  # Flag is cleared before place lookup, matching the original ordering.
  @empty = false
  load_place
end
load_place() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 40
# Reads the place of publication from the loaded document and resolves it
# to a place URI via NewspaperWorks::Ingest.geonames_place_uri.
#
# Does nothing when no document is loaded (`find` returns nil) or when the
# document has no rda:placeOfPublication node. The latter case yields an
# empty result rather than nil, so the guard has to look at the first
# match; checking only for nil let an empty result through and then
# failed on `nil.text`.
#
# @return the resolved place URI, or nil when no place is available
def load_place
  place_node = find('//rda:placeOfPublication')&.first
  return if place_node.nil?
  @place_name = place_node.text
  @place_of_publication = NewspaperWorks::Ingest.geonames_place_uri(
    @place_name
  )
end
oclcnum() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 63
# Looks through the owl:sameAs resources for the first one whose text
# starts with 'info:oclcnum' and returns its number in prefixed form.
#
# @return the result of `oclc_prefixed` applied to the segment after the
#   first '/', or nil when no such resource is present
def oclcnum
  prefix = 'info:oclcnum'
  match = sameas_resources.detect { |node| node.text.start_with?(prefix) }
  return if match.nil?
  oclc_prefixed(match.text.split('/')[1])
end
preceded_by() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 70
# Resolves the publication this one is the successor of, read from the
# first frbr:successorOf rdf:resource attribute in the loaded document.
#
# @return the value of `normalize_related` for that resource, or nil when
#   nothing has been loaded or the document names no predecessor
def preceded_by
  return if empty?
  predecessor = find('//frbr:successorOf/@rdf:resource').first
  normalize_related(predecessor.text) unless predecessor.nil?
end
succeeded_by() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 77
# Resolves the publication that succeeded this one, read from the first
# frbr:successor rdf:resource attribute in the loaded document.
#
# @return the value of `normalize_related` for that resource, or nil when
#   nothing has been loaded or the document names no successor
def succeeded_by
  return if empty?
  successor = find('//frbr:successor/@rdf:resource').first
  normalize_related(successor.text) unless successor.nil?
end
url() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 49
# Address of the RDF document for this publication: BASE_URL, a slash,
# the LCCN, and the '.rdf' suffix.
#
# @return [String]
def url
  format('%s/%s.rdf', BASE_URL, @lccn)
end

Private Instance Methods

find(expr, context = nil) click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 113
# Evaluates an XPath expression with the XML_NS namespace mapping.
#
# @param expr XPath expression to evaluate
# @param context node to search from; falls back to the loaded document
# @return the result of `xpath` on the chosen node, or nil when neither a
#   context nor a loaded document is available
def find(expr, context = nil)
  node = context || @doc
  node&.xpath(expr, **XML_NS)
end
iso_language_for(code) click to toggle source

Returns the ISO 639-2 three-character code for an ISO 639-1 two-character code

or for the equivalent lingvoj resource URL used by ChronAm;
looks the code up in the HTML language tables maintained by LOC.
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 122
# Looks up a language code in the HTML language table served by loc.gov
# and returns the stripped text of the cell at row 2, column 2 of the
# first table.
#
# The input may be a bare code or a URL ending in the code; only the last
# '/'-separated segment is sent as the `iso_639_1` query parameter.
#
# When the response contains no such cell (for example because the code is
# not listed) nil is returned; previously this case failed with
# NoMethodError on `nil.text`.
#
# @param code [String] language code, or URL whose last segment is the code
# @return [String, nil] the looked-up code, or nil when the table has no match
def iso_language_for(code)
  # handle case where source language code is lingvoj url:
  short_code = code.split('/').last
  lookup_url = 'https://www.loc.gov/standards/iso639-2/php/langcodes_name.php' \
               "?iso_639_1=#{short_code}"
  resp = NewspaperWorks::ResourceFetcher.get lookup_url
  html = Nokogiri::HTML(resp['body'])
  cell = html.xpath('//table[1]/tr[2]/td[2]').first
  return if cell.nil?
  cell.text.strip
end
lc_catalog_url(lccn) click to toggle source

Returns the URL of the LC catalog record, provided one exists, on the basis of

non-empty MODS for the given LCCN.  Otherwise returns nil.
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 92
# Returns the lccn.loc.gov URL for the given LCCN when the MODS document
# at "<that URL>/mods" has a root element with children; otherwise nil.
#
# A response body that does not parse to a document with a root element
# (e.g. an empty body) is treated as "no catalog record" and yields nil;
# previously it failed with NoMethodError on `nil.children`.
#
# @param lccn identifier appended to https://lccn.loc.gov/
# @return [String, nil] the catalog URL, or nil when no usable MODS exists
def lc_catalog_url(lccn)
  content_url = "https://lccn.loc.gov/#{lccn}"
  # Named mods_url so the local does not shadow the public `url` method.
  mods_url = "#{content_url}/mods"
  resp = NewspaperWorks::ResourceFetcher.get mods_url
  root = Nokogiri.XML(resp['body']).root
  return if root.nil? || root.children.empty?
  content_url
end
normalize_title(value) click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 86
# Delegates title normalization to NewspaperWorks::Ingest.normalize_title.
#
# @param value raw title text
# @return whatever NewspaperWorks::Ingest.normalize_title returns for it
def normalize_title(value)
  NewspaperWorks::Ingest.normalize_title(value)
end
sameas_resources() click to toggle source
# File lib/newspaper_works/ingest/chronam_publication_info.rb, line 109
# Collects the rdf:resource attributes of all owl:sameAs elements.
#
# @return the result of `find` for that expression, or an empty Array when
#   `find` returns nil (which it does when no document has been loaded)
def sameas_resources
  find('//owl:sameAs/@rdf:resource') || []
end