class NewspaperWorks::Ingest::LCPublicationInfo

Constants

BASE_URL
XML_NS

Attributes

doc[RW]
full_title[RW]
lccn[RW]
place_name[RW]
place_of_publication[RW]

Public Class Methods

new(lccn) click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 17
# Builds a publication-info object and immediately populates it via #load.
#
# @param lccn the identifier handed to the superclass initializer
#   (presumably stored as @lccn there -- superclass is not visible here)
def initialize(lccn)
  super(lccn)
  # Start from a blank slate; #load fills these in when data is available.
  @doc = @full_title = @place_of_publication = @place_name = nil
  load
end

Public Instance Methods

empty?() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 73
# Whether there is no usable document loaded.
#
# True when no document has been parsed yet, when the parsed document has
# no root element (e.g. it was parsed from an empty or non-XML body), or
# when the root element has no children.
#
# @return [Boolean]
def empty?
  return true if @doc.nil?
  root = @doc.root
  # A document without a root element has nothing to query; treat it as
  # empty rather than calling #children on nil.
  root.nil? || root.children.empty?
end
inspect() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 26
# Short debugging representation showing the class, an object-id-derived
# hex value and the LCCN.
#
# The class name and LCCN are passed as format arguments rather than being
# interpolated into the format template, so a literal '%' in either value
# cannot be misread as a format directive.
#
# @return [String]
def inspect
  format(
    "<%<klass>s:0x000000000%<oid>x \tlccn: '%<lccn>s'>",
    klass: self.class,
    oid: object_id << 1,
    lccn: @lccn
  )
end
issn() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 94
# ISSN taken from the first MODS identifier of type "issn".
#
# @return [String, nil] identifier text; nil when the document is empty or
#   carries no such identifier
def issn
  return if empty?
  node = find('//mods:mods/mods:identifier[@type="issn"]').first
  node.text unless node.nil?
end
language(default = 'eng') click to toggle source

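ISO 639-2 three-character language code read from the MODS languageTerm. Returns nil when no document is loaded; falls back to `default` ('eng', English) when the document has no languageTerm.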

# File lib/newspaper_works/ingest/lc_publication_info.rb, line 88
# Language code from the first MODS languageTerm element.
#
# @param default [String] value returned when the document has no
#   languageTerm element
# @return [String, nil] the languageTerm text, or +default+ when the
#   element is missing; nil when the document is empty
def language(default = 'eng')
  return if empty?
  term = find('//mods:language/mods:languageTerm').first
  return default if term.nil?
  term.text
end
load() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 77
# Loads the publication record, then resolves the place of publication.
# Place resolution is skipped when loading produced no title.
def load
  load_lc
  return if @full_title.nil?
  load_place
end
load_lc() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 38
# Fetches the resource at #url, parses the response body as XML into @doc,
# and records the publication's full title in @full_title when one is found.
# Leaves @full_title untouched when the document is empty or has no
# matching title element.
def load_lc
  response = NewspaperWorks::ResourceFetcher.get(url)
  @doc = Nokogiri.XML(response['body'])
  return if empty?
  # Prefer titleInfo[@type="uniform"]; otherwise use a bare titleInfo with
  # no type attribute (i.e. not an alternate title). Either way the
  # mods:title element should omit any non-sorted article (e.g. "The").
  node = find('//mods:titleInfo[@type="uniform"]/mods:title').first ||
         find('//mods:titleInfo[count(@type)=0]/mods:title').first
  @full_title = node.text unless node.nil?
end
load_place() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 66
# Determines the place name and resolves it to a place-of-publication URI.
#
# The name comes from #mods_place_name, falling back to
# place_name_from_title(@full_title). @place_name is always assigned;
# @place_of_publication is only assigned when a name was found.
def load_place
  name = mods_place_name
  name = place_name_from_title(@full_title) unless name
  @place_name = name
  @place_of_publication = NewspaperWorks::Ingest.geonames_place_uri(name) unless name.nil?
end
mods_place_name() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 50
# Builds a place name from the MODS record.
#
# @return [String, nil] "City, Region" when the hierarchical geographic
#   subject supplies both parts; otherwise the text of the first
#   originInfo placeTerm of type "text", or nil when that is absent too
def mods_place_name
  # The geographic subject hierarchy is the preferred source.
  city = find('//mods:hierarchicalGeographic/mods:city').first
  # Region is a state (e.g. "Utah"), province (e.g. "Ontario") or similar
  # (e.g. "England"); the country stands in when no state is given.
  region = find('//mods:hierarchicalGeographic/mods:state').first ||
           find('//mods:hierarchicalGeographic/mods:country').first
  if city && region
    "#{city.text}, #{region.text}"
  else
    # placeTerm text may be abbreviated in ways geonames search struggles
    # with; for the abbreviation list see:
    #   https://www.loc.gov/aba/publications/FreeSHM/H0810.pdf
    term = find('//mods:originInfo//mods:placeTerm[@type="text"]').first
    term && term.text
  end
end
oclcnum() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 100
# OCLC number from the first MODS identifier of type "oclc", passed
# through oclc_prefixed.
#
# @return the result of oclc_prefixed for the identifier text; nil when
#   the document is empty or carries no such identifier
def oclcnum
  return if empty?
  node = find('//mods:mods/mods:identifier[@type="oclc"]').first
  oclc_prefixed(node.text) unless node.nil?
end
preceded_by() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 106
# Delegates to related_by with the relationship type 'preceding'.
# NOTE(review): related_by is not visible in this excerpt; presumably it
# returns the publications that preceded this one -- confirm.
def preceded_by
  related_by('preceding')
end
succeeded_by() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 110
# Delegates to related_by with the relationship type 'succeeding'.
# NOTE(review): related_by is not visible in this excerpt; presumably it
# returns the publications that succeeded this one -- confirm.
def succeeded_by
  related_by('succeeding')
end
title() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 82
# Normalized publication title.
#
# Takes the part of @full_title before the first " (" (dropping a trailing
# parenthetical qualifier) and passes it to
# NewspaperWorks::Ingest.normalize_title.
#
# @return the normalized title; nil when the document is empty or no
#   title was found while loading
def title
  # @full_title stays nil when the document has content but no matching
  # title element, so guard it as well as the empty-document case.
  return if empty? || @full_title.nil?
  NewspaperWorks::Ingest.normalize_title(@full_title.split(/ [\(]/)[0])
end
url() click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 34
# URL of this publication's MODS record: BASE_URL, then the LCCN, then a
# trailing "mods" segment, joined with "/".
# NOTE(review): assumes BASE_URL carries no trailing slash -- confirm
# against the constant's definition.
def url
  "#{BASE_URL}/#{@lccn}/mods"
end

Private Instance Methods

find(expr, context = nil) click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 134
# Runs an XPath query with the XML_NS namespace bindings.
#
# @param expr [String] XPath expression
# @param context node to query; the whole document (@doc) when omitted
# @return the result of calling #xpath on the context; nil when there is
#   no context to query and the document is empty
def find(expr, context = nil)
  target = context || @doc
  return if target.nil? && empty?
  target.xpath(expr, **XML_NS)
end
lccn_for(related_item) click to toggle source
# File lib/newspaper_works/ingest/lc_publication_info.rb, line 127
# Extracts a normalized LCCN from a related-item node.
#
# Looks at the node's MODS identifiers of type "local" and uses the first
# whose text starts with the "(DLC)" marker, e.g. "(DLC)sn 85058130"
# yields "sn85058130".
#
# @param related_item node passed to #find as the query context
# @return [String, nil] the LCCN with the "(DLC)" marker and all
#   whitespace removed; nil when no "(DLC)" identifier exists or nothing
#   follows the marker
def lccn_for(related_item)
  identifiers = find('mods:identifier[@type="local"]', related_item) || []
  dlc = identifiers.map(&:text).detect { |text| text.start_with?('(DLC)') }
  return if dlc.nil?
  # Remove every blank, not only the first: blank-padded forms such as
  # "(DLC)   50003187" must also collapse to a clean LCCN.
  lccn = dlc.sub('(DLC)', '').gsub(/\s+/, '')
  lccn.empty? ? nil : lccn
end