class Krikri::Harvesters::PrimoHarvester

A harvester implementation for Primo

Accepts options passed as `:primo => opts`

Options allowed are:

- bulk_size: The number of records to fetch from Primo per request
  (default: 500)

Constants

PRIMO_NS
PrimoHarvestError
SEAR_NS

Public Class Methods

new(opts = {}) click to toggle source
Calls superclass method Krikri::Harvester::new
# File lib/krikri/harvesters/primo_harvester.rb, line 20
def initialize(opts = {})
  @opts = opts.fetch(:primo, {})
  super

  @opts[:bulk_size] ||= 500

  @http_conn = Faraday.new do |conn|
    conn.request :retry, :max => 3
    conn.response :follow_redirects, :limit => 5
    conn.response :logger, Rails.logger
    conn.adapter :net_http
  end
end

Public Instance Methods

count() click to toggle source

@return [Integer] the number of records available for harvesting.

# File lib/krikri/harvesters/primo_harvester.rb, line 36
def count
  response = @http_conn.get(uri, :indx => '1', :bulkSize => '1')

  unless response.status == 200
    raise PrimoHarvestError, "Couldn't get record count"
  end

  total_hits = Nokogiri::XML(response.body)
               .xpath('//sear:DOCSET')
               .first
               .attr('TOTALHITS')

  Integer(total_hits)
end
get_record(identifier) click to toggle source

@param identifier [#to_s] the identifier of the record to get @return [#to_s] the record

# File lib/krikri/harvesters/primo_harvester.rb, line 72
def get_record(identifier)
  response = @http_conn.get(uri,
                            :indx => 1,
                            :bulkSize => 1,
                            :query => "rid,exact,#{identifier}")

  unless response.status == 200
    raise PrimoHarvestError, "Couldn't get record: #{identifier}"
  end

  enumerate_records(response.body).first
end
records() click to toggle source

@return [Enumerator::Lazy] an enumerator of the records targeted by this

harvester.
# File lib/krikri/harvesters/primo_harvester.rb, line 54
def records
  bulk_size = @opts.fetch(:bulk_size)

  (1...count).step(bulk_size).lazy.flat_map do |offset|
    response = @http_conn.get(uri, :indx => offset, :bulkSize => bulk_size)

    unless response.status == 200
      raise PrimoHarvestError, "Record fetch from #{offset} to " \
                               "#{offset + bulk_size} failed"
    end

    enumerate_records(response.body)
  end
end

Private Instance Methods

enumerate_records(xml) click to toggle source

Extract a page's worth of records from a Primo XML search result. @param xml [String] an XML document returned from a Primo search @return [Array] an array of @record_class instances

# File lib/krikri/harvesters/primo_harvester.rb, line 91
def enumerate_records(xml)
  doc = Nokogiri::XML(xml)
  doc.root.add_namespace_definition('nmbib', PRIMO_NS)
  doc.xpath('//sear:DOC').lazy.map do |record|
    identifier = record.xpath('./nmbib:PrimoNMBib/nmbib:record/' \
                              'nmbib:control/nmbib:recordid')
                 .first.text

    record = record.dup
    record.add_namespace_definition('sear', SEAR_NS)

    @record_class.build(mint_id(identifier), record.to_xml)
  end
end