module RelatonBsi::Scrapper

Scrapper.

Constants

Client
HTTP
Product
Query
Schema

Public Class Methods

parse_page(hit) click to toggle source

Parse page. @param hit [RelatonBsi::Hit] @return [BsiBibliographicItem]

# File lib/relaton_bsi/scrapper.rb, line 71
# Build a bibliographic item for a search hit.
#
# Runs the GetProducts query with the hit's URL as the +h0+ variable, then
# combines the returned product data with the attributes stored on the hit.
#
# @param hit [RelatonBsi::Hit]
# @return [BsiBibliographicItem]
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  # doc = hit.hit_collection.agent.get hit.hit[:url]
  found = hit.hit
  response = Client.query(Query::GetProducts, variables: { h0: found[:url] })
  product = response.data.product_by_handle.to_h
  attrs = {
    fetched: Date.today.to_s,
    type: "standard",
    docid: fetch_docid(found[:code], product),
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(found[:title]),
    doctype: found[:doctype],
    docstatus: fetch_status(found[:status]),
    ics: fetch_ics(found[:ics]),
    date: fetch_dates(hit),
    contributor: fetch_contributors(hit),
    editorialgroup: fetch_editorialgroup(product),
    structuredidentifier: fetch_structuredid(hit),
    abstract: fetch_abstract(product),
    copyright: fetch_copyright(hit),
    link: fetch_link(found[:url]),
    # relation: fetch_relations(doc),
    place: ["London"],
  }
  BsiBibliographicItem.new(**attrs)
end

Private Class Methods

fetch_abstract(data) click to toggle source

Fetch abstracts. @param data [Hash] @return [Array<Hash>]

# File lib/relaton_bsi/scrapper.rb, line 111
# Wrap the product description as a single English abstract.
#
# @param data [Hash] product data, read by its "description" key
# @return [Array<Hash>] one abstract hash, or an empty array when the
#   description is missing
def fetch_abstract(data)
  description = data["description"]
  description ? [{ content: description, language: "en", script: "Latn" }] : []
end
fetch_contributors(hit) click to toggle source

Fetch contributors. @param hit [RelatonBsi::Hit] @return [Array<Hash>]

# File lib/relaton_bsi/scrapper.rb, line 187
# Describe the publisher of the hit as the only contributor.
#
# @param hit [RelatonBsi::Hit]
# @return [Array<Hash>] a one-element array holding the "publisher" role
#   and the entity produced by owner_entity
def fetch_contributors(hit)
  [{ role: [{ type: "publisher" }], entity: owner_entity(hit) }]
end
fetch_dates(hit) click to toggle source

Fetch dates. @param hit [RelatonBsi::Hit] @return [Array<Hash>]

# File lib/relaton_bsi/scrapper.rb, line 180
# Report the hit's date as its publication date.
#
# @param hit [RelatonBsi::Hit]
# @return [Array<Hash>] a single "published" entry whose :on value is the
#   hit's :date attribute
def fetch_dates(hit)
  published = { type: "published", on: hit.hit[:date] }
  [published]
end
fetch_docid(docid, data) click to toggle source

Fetch docid. @param docid [String] @param data [Hash] @return [Array<RelatonBib::DocumentIdentifier>]

# File lib/relaton_bsi/scrapper.rb, line 121
# Build the document identifiers for a hit.
#
# A "BSI" identifier is always emitted for +docid+. An "ISBN" identifier is
# added only when the product data carries a value at
# variants -> edges -> first edge -> node -> isbn -> value. Product data that
# lacks any step of that path (or has a nil value there) yields just the BSI
# identifier instead of raising NoMethodError on nil.
#
# @param docid [String]
# @param data [Hash] product data; may be empty
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_docid(docid, data)
  ids = [{ type: "BSI", id: docid }]
  # dig stops at the first nil, so partially populated products are safe.
  isbn = data.dig("variants", "edges", 0, "node", "isbn", "value")
  ids << { type: "ISBN", id: isbn } if isbn
  ids.map do |did|
    RelatonBib::DocumentIdentifier.new(type: did[:type], id: did[:id])
  end
end
fetch_editorialgroup(data) click to toggle source

Fetch workgroup. @param data [Hash] @return [RelatonIsoBib::EditorialGroup]

# File lib/relaton_bsi/scrapper.rb, line 144
# Build the editorial group from the product's committee.
#
# Reads data["committee"]; when that entry is present its "value" key is
# fetched (a missing "value" key raises KeyError) and used as the name of the
# single technical committee.
#
# @param data [Hash] product data
# @return [RelatonIsoBib::EditorialGroup, nil] nil when there is no committee
#   entry or its value is falsy
def fetch_editorialgroup(data)
  committee_name = data["committee"]&.fetch("value")
  if committee_name
    committee = RelatonBib::WorkGroup.new(name: committee_name)
    RelatonIsoBib::EditorialGroup.new(technical_committee: [committee])
  end
end
fetch_ics(ics) click to toggle source

@param ics [Array<String>] @return [Array<RelatonIsoBib::Ics>]

# File lib/relaton_bsi/scrapper.rb, line 101
# Convert ICS strings into ICS objects.
#
# Only the first whitespace-separated token of each string is passed to
# RelatonIsoBib::Ics.new; anything after it is ignored.
#
# @param ics [Array<String>]
# @return [Array<RelatonIsoBib::Ics>]
def fetch_ics(ics)
  ics.map { |entry| RelatonIsoBib::Ics.new(entry.split.first) }
end
fetch_status(status) click to toggle source

Fetch status. @param status [String] @return [RelatonBib::DocumentStatus, nil]

# File lib/relaton_bsi/scrapper.rb, line 135
# Wrap a status string in a document status object.
#
# @param status [String, nil]
# @return [RelatonBib::DocumentStatus, nil] nil when no status is given
def fetch_status(status)
  RelatonBib::DocumentStatus.new(stage: status) if status
end
fetch_structuredid(hit) click to toggle source

@param hit [RelatonBsi::Hit] @return [RelatonIsoBib::StructuredIdentifier]

# File lib/relaton_bsi/scrapper.rb, line 154
# Build the structured identifier, using the hit's :code attribute as the
# project number.
#
# @param hit [RelatonBsi::Hit]
# @return [RelatonIsoBib::StructuredIdentifier]
def fetch_structuredid(hit)
  code = hit.hit[:code]
  RelatonIsoBib::StructuredIdentifier.new(project_number: code)
end
fetch_titles(title) click to toggle source

Fetch titles. @param title [String] @return [RelatonBib::TypedTitleStringCollection]

# File lib/relaton_bsi/scrapper.rb, line 173
# Build the title collection from a title string.
#
# Delegates to RelatonBib::TypedTitleString.from_string with language "en"
# and script "Latn".
#
# @param title [String]
# @return [RelatonBib::TypedTitleStringCollection]
def fetch_titles(title)
  RelatonBib::TypedTitleString.from_string(title, "en", "Latn")
end
owner_entity(hit) click to toggle source

@param hit [RelatonBsi::Hit] @return [Hash]

# File lib/relaton_bsi/scrapper.rb, line 212
# Describe the organization named by the hit's :publisher attribute.
#
# The "BSI" publisher is expanded with its full name and URL; any other
# publisher is returned with only its name.
#
# @param hit [RelatonBsi::Hit]
# @return [Hash]
def owner_entity(hit)
  publisher = hit.hit[:publisher]
  return { name: publisher } unless publisher == "BSI"

  { abbreviation: publisher, name: "British Standards Institution", url: "https://www.bsigroup.com/" }
end