module RelatonBsi::Scrapper
Constants
- Client
- HTTP
- Product
- Query
- Schema
Public Class Methods
Parse page. @param hit [RelatonBsi::Hit] @return [BsiBibliographicItem]
# File lib/relaton_bsi/scrapper.rb, line 71
#
# Build a bibliographic item for a search hit.
#
# Queries Client with Query::GetProducts, passing the hit's :url value as the
# `h0` variable, and merges the returned product data with the fields that
# are already stored on the hit.
#
# @param hit [RelatonBsi::Hit] search hit; its fields are read via `hit.hit[...]`
# @return [BsiBibliographicItem]
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  # doc = hit.hit_collection.agent.get hit.hit[:url]
  response = Client.query(Query::GetProducts, variables: { h0: hit.hit[:url] })
  product = response.data.product_by_handle.to_h

  attributes = {
    fetched: Date.today.to_s,
    type: "standard",
    docid: fetch_docid(hit.hit[:code], product),
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(hit.hit[:title]),
    doctype: hit.hit[:doctype],
    docstatus: fetch_status(hit.hit[:status]),
    ics: fetch_ics(hit.hit[:ics]),
    date: fetch_dates(hit),
    contributor: fetch_contributors(hit),
    editorialgroup: fetch_editorialgroup(product),
    structuredidentifier: fetch_structuredid(hit),
    abstract: fetch_abstract(product),
    copyright: fetch_copyright(hit),
    link: fetch_link(hit.hit[:url]),
    # relation: fetch_relations(doc),
    place: ["London"],
  }
  BsiBibliographicItem.new(**attributes)
end
Private Class Methods
Fetch abstracts. @param data [Hash] @return [Array<Hash>]
# File lib/relaton_bsi/scrapper.rb, line 111
#
# Fetch abstracts.
#
# A description that is absent, nil, or blank (empty or whitespace only)
# produces no abstract, so the result never contains an entry with empty
# content. The description text itself is passed through unmodified.
#
# @param data [Hash] product data; the "description" key is read
# @return [Array<Hash>] zero or one hash with :content, :language and :script
def fetch_abstract(data)
  description = data["description"]
  # An empty string is treated the same as a missing description.
  return [] if description.nil? || description.to_s.strip.empty?

  [{ content: description, language: "en", script: "Latn" }]
end
Fetch contributors. @param hit [RelatonBsi::Hit] @return [Array<Hash>]
# File lib/relaton_bsi/scrapper.rb, line 187
#
# Fetch contributors.
#
# Produces a single contributor whose role type is "publisher" and whose
# entity is whatever owner_entity returns for the hit.
#
# @param hit [RelatonBsi::Hit]
# @return [Array<Hash>] one-element array with :role and :entity keys
def fetch_contributors(hit)
  publisher = { role: [{ type: "publisher" }], entity: owner_entity(hit) }
  [publisher]
end
Fetch copyright. @param hit [RelatonBsi::Hit] @return [Array<Hash>]
# File lib/relaton_bsi/scrapper.rb, line 204
#
# Fetch copyright.
#
# The copyright year is taken from the hit's :date value. When the hit has no
# date (nil or blank) there is nothing to derive a year from, so an empty
# array is returned instead of letting Date.parse raise on nil.
#
# @param hit [RelatonBsi::Hit] hit whose :date and :publisher fields are read
# @return [Array<Hash>] zero or one hash with :owner and :from (year as String)
# @raise [Date::Error] if the date is present but cannot be parsed
def fetch_copyright(hit)
  date = hit.hit[:date].to_s.strip
  return [] if date.empty?

  year = Date.parse(date).year.to_s
  [{ owner: [owner_entity(hit)], from: year }]
end
Fetch dates. @param hit [RelatonBsi::Hit] @return [Array<Hash>]
# File lib/relaton_bsi/scrapper.rb, line 180
#
# Fetch dates.
#
# Returns the hit's :date value as a single "published" date. A hit without a
# date (nil or blank) yields an empty array rather than a date entry whose
# :on value is nil.
#
# @param hit [RelatonBsi::Hit] hit whose :date field is read
# @return [Array<Hash>] zero or one hash with :type and :on
def fetch_dates(hit)
  published = hit.hit[:date]
  return [] if published.nil? || published.to_s.strip.empty?

  [{ type: "published", on: published }]
end
Fetch docid. @param docid [String] @param data [Hash] @return [Array<RelatonBib::DocumentIdentifier>]
# File lib/relaton_bsi/scrapper.rb, line 121
#
# Fetch docid.
#
# Always returns the BSI identifier. An ISBN identifier is appended only when
# the product data actually contains one at
# variants -> edges -> [0] -> node -> isbn -> value.
#
# @param docid [String] BSI document identifier
# @param data [Hash] product data with string keys; may be empty
# @return [Array<RelatonBib::DocumentIdentifier>]
def fetch_docid(docid, data)
  ids = [{ type: "BSI", id: docid }]

  # dig yields nil as soon as any level of the path is missing or nil, so a
  # product without variants or without an ISBN no longer raises.
  isbn = data.dig("variants", "edges", 0, "node", "isbn", "value")
  ids << { type: "ISBN", id: isbn } unless isbn.nil? || isbn.to_s.empty?

  ids.map do |did|
    RelatonBib::DocumentIdentifier.new(type: did[:type], id: did[:id])
  end
end
Fetch workgroup. @param data [Hash] @return [RelatonIsoBib::EditorialGroup, nil]
# File lib/relaton_bsi/scrapper.rb, line 144
#
# Fetch workgroup.
#
# Reads the committee name from data["committee"]["value"] and wraps it as the
# technical committee of an editorial group. Returns nil when the committee
# entry, its "value" key, or the name itself is missing or blank.
#
# @param data [Hash] product data with string keys
# @return [RelatonIsoBib::EditorialGroup, nil]
def fetch_editorialgroup(data)
  # dig (unlike fetch) returns nil for an absent "value" key instead of
  # raising KeyError, which matches the nil guard below.
  committee = data.dig("committee", "value")
  return if committee.nil? || committee.to_s.strip.empty?

  tc = RelatonBib::WorkGroup.new(name: committee)
  RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
end
@param ics [Array<String>] @return [Array<RelatonIsoBib::Ics>]
# File lib/relaton_bsi/scrapper.rb, line 101
#
# Fetch ICS classifications.
#
# Each entry contributes its first whitespace-separated token as the ICS
# code. A nil list is treated as empty, and entries that contain no token
# (nil or blank) are skipped.
#
# @param ics [Array<String>, nil] ICS entries; the code is the first token
# @return [Array<RelatonIsoBib::Ics>]
def fetch_ics(ics)
  # Array() turns nil into [] so a hit without ICS data yields no entries.
  codes = Array(ics).map { |entry| entry.to_s.split.first }.compact
  codes.map { |code| RelatonIsoBib::Ics.new(code) }
end
Fetch links. @param path [String] @return [Array<Hash>]
# File lib/relaton_bsi/scrapper.rb, line 196
#
# Fetch links.
#
# Builds the source URL by appending "/products/<path>" to
# HitCollection::DOMAIN.
#
# @param path [String] product path segment
# @return [Array<Hash>] one-element array with :type "src" and :content
def fetch_link(path)
  source = { type: "src", content: "#{HitCollection::DOMAIN}/products/#{path}" }
  [source]
end
Fetch status. @param status [String] @return [RelatonBib::DocumentStatus, nil]
# File lib/relaton_bsi/scrapper.rb, line 135
#
# Fetch status.
#
# @param status [String, nil] stage value
# @return [RelatonBib::DocumentStatus, nil] nil when no status is given
def fetch_status(status)
  RelatonBib::DocumentStatus.new(stage: status) if status
end
@param hit [RelatonBsi::Hit] @return [RelatonIsoBib::StructuredIdentifier]
# File lib/relaton_bsi/scrapper.rb, line 154
#
# Fetch structured identifier.
#
# Uses the hit's :code value as the project number.
#
# @param hit [RelatonBsi::Hit]
# @return [RelatonIsoBib::StructuredIdentifier]
def fetch_structuredid(hit)
  code = hit.hit[:code]
  RelatonIsoBib::StructuredIdentifier.new(project_number: code)
end
Fetch titles. @param title [String] @return [RelatonBib::TypedTitleStringCollection]
# File lib/relaton_bsi/scrapper.rb, line 173
#
# Fetch titles.
#
# Delegates to RelatonBib::TypedTitleString.from_string with language "en"
# and script "Latn".
#
# @param title [String]
# @return [RelatonBib::TypedTitleStringCollection]
def fetch_titles(title)
  language = "en"
  script = "Latn"
  RelatonBib::TypedTitleString.from_string(title, language, script)
end
@param hit [RelatonBsi::Hit] @return [Hash]
# File lib/relaton_bsi/scrapper.rb, line 212
#
# Describe the organization named in the hit's :publisher field.
#
# The publisher "BSI" is expanded with its full name and URL; any other
# value is returned as a name only.
#
# @param hit [RelatonBsi::Hit]
# @return [Hash] :name, plus :abbreviation and :url for "BSI"
def owner_entity(hit)
  publisher = hit.hit[:publisher]
  return { name: publisher } unless publisher == "BSI"

  {
    abbreviation: publisher,
    name: "British Standards Institution",
    url: "https://www.bsigroup.com/",
  }
end