class RelatonUn::HitCollection

Page of hit collection.

Constants

AGENT
BOUNDARY
DOMAIN

Public Class Methods

new(text) click to toggle source

@param text [String] reference to search

Calls superclass method
# File lib/relaton_un/hit_collection.rb, line 15
# Runs the search for +text+ and loads the hits found on the result page.
#
# Sets up the HTTPS connection and the cookie jar, fetches the search form
# with get_page, submits it with page_resp and converts every
# "div.viewHover" element of the returned page into a hit. When either
# request yields no page, @array is not assigned here.
#
# @param text [String] reference to search
def initialize(text)
  super
  @uri = URI.parse DOMAIN
  @jar = HTTP::CookieJar.new
  @http = Net::HTTP.new @uri.host, @uri.port
  @http.use_ssl = true
  @http.read_timeout = 120
  return unless (form_resp = get_page)

  # page_resp returns whatever get_page returns, and get_page gives nil once
  # its redirect limit is exceeded, so guard before reading the body.
  return unless (result_resp = page_resp(form_resp, text))

  doc = Nokogiri::HTML result_resp.body
  @array = doc.css("div.viewHover").map { |item| hit item }
end

Private Instance Methods

agenda(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [String]

# File lib/relaton_un/hit_collection.rb, line 156
# Reads the "Agenda Item(s):" field of a search result.
#
# The XPath is anchored with "." so the lookup stays inside +item+. An
# expression that starts with "//" is evaluated from the document root and
# would yield the first result's value for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [String, nil] text of the span following the label, nil if absent
def agenda(item)
  item.at(".//label[.='Agenda Item(s):']/following-sibling::span")&.text
end
date_pub(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [String]

# File lib/relaton_un/hit_collection.rb, line 127
# Reads the "Publication Date: " field of a search result.
#
# The XPath is anchored with "." so the lookup stays inside +item+. An
# expression that starts with "//" is evaluated from the document root and
# would yield the first result's value for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [String, nil] text of the span following the label, nil if absent
def date_pub(item)
  item.at(".//label[.='Publication Date: ']/following-sibling::span")&.text
end
date_rel(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [String]

# File lib/relaton_un/hit_collection.rb, line 133
# Reads the release date from the span that follows +item+ as a sibling and
# whose id contains "cfRelDateE".
#
# @param item [Nokogiri::XML::Element] element the sibling lookup starts from
# @return [String, nil] text of the matching span, nil if there is none
def date_rel(item)
  release_span = item.at("./following-sibling::span[contains(@id, 'cfRelDateE')]")
  release_span&.text
end
distribution(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [String]

# File lib/relaton_un/hit_collection.rb, line 162
# Reads the "Distribution:" field of a search result.
#
# The XPath is anchored with "." so the lookup stays inside +item+. An
# expression that starts with "//" is evaluated from the document root and
# would yield the first result's value for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [String, nil] text of the span following the label, nil if absent
def distribution(item)
  item.at(".//label[.='Distribution:']/following-sibling::span")&.text
end
form_data(form, text) click to toggle source

@param form [Nokogiri::HTML::Document] @param text [String] @return [Array<String>]

# File lib/relaton_un/hit_collection.rb, line 56
# Builds the parts of the multipart/form-data body for the search request.
#
# Every selected form control contributes a boundary line and a
# Content-Disposition part. A fixed set of controls gets a forced value (the
# symbol field receives +text+); all others keep the value found in the form.
# The closing boundary is appended last.
#
# @param form [Nokogiri::HTML::Document] page containing the search form
# @param text [String] reference to search
# @return [Array<String>] body parts, to be joined by the caller
def form_data(form, text)
  forced_values = {
    "view:_id1:_id2:txtSymbol" => text,
    "view:_id1:_id2:rgTrunc" => "R",
    "view:_id1:_id2:cbType" => "FP",
    "view:_id1:_id2:cbSort" => "R",
    "$$xspsubmitid" => "view:_id1:_id2:_id130",
    "$$xspsubmitscroll" => "0|102",
    "view:_id1" => "view:_id1",
  }
  controls_query = [
    "//input[@type!='radio']",
    "//input[@type='radio'][@checked]",
    "//select[@name!='view:_id1:_id2:cbLang']",
    "//textarea",
  ].join("|")

  parts = form.xpath(controls_query).each_with_object([]) do |control, acc|
    name = control[:name]
    value = forced_values.fetch(name) { control[:value] }
    acc.push "--#{BOUNDARY}"
    acc.push "Content-Disposition: form-data; name=\"#{name}\"\r\n\r\n#{value}"
  end
  parts.push "--#{BOUNDARY}--\r\n"
end
get_page(location = "/", deep = 0) click to toggle source

@param location [String] @param deep [Integer] @return [Net::HTTPOK, NilClass]

# File lib/relaton_un/hit_collection.rb, line 38
# Fetches +location+ with a GET request and follows redirects.
#
# Cookies sent by the server are stored in the jar on every hop. A response
# with status "200" is returned as is; any other response is followed through
# its Location header, up to the redirect limit.
#
# @param location [String] request path
# @param deep [Integer] number of redirects followed so far
# @return [Net::HTTPResponse, nil] the 200 response, or nil when the redirect
#   limit is exceeded or a non-200 response carries no Location header
def get_page(location = "/", deep = 0)
  return if deep > 3

  req = Net::HTTP::Get.new location
  set_headers req
  resp = @http.request req
  resp.get_fields("set-cookie")&.each { |v| @jar.parse v, @uri }
  return resp if resp.code == "200"

  # A non-200 response without a Location header (e.g. a server error) leaves
  # nothing to follow; URI.parse(nil) would raise here.
  redirect = resp["location"]
  return unless redirect

  # Resolve against the base URI so that relative Location values work too.
  get_page @uri.merge(redirect).request_uri, deep + 1
end
hit(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [RelatonUn::Hit]

# File lib/relaton_un/hit_collection.rb, line 95
# Wraps one search result element in a Hit that belongs to this collection.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [RelatonUn::Hit] hit built from the data extracted by hit_data
def hit(item)
  data = hit_data(item)
  Hit.new(data, self)
end
hit_data(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [Hash]

# File lib/relaton_un/hit_collection.rb, line 101
# Extracts the fields of one search result into a hash.
#
# The element holding the "ENGLISH" version is looked up with a "."-anchored
# XPath so that it is taken from +item+ itself. An expression that starts
# with "//" is evaluated from the document root and would pick the first
# result's element for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [Hash] hit attributes keyed by :ref, :symbol, :title, :keyword,
#   :date_pub, :date_rel, :link, :session, :agenda, :distribution, :job_number
def hit_data(item) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  # Grandparent of the span labelled "ENGLISH"; passed to date_rel and link.
  en = item.at(".//span[.='ENGLISH']/../..")
  {
    # Drop the first non-breaking space from the reference text.
    ref: item.at("div/div/a")&.text&.sub("\u00A0", ""),
    symbol: symbol(item),
    title: item.at("div/div/span")&.text,
    keyword: item.at("div[3]/div[5]/span")&.text,
    date_pub: date_pub(item),
    date_rel: date_rel(en),
    link: link(en),
    session: session(item),
    agenda: agenda(item),
    distribution: distribution(item),
    job_number: job_number(item),
  }
end
job_number(item) click to toggle source
# File lib/relaton_un/hit_collection.rb, line 166
# Reads the job number from the span inside +item+ whose id contains
# "cfJobNumE".
#
# The XPath is anchored with "." so the lookup stays inside +item+. An
# expression that starts with "//" is evaluated from the document root and
# would yield the first result's value for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [String, nil] text of the matching span, nil if absent
def job_number(item)
  item.at(".//span[contains(@id, 'cfJobNumE')]")&.text
end
page_resp(form_resp, text) click to toggle source

@param form_resp [Net::HTTPOK] @param text [String] @return [Net::HTTPOK, NilClass]

# File lib/relaton_un/hit_collection.rb, line 83
# Submits the search form and fetches the page the server redirects to.
#
# The form found in +form_resp+ is posted as multipart/form-data with the
# parts built by form_data; the Location header of the answer is then
# fetched with get_page.
#
# @param form_resp [Net::HTTPOK] response containing the search form
# @param text [String] reference to search
# @return [Net::HTTPOK, nil] the result page, or nil when the page has no
#   form action, the POST answer has no Location header, or get_page gives up
def page_resp(form_resp, text)
  form = Nokogiri::HTML(form_resp.body)
  form_node = form.at("//form")
  action = form_node && form_node[:action]
  # Without a form action there is nothing to post to.
  return unless action

  req = Net::HTTP::Post.new action
  set_headers req
  req["Content-Type"] = "multipart/form-data; boundary=#{BOUNDARY}"
  req.body = form_data(form, text).join("\r\n")
  resp = @http.request req

  # URI.parse(nil) would raise when the server answers without a redirect.
  location = resp["location"]
  return unless location

  # Resolve against the base URI so that relative Location values work too.
  get_page @uri.merge(location).request_uri
end
session(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [String]

# File lib/relaton_un/hit_collection.rb, line 150
# Reads the "Session / Year:" field of a search result.
#
# The XPath is anchored with "." so the lookup stays inside +item+. An
# expression that starts with "//" is evaluated from the document root and
# would yield the first result's value for every hit.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [String, nil] text of the span following the label, nil if absent
def session(item)
  item.at(".//label[.='Session / Year:']/following-sibling::span")&.text
end
set_headers(req) click to toggle source

@param req [Net::HTTP::Get, Net::HTTP::Post]

# File lib/relaton_un/hit_collection.rb, line 173
# Adds the cookie header (via set_cookie) and a fixed set of browser-like
# request headers to +req+.
#
# NOTE(review): Net::HTTP appears to switch off its transparent response
# decompression when a request sets Accept-Encoding explicitly — confirm the
# server answers uncompressed, otherwise the body is not decoded.
#
# @param req [Net::HTTP::Get, Net::HTTP::Post] request to decorate
def set_headers(req)
  set_cookie req
  accept = [
    "text/html,application/xhtml+xml,application/xml;q=0.9,",
    "image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;",
    "v=b3;q=0.9",
  ].join
  {
    "Accept" => accept,
    "Accept-Encoding" => "gzip, deflate, br",
    "Accept-Language" => "en-US;q=0.8,en;q=0.7",
    "Cache-Control" => "max-age=0",
    "Connection" => "keep-alive",
    "Origin" => "https://documents.un.org",
    "Referer" => "https://documents.un.org/prod/ods.nsf/home.xsp",
    "Sec-Fetch-Dest" => "document",
    "Sec-Fetch-Mode" => "navigate",
    "Sec-Fetch-Site" => "same-origin",
    "Sec-Fetch-User" => "?1",
    "Upgrade-Insecure-Requests" => "1",
  }.each { |header, value| req[header] = value }
  # Kept as the last expression so the method's value stays the agent string.
  req["User-Agent"] = AGENT
end
symbol(item) click to toggle source

@param item [Nokogiri::XML::Element] @return [Array<String>]

# File lib/relaton_un/hit_collection.rb, line 120
# Collects the document symbols shown for a search result.
#
# Looks at the non-hidden cells of +item+, finds the labels containing
# "Symbol" and takes the text of the first span following each of them.
#
# @param item [Nokogiri::XML::Element] search result element
# @return [Array<String>] symbol texts, empty when nothing matches
def symbol(item)
  visible_cell = "div/div[not(contains(@class, 'hidden'))]"
  symbol_span = "label[contains(.,'Symbol')]/following-sibling::span[1]"
  item.xpath("#{visible_cell}/#{symbol_span}").map(&:text)
end