class RelatonW3c::HitCollection

Page of hit collection.

Constants

DATADIR
DATAFILE
DOMAIN
TYPES

Public Class Methods

new(ref) click to toggle source

@param ref [String] reference to search

Calls superclass method
# File lib/relaton_w3c/hit_collection.rb, line 23
# Splits the reference into an optional document type and a "title [date]"
# remainder, then loads the matching hits.
#
# @param ref [String] reference to search
def initialize(ref)
  parts = %r{
    ^(?:W3C\s)?
    (?<type>(?:CR|NOTE|PER|PR|REC|RET|WD|Candidate\sRecommendation|
      Group\sNote|Proposed\sEdited\sRecommendation|Proposed\sRecommendation|
      Recommendation|Retired|Working\sDraft))? # type
    \s?
    (?<title_date>.+) # title_date
  }x.match(ref)
  super
  # Both values are nil when the reference does not match the pattern at all.
  @array = from_yaml(parts && parts[:title_date], parts && parts[:type])
end

Private Instance Methods

data() click to toggle source

Fetches YAML data

@return [Hash]

# File lib/relaton_w3c/hit_collection.rb, line 129
# Returns the parsed YAML hit list, refreshing the local cache file first
# when it is missing or was last written before today.
#
# @return [Array<Hash>, Object] whatever the cache file's YAML parses to,
#   or the value assigned to @data by fetch_data
def data
  FileUtils.mkdir_p DATADIR
  # Use the modification time, not ctime: on Windows File.ctime is the
  # creation time and does not change when the cache file is rewritten,
  # which would force a re-fetch on every call once the file is a day old.
  mtime = File.mtime DATAFILE if File.exist? DATAFILE
  fetch_data if !mtime || mtime.to_date < Date.today
  @data ||= YAML.safe_load File.read(DATAFILE, encoding: "UTF-8")
end
fetch_data() click to toggle source

Fetch data from the server and save it to a file.

# File lib/relaton_w3c/hit_collection.rb, line 139
# Downloads the "/TR/" listing from DOMAIN, converts every list item into a
# hit hash, keeps the result in @data and writes it to DATAFILE as YAML.
def fetch_data
  response = Net::HTTP.get_response(URI.parse("#{DOMAIN}/TR/"))
  # Anything other than a 200 response leaves @data and the file untouched.
  return unless response.code == "200"

  items = Nokogiri::HTML(response.body).xpath("//ul[@id='container']/li")
  @data = items.map do |item|
    fetch_hit(item, item.at("h2/a"), item.at("p[@class='pubdetails']"))
  end
  File.write(DATAFILE, @data.to_yaml, encoding: "UTF-8")
end
fetch_hit(h_el, link, pubdetails) click to toggle source

@param h_el [Nokogiri::XML::Element] @param link [Nokogiri::XML::Element] @param pubdetails [Nokogiri::XML::Element]

# File lib/relaton_w3c/hit_collection.rb, line 156
# Builds the hash describing one document from its list item.
#
# @param h_el [Nokogiri::XML::Element] list item of the document
# @param link [Nokogiri::XML::Element] anchor holding title and URL
# @param pubdetails [Nokogiri::XML::Element] publication details paragraph
# @return [Hash] keys: title, link, type, workgroup, datepub, history,
#   editor, keyword
def fetch_hit(h_el, link, pubdetails) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  stripped_texts = ->(path) { h_el.xpath(path).map { |li| li.text.strip } }

  # First YYYY-MM-DD found in the details text, or "" when there is none.
  published = pubdetails.at("text()").text[/\d{4}-\d{2}-\d{2}/].to_s
  editors = stripped_texts.call("ul[@class='editorlist']/li")
  keywords = stripped_texts.call("ul[@class='taglist']/li")

  hit = {}
  hit["title"] = link.text.gsub("\u00a0", " ") # non-breaking space -> space
  hit["link"] = link[:href]
  hit["type"] = h_el.at("div").text.upcase
  hit["workgroup"] = h_el.xpath("p[@class='deliverer']").map(&:text)
  hit["datepub"] = published
  hit["history"] = pubdetails.at("a[text()='History']")[:href]
  hit["editor"] = editors
  hit["keyword"] = keywords
  hit
end
filter_history_by_date(history, history_doc, type, date) click to toggle source

@param history [Nokogiri::XML::NodeSet] @param history_doc [Nokogiri::HTML::NodeSet] @param type [String] @param date [String] @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]

# File lib/relaton_w3c/hit_collection.rb, line 94
# Narrows history rows down to those published on the given date.
#
# @param history [Nokogiri::XML::NodeSet] rows already selected by type
# @param history_doc [Nokogiri::HTML::NodeSet] whole history document
# @param type [String, nil] requested document type
# @param date [String] date to match against the date column
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
def filter_history_by_date(history, history_doc, type, date)
  # Without a type, search every table row of the document by date.
  unless type
    return history_doc.xpath(
      "//table//td[@class='table_datecol'][.='#{date}']/..",
    )
  end

  history.select { |row| row.at("td[@class='table_datecol']").text == date }
end
from_yaml(title_date, type) click to toggle source

Fetch data from YAML

@param title_date [String] @param type [String] @return [Array<Hash>]

# File lib/relaton_w3c/hit_collection.rb, line 44
# Selects the cached hits matching a title (optionally followed by a
# YYYY-MM-DD date) and wraps them in Hit objects.
#
# A hit matches when its title equals the requested title ignoring case, or
# when the last path segment of its link contains "-<title>-", and it also
# passes type_date_filter. When nothing matches, falls back to every hit
# whose last link segment contains the title, without the type/date filter.
#
# The title is matched as literal text: it comes from the user's reference,
# so characters such as "(", "[", "+" or "." must not be treated as regular
# expression syntax (an unbalanced "(" used to raise RegexpError).
#
# @param title_date [String] title, optionally followed by a date
# @param type [String] requested document type
# @return [Array<Hit>]
def from_yaml(title_date, type) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
  /(?<title>.+)\s(?<date>\d{4}-\d{2}-\d{2})$/ =~ title_date
  title ||= title_date
  slug_of = ->(hit) { hit["link"].split("/").last }
  result = data.select do |hit|
    (hit["title"].casecmp?(title) ||
      slug_of.call(hit).include?("-#{title}-")) &&
      type_date_filter(hit, type, date)
  end
  if result.empty?
    result = data.select { |h| slug_of.call(h).include?(title.to_s) }
  end
  result.map { |h| Hit.new(h, self) }
end
get_history(hit, type, date) click to toggle source

@param hit [Hash] @param type [String] @param date [String] @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]

# File lib/relaton_w3c/hit_collection.rb, line 78
# Downloads the history page of a hit and returns the table rows whose link
# text contains the long name of the requested type, optionally narrowed
# down by date.
#
# @param hit [Hash] hit whose "history" value is appended to DOMAIN
# @param type [String] requested document type
# @param date [String] requested date, may be nil
# @return [Array<Nokogiri::XML::Element>, Nokogiri::HTML::NodeSet]
def get_history(hit, type, date)
  history_uri = URI.parse(HitCollection::DOMAIN + hit["history"])
  history_doc = Nokogiri::HTML(Net::HTTP.get(history_uri))
  rows = history_doc.xpath(
    "//table//a[contains(.,'#{long_type(type)}')]/../..",
  )
  date ? filter_history_by_date(rows, history_doc, type, date) : rows
end
long_type(type) click to toggle source

Convert short type name to long

@param type [String] @return [String]

# File lib/relaton_w3c/hit_collection.rb, line 121
# Looks the type up in TYPES and returns the mapped value; a type without a
# mapping is returned unchanged.
#
# @param type [String]
# @return [String]
def long_type(type)
  long_name = TYPES[type]
  return type unless long_name

  long_name
end
short_type(type) click to toggle source

Convert long type name to short

@param type [String] @return [String]

# File lib/relaton_w3c/hit_collection.rb, line 111
# Reverse lookup in TYPES: returns the first key whose value equals the
# given type; a type that is not a value of TYPES is returned unchanged.
#
# @param type [String]
# @return [String]
def short_type(type)
  TYPES.key(type) || type
end
type_date_filter(hit, type, date) click to toggle source

@param hit [Hash] @param type [String] @param date [String] @return [TrueClass, FalseClass]

# File lib/relaton_w3c/hit_collection.rb, line 62
# Decides whether a hit satisfies the requested type and date. When the hit
# itself does not match, its history is consulted and, if a matching entry
# exists, the hit is updated in place from the first history row.
#
# @param hit [Hash] hit to check; "type", "datepub" and "link" may be
#   overwritten
# @param type [String] requested document type, may be nil
# @param date [String] requested date, may be nil
# @return [TrueClass, FalseClass]
def type_date_filter(hit, type, date) # rubocop:disable Metrics/AbcSize
  type_differs = type && hit["type"] != short_type(type)
  # Hits store their publication date under "datepub"; comparing against a
  # non-existent "date" key made every dated query fetch the history page.
  date_differs = date && hit["datepub"] != date
  return true unless type_differs || date_differs

  history = get_history hit, type, date
  return false unless history.any?

  entry = history.first
  # NOTE(review): when only a date was requested, type is nil and this
  # stores short_type(nil) as the hit's type - confirm that is intended.
  hit["type"] = short_type type
  hit["datepub"] = entry.at("td").text
  hit["link"] = entry.at("a")[:href]
  true
end