class Nanoc::Checking::Checks::ExternalLinks

A validator that verifies that all external links point to a location that exists.

@api private

Public Instance Methods

excluded?(href) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 147
# Tells whether the given href matches one of the exclusion patterns
# configured under checks -> external_links -> exclude.
#
# Every configured pattern is compiled with Regexp.new and may match
# anywhere in the href. Missing configuration levels fall back to an empty
# pattern list, in which case nothing is excluded.
#
# @param href the link target to test
# @return [Boolean] true when at least one pattern matches
def excluded?(href)
  link_settings = @config.fetch(:checks, {}).fetch(:external_links, {})
  link_settings.fetch(:exclude, []).any? do |pattern|
    Regexp.new(pattern).match?(href)
  end
end
excluded_file?(file) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 152
# Tells whether the given output file matches one of the patterns
# configured under checks -> external_links -> exclude_files.
#
# Every configured pattern is compiled with Regexp.new and may match
# anywhere in the file name. Missing configuration levels fall back to an
# empty pattern list, in which case no file is excluded.
#
# @param file the file name to test
# @return [Boolean] true when at least one pattern matches
def excluded_file?(file)
  link_settings = @config.fetch(:checks, {}).fetch(:external_links, {})
  link_settings.fetch(:exclude_files, []).any? do |pattern|
    Regexp.new(pattern).match?(file)
  end
end
extract_location(res, url) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 104
# Determines the absolute redirect target announced by a response.
#
# The Location header may hold an absolute URL or a relative reference.
# Relative references (including network-path references such as
# "//host/path" and paths without a leading slash) are resolved against the
# URL that was requested, the way URI::Generic#merge resolves references.
#
# @param res a response-like object; only res['Location'] is read
# @param url [URI::Generic] the URL that was requested (left unmodified)
# @return [String, nil] the redirect target, or nil when the response
#   carries no Location header
def extract_location(res, url)
  location = res['Location']
  return nil if location.nil?

  # Absolute HTTP(S) targets are passed through untouched.
  return location if %r{\Ahttps?://}.match?(location)

  begin
    url.merge(location).to_s
  rescue URI::Error
    # The header is not a parseable URI reference. Fall back to plain
    # concatenation onto the site root so the caller still gets a string
    # it can report.
    base_url = url.dup
    base_url.path = (location.start_with?('/') ? '' : '/')
    base_url.query = nil
    base_url.fragment = nil
    base_url.to_s + location
  end
end
path_for_url(url) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 121
# Builds the request target (path plus optional query string) for a URL.
#
# A nil or empty path is replaced by "/". When the URL has a query, it is
# appended after a "?" separator. The fragment is not included.
#
# @param url an object responding to #path and #query (e.g. a URI)
# @return [String] the path, followed by "?query" when a query is present
def path_for_url(url)
  request_path = url.path
  request_path = '/' if request_path.nil? || request_path.empty?

  url.query ? "#{request_path}?#{url.query}" : request_path
end
request_url_once(url) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 136
# Performs a single HTTP GET request for the given URL and returns the
# response, without following redirects or retrying.
#
# For URI::HTTPS URLs the connection uses SSL with certificate verification
# switched off (VERIFY_NONE).
#
# @param url [URI::Generic] the URL to fetch; host, port, path and query are used
# @return the response object returned by Net::HTTP#request
def request_url_once(url)
  client = Net::HTTP.new(url.host, url.port)
  if url.instance_of?(URI::HTTPS)
    client.use_ssl = true
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  request = Net::HTTP::Get.new(path_for_url(url))
  request['User-Agent'] = "Mozilla/5.0 Nanoc/#{Nanoc::VERSION} (link rot checker)"

  client.request(request)
end
run() click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 12
# Runs the check: collects the external hrefs found in the non-excluded
# output HTML files, validates them in random order, and records one issue
# per (broken href, referencing file) pair.
def run
  # Find all broken external hrefs
  # TODO: de-duplicate this (duplicated in internal links check)
  checked_files = output_html_filenames.reject { |f| excluded_file?(f) }
  collector = ::Nanoc::Extra::LinkCollector.new(checked_files, :external)
  files_by_href = collector.filenames_per_href
  broken = select_invalid(files_by_href.keys.shuffle)

  # Report each broken href once for every file that references it
  broken.each do |result|
    files_by_href[result.href].each do |filename|
      add_issue("broken reference to #{result.href}: #{result.explanation}", subject: filename)
    end
  end
end
select_invalid(hrefs) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 41
# Validates the given hrefs using Parallel.map with ten threads and keeps
# only the non-nil outcomes of #validate (i.e. the hrefs it reported on).
#
# @param hrefs [Array] the hrefs to check
# @return [Array] the non-nil values returned by #validate
def select_invalid(hrefs)
  outcomes = ::Parallel.map(hrefs, in_threads: 10) do |href|
    validate(href)
  end

  outcomes.compact
end
validate(href) click to toggle source
# File lib/nanoc/checking/checks/external_links.rb, line 45
# Checks a single external href and describes what is wrong with it, if
# anything.
#
# Hrefs that match an exclusion pattern or that are not HTTP(S) are skipped.
# Otherwise the URL is requested up to five times, with a growing timeout
# per attempt (3, 5, 10, 30 and 60 seconds). A failed request moves on to
# the next attempt; a temporary redirect is followed, and each hop also
# uses up one attempt.
#
# @param href [String] the link target as it appears in the output
# @return [Result, nil] nil when the href is fine or skipped; otherwise a
#   Result built from the href and an explanation (HTTP status code, error
#   message, or a description of the redirect problem)
def validate(href)
  # Parse
  begin
    url = URI.parse(href)
  rescue URI::Error
    return Result.new(href, 'invalid URI')
  end

  # Skip excluded URLs
  return nil if excluded?(href)

  # Skip non-HTTP URLs
  return nil if url.scheme !~ /\Ahttps?\z/

  # Get status
  res = nil
  last_err = nil
  timeouts = [3, 5, 10, 30, 60]
  last_attempt = timeouts.size - 1
  timeouts.each_with_index do |timeout, i|
    begin
      Timeout.timeout(timeout) do
        res = request_url_once(url)
      end
    rescue => e
      # Remember the failure and retry with the next (longer) timeout
      last_err = e
      next
    end

    if /^3..$/.match?(res.code)
      return Result.new(href, 'too many redirects') if i == last_attempt

      location = extract_location(res, url)
      return Result.new(href, 'redirection without a target location') if location.nil?

      # ignore redirects back onto self (misused to set HTTP cookies)
      return nil if href == location

      if /^30[18]$/.match?(res.code)
        return Result.new(href, "link has moved permanently to '#{location}'")
      end

      # A malformed Location header must be reported as a broken link
      # rather than raising and aborting the whole check.
      begin
        url = URI.parse(location)
      rescue URI::Error
        return Result.new(href, "redirection to an invalid URI '#{location}'")
      end
    elsif res.code == '200'
      return nil
    else
      return Result.new(href, res.code)
    end
  end

  # Reaching this point means the final attempt raised, so an error must
  # have been recorded.
  if last_err
    Result.new(href, last_err.message)
  else
    raise Nanoc::Core::Errors::InternalInconsistency, 'last_err cannot be nil'
  end
end