class LinkCheck

Constants

SRI_REL_TYPES

Whitelist of `rel` values for elements affected by the Subresource Integrity specification: https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets

Public Instance Methods

check_schemes(link, line, content) click to toggle source
# File lib/html-proofer/check/links.rb, line 83
# Runs the scheme-specific validation for +link+.
#
# +mailto+ and +tel+ links are handed to their dedicated handlers. A plain
# +http+ link is reported only when the +:enforce_https+ option is set.
# Every other scheme is left untouched and the method returns nil.
#
# link    - element exposing +scheme+ and +href+
# line    - line number forwarded to the reported issue
# content - node markup forwarded to the reported issue
def check_schemes(link, line, content)
  scheme = link.scheme

  if scheme == 'mailto'
    handle_mailto(link, line, content)
  elsif scheme == 'tel'
    handle_tel(link, line, content)
  elsif scheme == 'http' && @options[:enforce_https]
    add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
  end
end
check_sri(line, content) click to toggle source
# File lib/html-proofer/check/links.rb, line 153
# Reports missing Subresource Integrity / CORS attributes on the current
# @link, but only when its +rel+ value is listed in SRI_REL_TYPES.
#
# Presence is probed with +defined?+, i.e. whether @link exposes an
# +integrity+ / +crossorigin+ method at all, not whether the value is empty.
# Returns nil when nothing is reported.
#
# line    - line number forwarded to the reported issue
# content - node markup forwarded to the reported issue
def check_sri(line, content)
  return unless SRI_REL_TYPES.include?(@link.rel)

  has_integrity = defined?(@link.integrity)
  has_cors = defined?(@link.crossorigin)
  return if has_integrity && has_cors

  message =
    if has_integrity
      "CORS not provided for external resource in: #{@link.src}"
    elsif has_cors
      "Integrity is missing in: #{@link.src}"
    else
      "SRI and CORS not provided in: #{@link.src}"
    end

  add_issue(message, line: line, content: content)
end
find_fragments(html, fragment_ids) click to toggle source
# File lib/html-proofer/check/links.rb, line 136
# Queries +html+ for elements whose +id+ or +name+ attribute equals any of
# the given fragment identifiers.
#
# html         - document object responding to +xpath+
# fragment_ids - Array of fragment strings to look for
#
# Two XPath expressions are built per fragment (one for @id, one for @name).
# An XpathFunctions instance is appended as the last argument so that the
# custom +case_sensitive_equals+ function used in the expressions is available.
#
# Returns whatever +html.xpath+ returns for those expressions.
def find_fragments(html, fragment_ids)
  xpaths = fragment_ids.flat_map do |frag_id|
    # The id is cut at every apostrophe and rebuilt inside concat(), with each
    # apostrophe passed as its own double-quoted argument. The -1 limit keeps
    # trailing empty pieces, so an id that ends in an apostrophe is not
    # silently truncated.
    pieces = frag_id.split("'", -1)
    # The trailing '' argument guarantees concat() always receives at least
    # two arguments, even for ids without any apostrophe.
    escaped_frag_id = "'#{pieces.join("', \"'\", '")}', ''"
    [
      "//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
      "//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
    ]
  end
  xpaths << XpathFunctions.new

  html.xpath(*xpaths)
end
handle_hash(link, path, line, content) click to toggle source
# File lib/html-proofer/check/links.rb, line 108
# Validates the fragment part of +link+.
#
# For an internal link whose fragment cannot be found in the link's own
# document, an issue is recorded and its result returned. Otherwise, an
# external link is delegated to +external_link_check+. In every remaining
# case the method returns true.
#
# link    - element exposing +internal?+, +external?+, +html+ and +hash+
# path    - path forwarded to the reported issue
# line    - line number forwarded to the reported issue
# content - node markup forwarded to the reported issue
def handle_hash(link, path, line, content)
  if link.internal?
    fragment_missing = !hash_exists?(link.html, link.hash)
    if fragment_missing
      message = "linking to internal hash ##{link.hash} that does not exist"
      return add_issue(message, path: path, line: line, content: content)
    end
  end

  return external_link_check(link, line, content) if link.external?

  true
end
handle_mailto(link, line, content) click to toggle source
# File lib/html-proofer/check/links.rb, line 96
# Validates a +mailto:+ link.
#
# An issue is reported when the path (the address part) is empty, or when it
# does not contain an "@". Returns nil when the address passes both checks.
#
# link    - element exposing +path+ and +href+
# line    - line number forwarded to the reported issue
# content - node markup forwarded to the reported issue
def handle_mailto(link, line, content)
  address = link.path

  return add_issue("#{link.href} contains no email address", line: line, content: content) if address.empty?
  return if address.include?('@')

  add_issue("#{link.href} contains an invalid email address", line: line, content: content)
end
handle_tel(link, line, content) click to toggle source
# File lib/html-proofer/check/links.rb, line 104
# Validates a +tel:+ link: reports an issue when the path (the number part)
# is empty. Returns nil when a number is present.
#
# link    - element exposing +path+ and +href+
# line    - line number forwarded to the reported issue
# content - node markup forwarded to the reported issue
def handle_tel(link, line, content)
  return unless link.path.empty?

  add_issue("#{link.href} contains no phone number", line: line, content: content)
end
hash_exists?(html, href_hash) click to toggle source
# File lib/html-proofer/check/links.rb, line 129
# Tells whether the fragment +href_hash+ resolves to something in +html+.
#
# Both the raw fragment and its URI-unescaped form are considered.
#
# html      - document handed on to +find_fragments+
# href_hash - fragment string as written in the link
#
# Returns true or false.
def hash_exists?(html, href_hash)
  candidates = [href_hash, Addressable::URI.unescape(href_hash)]

  # "top" is always accepted without searching the document:
  # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
  return true if candidates.include?('top')

  !find_fragments(html, candidates).empty?
end
missing_href?() click to toggle source
# File lib/html-proofer/check/links.rb, line 6
# Tells whether the current @link lacks a usable target.
#
# For a +source+ node only +src+ is inspected. For any other node the link
# counts as missing when +href+, +name+ and +id+ are all blank. They are
# inspected in that order, stopping at the first non-blank one.
def missing_href?
  if @node.name == 'source'
    blank?(@link.src)
  else
    %i[href name id].all? { |attribute| blank?(@link.public_send(attribute)) }
  end
end
placeholder?() click to toggle source
# File lib/html-proofer/check/links.rb, line 12
# Tells whether the current @link is a pure anchor target: it carries a
# non-blank +id+ or +name+ and its +href+ is nil.
def placeholder?
  labelled = !(blank?(@link.id) && blank?(@link.name))
  labelled && @link.href.nil?
end
run() click to toggle source
# File lib/html-proofer/check/links.rb, line 16
# Entry point of the check: walks every +a+, +link+ and +source+ node of
# @html, records an issue for each problem found, registers the remaining
# links through +add_to_external_urls+ / +add_to_internal_urls+, and finally
# returns +external_urls+.
#
# The order of the steps inside the loop matters: URL validity and scheme
# checks run before the missing-href check, and all of them run before
# non-HTTP remote links are skipped.
def run
  @html.css('a, link, source').each do |node|
    # @link is shared state: the helper predicates and check_sri read it.
    @link = create_element(node)
    line = node.line
    content = node.to_s

    next if @link.ignore?

    # anchors that only carry an id/name (no href) are targets, not links
    next if placeholder?
    # a bare "#" href is skipped when the link allows it
    next if @link.allow_hash_href? && @link.href == '#'

    # is it even a valid URL?
    unless @link.valid?
      add_issue("#{@link.href} is an invalid URL", line: line, content: content)
      next
    end

    # scheme-specific checks (mailto, tel, http); this does not stop processing of the node
    check_schemes(@link, line, content)

    # is there even an href?
    if missing_href?
      next if @link.allow_missing_href?
      # HTML5 allows dropping the href: http://git.io/vBX0z
      # NOTE(review): presumably internal_subset is the document's doctype node
      # (nil when there is none) — confirm against the parser in use.
      next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)

      add_issue('anchor has no href attribute', line: line, content: content)
      next
    end

    # intentionally here because we still want valid? & missing_href? to execute
    next if @link.non_http_remote?

    # remote link that is neither a pure fragment nor internal
    if !@link.href&.start_with?('#') && !@link.internal? && @link.remote?
      # SRI is only checked for <link> nodes, and only when enabled on the link
      check_sri(line, content) if @link.check_sri? && node.name == 'link'
      # we need to skip these for now; although the domain may be valid,
      # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
      next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'

      unless @link.path?
        add_issue("#{@link.href} is an invalid URL", line: line, content: content)
        next
      end

      # falls back to src when the element has no href
      add_to_external_urls(@link.href || @link.src)
      next
    elsif @link.internal?
      # register the internal link, then report it right away if its target
      # does not exist and it carries no fragment
      add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
      add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
    end
  end

  external_urls
end