class Hesburgh::Lib::HtmlScrubber::AllowedTagsScrubber

Responsible for stripping and general sanitization of HTML documents

Constants

FALLBACK

Attributes

attributes[R]
direction[RW]
tags[R]

Public Class Methods

new(tags: FALLBACK, attributes: FALLBACK, direction: :bottom_up) click to toggle source

@param tags [Symbol, Array<String>] The tags we are going to keep. Any other tag (but not its content) is stripped.
@param attributes [Symbol, Array<String>] The attributes we are going to keep. Any other attribute and its value are dropped.

@param direction [Symbol] How we are processing the nodes. This is an assumption based on Loofah::Scrubber.

# File lib/hesburgh/lib/html_scrubber.rb, line 43
# Configures which tags and attributes survive scrubbing and how nodes are
# walked. Each argument is handed to its own writer.
#
# @param tags [Symbol, Array<String>] tags to keep, or FALLBACK
# @param attributes [Symbol, Array<String>] attributes to keep, or FALLBACK
# @param direction [Symbol] traversal order; defaults to :bottom_up
def initialize(tags: FALLBACK, attributes: FALLBACK, direction: :bottom_up)
  self.tags = tags
  self.attributes = attributes
  self.direction = direction
end

Public Instance Methods

call(input)
Alias for: sanitize
sanitize(input) click to toggle source

A convenience method for sanitization

# File lib/hesburgh/lib/html_scrubber.rb, line 50
# Scrubs an HTML string using this object as the Loofah scrubber.
#
# @param input [Object] the value to sanitize
# @return [String, Object] '' when the input's string form is blank; the
#   input itself, untouched, when it is not a String; otherwise the scrubbed
#   markup with surrounding whitespace stripped
def sanitize(input)
  return '' if input.to_s.strip.empty?
  return input unless input.is_a?(String)

  scrubbed = Loofah.fragment(input).scrub!(self)
  scrubbed.to_s.strip
end
Also aliased as: call
scrub(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 57
# Applies the allow-list to a single node.
#
# * script nodes are removed together with their content;
# * allowed nodes keep their place, have their attributes scrubbed, and
#   CONTINUE is returned;
# * any other node is replaced by its children (the children are moved in
#   front of the node, then the node itself is removed).
#
# NOTE(review): in the last case the children are re-parented before the node
# is removed; with a :top_down traversal they may not be revisited — confirm
# against Loofah::Scrubber.
#
# @param node [Object] the node handed over by the traversal
# @return [Object] CONTINUE for allowed nodes, otherwise the result of
#   node.remove
def scrub(node)
  if script_node?(node)
    node.remove
  elsif node_allowed?(node)
    scrub_node_attributes(node)
    CONTINUE
  else
    node.before(node.children)
    node.remove
  end
end

Private Instance Methods

allowed_not_element_node_types() click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 97
# Node types that are not elements but are nevertheless kept as-is.
#
# @return [Array] the TEXT_NODE and CDATA_SECTION_NODE type constants
def allowed_not_element_node_types
  node_class = Nokogiri::XML::Node
  [node_class::TEXT_NODE, node_class::CDATA_SECTION_NODE]
end
attributes=(input) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 77
# Stores the attribute allow-list after normalizing it.
#
# @param input [Symbol, Array<String>, Object] FALLBACK or the attributes to keep
def attributes=(input)
  normalized = extract_with_fallback_consideration(input)
  @attributes = normalized
end
extract_with_fallback_consideration(input) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 81
# Normalizes a tags/attributes argument: FALLBACK is passed through
# unchanged, everything else is wrapped into an Array via Array.wrap.
#
# @param input [Symbol, Array<String>, Object] the raw argument
# @return [Object] FALLBACK, or the result of Array.wrap(input)
def extract_with_fallback_consideration(input)
  input == FALLBACK ? FALLBACK : Array.wrap(input)
end
fallback_allowed_element_detection(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 106
# Decides whether a node is allowed when no explicit tag list was given, by
# asking Loofah::HTML5::Scrub about the node's name.
#
# @param node [#name] the node under inspection
# @return [Object] whatever Loofah::HTML5::Scrub.allowed_element? answers
def fallback_allowed_element_detection(node)
  element_name = node.name
  Loofah::HTML5::Scrub.allowed_element?(element_name)
end
fallback_scrub_node_attributes(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 101
# Scrubs a node's attributes when no explicit attribute list was given, by
# delegating to Loofah::HTML5::Scrub.scrub_attributes.
#
# @param node [Object] the node whose attributes are scrubbed
# @return [true] always, regardless of what the delegate returns
def fallback_scrub_node_attributes(node)
  html5_scrub = Loofah::HTML5::Scrub
  html5_scrub.scrub_attributes(node)
  true
end
node_allowed?(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 110
# Tells whether a node may stay in the document.
#
# With tags set to FALLBACK the decision is delegated to
# fallback_allowed_element_detection. Otherwise nodes whose type is listed in
# allowed_not_element_node_types are always kept, and element nodes are kept
# only when their name is in the tag list; every other node type is rejected.
#
# @param node [#name, #type] the node under inspection
# @return [Boolean, Object] the fallback's answer, or true/false
def node_allowed?(node)
  if tags == FALLBACK
    fallback_allowed_element_detection(node)
  elsif allowed_not_element_node_types.include?(node.type)
    true
  else
    node.type == Nokogiri::XML::Node::ELEMENT_NODE && tags.include?(node.name)
  end
end
script_node?(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 86
# Tells whether the node is named exactly 'script'.
#
# @param node [#name] the node under inspection
# @return [Boolean]
def script_node?(node)
  tag_name = node.name
  tag_name == 'script'
end
scrub_node_attributes(node) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 90
# Removes every attribute of the node that is not in the attribute
# allow-list. With attributes set to FALLBACK the work is delegated to
# fallback_scrub_node_attributes instead.
#
# @param node [#attribute_nodes] the node whose attributes are filtered
# @return [Object] the fallback's result, or the node's attribute_nodes
def scrub_node_attributes(node)
  if attributes == FALLBACK
    fallback_scrub_node_attributes(node)
  else
    node.attribute_nodes.each do |attribute|
      next if attributes.include?(attribute.name)

      attribute.remove
    end
  end
end
tags=(input) click to toggle source
# File lib/hesburgh/lib/html_scrubber.rb, line 73
# Stores the tag allow-list after normalizing it.
#
# @param input [Symbol, Array<String>, Object] FALLBACK or the tags to keep
def tags=(input)
  normalized = extract_with_fallback_consideration(input)
  @tags = normalized
end