class Jets::Html::PermitScrubber

Jets::Html::PermitScrubber

Jets::Html::PermitScrubber allows you to permit only your own tags and/or attributes.

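Jets::Html::PermitScrubber can be subclassed to determine:

- When a node should be skipped, via skip_node?.
- When a node is allowed, via allowed_node?.
- When an attribute should be scrubbed, via scrub_attribute?.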

Subclasses don't need to worry about whether tags or attributes are set. If tags or attributes are not set, Loofah's behavior will be used. If you override allowed_node? and no tags are set, it will not be called; Loofah's behavior will be used instead. The same applies to scrub_attribute? and attributes, respectively.

Text and CDATA nodes are skipped by default. Unallowed elements will be stripped, i.e. element is removed but its subtree kept. Supplied tags and attributes should be Enumerables.

tags= If set, elements not included in the list will be stripped. If not set, elements are stripped based on Loofah's HTML5::Scrub.allowed_element?.

attributes= If set, attributes not included in the list will be removed. If not set, attributes are removed based on Loofah's HTML5::Scrub.scrub_attributes.

# Example subclass: permits a fixed set of tags, skips text nodes and
# scrubs only the "style" attribute.
class CommentScrubber < Html::PermitScrubber

# Sets up the base scrubber, then restricts the permitted tags.
def initialize
  super
  self.tags = %w[form script comment blockquote]
end

# Text nodes are left untouched.
def skip_node?(node)
  node.text?
end

# Only the "style" attribute is scrubbed.
def scrub_attribute?(name)
  name == "style"
end

end

See the documentation for Nokogiri::XML::Node to understand what's possible with nodes: nokogiri.org/Nokogiri/XML/Node.html

Attributes

attributes[R]
tags[R]

Public Class Methods

new() click to toggle source
# File lib/jets/html/scrubbers.rb, line 50
# Builds a scrubber with no permitted tags or attributes configured yet.
# The traversal direction is set to :bottom_up.
def initialize
  @direction = :bottom_up
  @tags = nil
  @attributes = nil
end

Public Instance Methods

attributes=(attributes) click to toggle source
# File lib/jets/html/scrubbers.rb, line 59
# Stores the permitted attributes after running them through validate!.
def attributes=(attributes)
  checked = validate!(attributes, :attributes)
  @attributes = checked
end
scrub(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 63
# Scrubs a single node.
#
# CDATA nodes are replaced by a text node holding the same text. Nodes for
# which skip_node? is true are left alone. A node rejected by keep_node? is
# passed to scrub_node; if that returns STOP, STOP is returned. Otherwise the
# node's attributes are scrubbed and that result is returned.
def scrub(node)
  if node.cdata?
    replacement = node.document.create_text_node(node.text)
    node.replace(replacement)
    return CONTINUE
  end

  return CONTINUE if skip_node?(node)
  return STOP if !keep_node?(node) && scrub_node(node) == STOP

  scrub_attributes(node)
end
tags=(tags) click to toggle source
# File lib/jets/html/scrubbers.rb, line 55
# Stores the permitted tags after running them through validate!.
def tags=(tags)
  checked = validate!(tags, :tags)
  @tags = checked
end

Protected Instance Methods

allowed_node?(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 80
# True when the node's name is among the permitted tags.
# Reads @tags directly, so it must only be called once tags are set.
def allowed_node?(node)
  tag_name = node.name
  @tags.include?(tag_name)
end
keep_node?(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 92
# Decides whether a node stays in the document.
# With tags configured the decision is delegated to allowed_node?;
# otherwise Loofah's allowed_element? check on the node name is used.
def keep_node?(node)
  return allowed_node?(node) if @tags

  Loofah::HTML5::Scrub.allowed_element?(node.name)
end
scrub_attribute(node, attr_node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 134
# Applies value-based safety checks to a single attribute of +node+.
#
# node      - the element that owns the attribute.
# attr_node - the attribute node being inspected; it may be removed or have
#             its value rewritten.
#
# Returns whatever Loofah::HTML5::Scrub.force_correct_attribute_escaping!
# returns for +node+.
#
# NOTE(review): uses CGI.unescapeHTML; assumes 'cgi' is required elsewhere
# in the file — confirm.
def scrub_attribute(node, attr_node)
  # Qualify the attribute name with its namespace prefix when it has one,
  # so the lookups below see names such as "xlink:href".
  attr_name = if attr_node.namespace
                "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
              else
                attr_node.node_name
              end

  if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
    # this block lifted nearly verbatim from HTML5 sanitization
    # Normalize the value (unescape HTML entities, strip control characters,
    # downcase) before looking at its protocol.
    val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase
    # Remove the attribute when the value looks like "scheme:" and the part
    # before the protocol separator is not in ALLOWED_PROTOCOLS.
    if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0])
      attr_node.remove
    end
  end
  if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
    # Replace url(...) references whose target does not start with '#'
    # with a single space.
    attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
  end
  # On elements listed in SVG_ALLOW_LOCAL_HREF, drop an xlink:href whose value
  # has a line whose first non-whitespace character is not '#'.
  if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
    attr_node.remove
  end

  # Drop a src attribute whose value contains no non-whitespace character.
  node.remove_attribute(attr_node.name) if attr_name == 'src' && attr_node.value !~ /[^[:space:]]/

  Loofah::HTML5::Scrub.force_correct_attribute_escaping! node
end
scrub_attribute?(name) click to toggle source
# File lib/jets/html/scrubbers.rb, line 88
# True when +name+ is not among the permitted attributes.
# Reads @attributes directly, so it must only be called once attributes
# are set.
def scrub_attribute?(name)
  permitted = @attributes.include?(name)
  !permitted
end
scrub_attributes(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 105
# Scrubs the attributes of +node+.
#
# With permitted attributes configured, every attribute rejected by
# scrub_attribute? is removed; every remaining attribute gets the value-based
# checks of scrub_attribute. The node's CSS is scrubbed afterwards. Without
# permitted attributes, Loofah's scrub_attributes handles the node.
#
# Returns the result of scrub_css_attribute or of Loofah's scrub_attributes,
# depending on the branch taken.
def scrub_attributes(node)
  if @attributes
    node.attribute_nodes.each do |attr|
      if scrub_attribute?(attr.name)
        # A removed attribute is detached from the node, so running further
        # value checks on it would be pointless work on a dead node.
        attr.remove
      else
        scrub_attribute(node, attr)
      end
    end

    scrub_css_attribute(node)
  else
    Loofah::HTML5::Scrub.scrub_attributes(node)
  end
end
scrub_css_attribute(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 118
# Scrubs the CSS of +node+.
# Delegates to Loofah's scrub_css_attribute when it responds to it;
# otherwise rewrites the value of the node's 'style' attribute, if present,
# with Loofah's scrub_css.
def scrub_css_attribute(node)
  scrubber = Loofah::HTML5::Scrub
  return scrubber.scrub_css_attribute(node) if scrubber.respond_to?(:scrub_css_attribute)

  style_attr = node.attributes['style']
  style_attr.value = scrubber.scrub_css(style_attr.value) if style_attr
end
scrub_node(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 100
# Strips +node+: its children are inserted before it, then the node itself
# is removed. Returns the result of node.remove.
def scrub_node(node)
  subtree = node.children
  node.before(subtree) # strip
  node.remove
end
skip_node?(node) click to toggle source
# File lib/jets/html/scrubbers.rb, line 84
# True when +node+ should be left untouched by the scrubber.
# By default that is the case for text nodes.
def skip_node?(node)
  node.text?
end
validate!(var, name) click to toggle source
# File lib/jets/html/scrubbers.rb, line 127
# Checks that +var+ is either falsy or an Enumerable and returns it
# unchanged.
#
# var  - the value handed to tags= or attributes=.
# name - the option name used in the error message.
#
# Raises ArgumentError when +var+ is truthy but not an Enumerable.
def validate!(var, name)
  acceptable = !var || var.is_a?(Enumerable)
  raise ArgumentError, "You should pass :#{name} as an Enumerable" unless acceptable

  var
end