class StringDoc

String-based XML document optimized for fast manipulation and rendering.

In Pakyow, we rarely care about every node in a document. Instead, only significant nodes and immediate children are available for manipulation. StringDoc provides “just enough” for our purposes. A StringDoc is represented as a multidimensional array of strings, making rendering essentially a flatten.join.

Because less work is performed during render, StringDoc is consistently faster than rendering a document using Nokogiri or Oga. One obvious tradeoff is that parsing is much slower (we use Oga to parse the XML, then convert it into a StringDoc). This is an acceptable tradeoff because we only pay the parsing cost once (when the Pakyow application boots).

All that to say, StringDoc is a tool that is very specialized to Pakyow's use-case. Use it only when a longer parse time is acceptable and you only care about a handful of identifiable nodes in a document.

@api private

Constants

ATTR_MAPPING
DATA_ATTRS

Attributes that should be prefixed with data-

DELETED_ATTRS

Attributes that should be deleted from the view

LABEL_ATTRS

Attributes that will be turned into StringDoc labels

LABEL_MAPPING
SEMANTIC_TAGS

Attributes

collapsed[R]

The rendered string captured when the document was collapsed, or nil if it has not been collapsed.

nodes[R]

Array of Node objects.

Public Class Methods

attributes(element) click to toggle source

Returns attributes for an oga element.

# File lib/string_doc.rb, line 80
# Returns the attributes of +element+ when it is an Oga::XML::Element.
# Anything else (text, comments, documents, nil) has no attributes, so an
# empty array is returned.
def attributes(element)
  return [] unless element.is_a?(Oga::XML::Element)

  element.attributes
end
attributes_string(element) click to toggle source

Builds a string-based representation of attributes for an oga element.

# File lib/string_doc.rb, line 90
# Builds a string-based representation of attributes for an oga element.
#
# Each attribute is emitted as ` name="value"`. Values are HTML-escaped so a
# quote, ampersand, or angle bracket inside a value cannot terminate the
# attribute early or otherwise produce malformed markup. This mirrors the
# escaping that attributes_hash applies to attribute values.
#
# @param element [Object] an oga element; non-elements produce an empty string
# @return [String] the serialized attributes, each prefixed with a single space
def attributes_string(element)
  attributes(element).each_with_object(String.new) do |attribute, string|
    # A nil value becomes an empty string, which renders as name="".
    string << " #{attribute.name}=\"#{CGI.escape_html(attribute.value.to_s)}\""
  end
end
breadth_first(doc) { |element| ... } click to toggle source

Yields nodes from an oga document, breadth-first.

# File lib/string_doc.rb, line 64
# Yields nodes from an oga document.
#
# The queue is seeded with +doc+ itself; +doc+ is never yielded, it is only
# expanded into its children. Children are yielded as-is and are not expanded
# further, so only the immediate children of +doc+ reach the block.
def breadth_first(doc)
  pending = [doc]

  until pending.empty?
    current = pending.shift

    if current == doc
      pending.concat(current.children.to_a)
    else
      yield current
    end
  end
end
contains_significant_child?(element) click to toggle source

Returns true if the given Oga element contains a child node that is significant.

# File lib/string_doc.rb, line 108
# Returns true if any descendant of the given Oga element is significant.
#
# Each child is checked for its own significance first and is then searched
# recursively; the walk stops at the first hit.
def contains_significant_child?(element)
  element.children.each do |descendant|
    hit = find_significance(descendant).any? || contains_significant_child?(descendant)
    return true if hit
  end

  false
end
empty() click to toggle source

Creates an empty doc.

# File lib/string_doc.rb, line 36
# Creates an empty doc.
#
# The instance is allocated directly (the initializer is not run) and is given
# an empty node list and no collapsed value.
def empty
  doc = allocate
  doc.instance_variable_set(:@nodes, [])
  doc.instance_variable_set(:@collapsed, nil)
  doc
end
find_significance(element) click to toggle source

Determines the significance of element.

# File lib/string_doc.rb, line 98
# Determines the significance of element.
#
# Returns the names of every registered significant type whose handler object
# reports the element as significant, in registration order.
def find_significance(element)
  matches = []

  significant_types.each do |key, info|
    matches << key if info[:object].significant?(element)
  end

  matches
end
from_nodes(nodes) click to toggle source

Creates a StringDoc from an array of Node objects.

# File lib/string_doc.rb, line 51
# Creates a StringDoc from an array of Node objects.
#
# The instance is allocated directly (the initializer is not run), the given
# array becomes its node list, and every node is re-parented to the instance.
def from_nodes(nodes)
  instance = allocate
  instance.instance_variable_set(:@nodes, nodes)
  instance.instance_variable_set(:@collapsed, nil)

  nodes.each { |node| node.parent = instance }

  instance
end
new(html) click to toggle source

Creates a StringDoc from an html string.

# File lib/string_doc.rb, line 143
# Creates a StringDoc from an html string.
#
# The html is parsed with Oga and the resulting document is converted into
# nodes; a new doc starts out without a collapsed value.
def initialize(html)
  parsed_document = Oga.parse_html(html)

  @nodes = parse(parsed_document)
  @collapsed = nil
end
nodes_from_doc_or_string(doc_node_or_string) click to toggle source

@api private

# File lib/string_doc.rb, line 123
# Normalizes the argument into an array of nodes.
#
# * a StringDoc yields its own node array (the same array object, not a copy)
# * a Node or MetaNode is wrapped in a one-element array
# * anything else is converted to a string and parsed as a new StringDoc
def nodes_from_doc_or_string(doc_node_or_string)
  if doc_node_or_string.is_a?(StringDoc)
    doc_node_or_string.nodes
  elsif doc_node_or_string.is_a?(Node) || doc_node_or_string.is_a?(MetaNode)
    [doc_node_or_string]
  else
    StringDoc.new(doc_node_or_string.to_s).nodes
  end
end
significant(name, object, descend: true) click to toggle source

Registers a significant node with a name and an object to handle parsing.

# File lib/string_doc.rb, line 45
# Registers a significant node with a name and an object to handle parsing.
#
# The registration is stored under +name+ together with the +descend+ flag and
# replaces any earlier registration for the same name.
def significant(name, object, descend: true)
  registration = { object: object, descend: descend }
  significant_types[name] = registration
end
significant_types() click to toggle source

@api private

# File lib/string_doc.rb, line 118
# Returns the registry of significant types, creating it as an empty hash on
# first access and returning the same hash on every later call.
def significant_types
  @significant_types = {} unless @significant_types

  @significant_types
end

Public Instance Methods

==(other) click to toggle source
# File lib/string_doc.rb, line 456
# Two docs are equal when the other object is a StringDoc and both hold equal
# node lists.
def ==(other)
  return false unless other.is_a?(StringDoc)

  @nodes == other.nodes
end
append(doc_or_string) click to toggle source

Appends to this document.

Accepts a StringDoc or XML String.

# File lib/string_doc.rb, line 326
# Appends to this document.
#
# The argument is normalized into nodes, each node is re-parented to this
# document, and the nodes are added after the existing ones. Returns self.
def append(doc_or_string)
  incoming = self.class.nodes_from_doc_or_string(doc_or_string)
  incoming.each { |node| node.parent = self }
  @nodes.concat(incoming)

  self
end
append_html(html) click to toggle source

Appends raw html to this document, without parsing.

# File lib/string_doc.rb, line 340
# Appends raw html to this document, without parsing.
#
# The html is converted to a string and wrapped in a single Node that is
# parented to this document. Returns self.
def append_html(html)
  raw_node = Node.new(html.to_s)
  raw_node.parent = self
  @nodes << raw_node

  self
end
clear() click to toggle source

Clears all nodes.

# File lib/string_doc.rb, line 299
# Clears all nodes by emptying the existing node array in place.
# Returns self.
def clear
  @nodes.clear

  self
end
Also aliased as: remove
collapse(*significance) click to toggle source
# File lib/string_doc.rb, line 460
# Collapses the parts of the document that hold none of the given significance.
#
# When nothing in this doc carries the significance, the doc is rendered once,
# the result is stored as the collapsed value and the node list is dropped.
# Otherwise the children of each node are asked to collapse themselves.
# Returns the current collapsed value in both cases.
def collapse(*significance)
  unless significance?(*significance)
    @collapsed = render
    @nodes = []

    return @collapsed
  end

  @nodes.each { |node| node.children.collapse(*significance) }

  @collapsed
end
each(descend: false) { |each_meta_node| ... } click to toggle source
# File lib/string_doc.rb, line 182
# Yields each node in the document; returns an enumerator without a block.
#
# A MetaNode is expanded into the nodes it yields itself. After a node has been
# yielded its children are visited as well, unless the node is labeled with
# descend == false and +descend+ was not forced. Children that are a StringDoc
# are iterated recursively; any other children value is yielded as-is.
def each(descend: false, &block)
  return enum_for(:each, descend: descend) unless block_given?

  @nodes.each do |node|
    if node.is_a?(MetaNode)
      node.each { |member| yield member }
    else
      yield node
    end

    # Skip the children only when descending is neither forced nor allowed.
    next unless descend || node.label(:descend) != false

    if node.children.is_a?(StringDoc)
      node.children.each(descend: descend, &block)
    else
      yield node.children
    end
  end
end
each_significant_node(type, descend: false) { |each_meta_node| ... } click to toggle source

Yields each node matching the significant type.

# File lib/string_doc.rb, line 207
# Yields each node matching the significant type; returns an enumerator
# without a block.
#
# A matching MetaNode is expanded into the nodes it yields itself, a matching
# Node is yielded directly, and anything else is ignored.
def each_significant_node(type, descend: false)
  return enum_for(:each_significant_node, type, descend: descend) unless block_given?

  each(descend: descend) do |candidate|
    if candidate.is_a?(MetaNode)
      candidate.each { |member| yield member } if candidate.significant?(type)
    elsif candidate.is_a?(Node)
      yield candidate if candidate.significant?(type)
    end
  end
end
each_significant_node_with_name(type, name, descend: false) { |node| ... } click to toggle source

Yields each node matching the significant type and name.

@see find_significant_nodes

# File lib/string_doc.rb, line 259
# Yields each node matching the significant type and name; returns an
# enumerator without a block.
#
# A node matches when its label for +type+ equals +name+.
def each_significant_node_with_name(type, name, descend: false)
  unless block_given?
    return enum_for(:each_significant_node_with_name, type, name, descend: descend)
  end

  each_significant_node(type, descend: descend) do |node|
    next unless node.label(type) == name

    yield node
  end
end
each_significant_node_without_descending_into_type(type, descend: false) { |each_meta_node| ... } click to toggle source

Yields each node matching the significant type, without descending into nodes that are of that type.

# File lib/string_doc.rb, line 228
# Yields each node matching the significant type, without descending into
# nodes that are of that type; returns an enumerator without a block.
#
# Only Node and MetaNode entries are considered. A matching entry is yielded
# (a MetaNode is expanded into the nodes it yields itself) and its children are
# left alone. A non-matching entry has its children visited, unless the entry
# is labeled with descend == false and +descend+ was not forced: StringDoc
# children are searched recursively, any other children value is yielded as-is.
def each_significant_node_without_descending_into_type(type, descend: false, &block)
  return enum_for(:each_significant_node_without_descending_into_type, type, descend: descend) unless block_given?

  @nodes.each do |node|
    next unless node.is_a?(Node) || node.is_a?(MetaNode)

    if node.significant?(type)
      if node.is_a?(MetaNode)
        node.each { |member| yield member }
      elsif node.is_a?(Node)
        yield node
      end
    elsif descend || node.label(:descend) != false
      if node.children.is_a?(StringDoc)
        node.children.each_significant_node_without_descending_into_type(type, descend: descend, &block)
      else
        yield node.children
      end
    end
  end
end
empty?() click to toggle source
# File lib/string_doc.rb, line 489
# Returns true when the document holds no nodes.
def empty?
  @nodes.size.zero?
end
finalize_labels(keep: []) click to toggle source
# File lib/string_doc.rb, line 174
# Asks every node in the document to finalize its labels, passing +keep+
# through unchanged. Returns the node array.
def finalize_labels(keep: [])
  @nodes.each { |node| node.finalize_labels(keep: keep) }
end
find_first_significant_node(type, descend: false) click to toggle source

Returns the first node matching the significant type.

# File lib/string_doc.rb, line 269
# Returns the first node matching the significant type, or nil when no node
# matches. Iteration stops as soon as a match is found.
def find_first_significant_node(type, descend: false)
  each(descend: descend) do |node|
    return node if node.significant?(type)
  end

  nil
end
find_significant_nodes(type, descend: false) click to toggle source

Returns nodes matching the significant type.

# File lib/string_doc.rb, line 277
# Returns nodes matching the significant type, collected into a new array in
# iteration order.
def find_significant_nodes(type, descend: false)
  found = []

  each_significant_node(type, descend: descend) { |node| found << node }

  found
end
find_significant_nodes_with_name(type, name, descend: false) click to toggle source

Returns nodes matching the significant type and name.

@see find_significant_nodes

# File lib/string_doc.rb, line 289
# Returns nodes matching the significant type and name, collected into a new
# array in iteration order.
def find_significant_nodes_with_name(type, name, descend: false)
  found = []

  each_significant_node_with_name(type, name, descend: descend) { |node| found << node }

  found
end
initialize_copy(_) click to toggle source

@api private

Calls superclass method
# File lib/string_doc.rb, line 149
# Completes a dup/clone of the document.
#
# After the superclass has copied the instance, the node list is replaced with
# duplicates of the nodes, each re-parented to this copy.
def initialize_copy(_)
  super

  @nodes = @nodes.map do |node|
    copy = node.dup
    copy.parent = self
    copy
  end
end
insert_after(node_to_insert, after_node) click to toggle source

Inserts a node after another node contained in this document.

# File lib/string_doc.rb, line 366
# Inserts a node after another node contained in this document.
#
# Nothing happens when +after_node+ is not found in this document. Otherwise
# the argument is normalized into nodes, each is re-parented to this document,
# and they are placed directly after +after_node+. Returns self either way.
def insert_after(node_to_insert, after_node)
  position = @nodes.index(after_node)
  return self if position.nil?

  incoming = self.class.nodes_from_doc_or_string(node_to_insert)
  incoming.each { |node| node.parent = self }
  @nodes.insert(position + 1, *incoming)

  self
end
insert_before(node_to_insert, before_node) click to toggle source

Inserts a node before another node contained in this document.

# File lib/string_doc.rb, line 382
# Inserts a node before another node contained in this document.
#
# Nothing happens when +before_node+ is not found in this document. Otherwise
# the argument is normalized into nodes, each is re-parented to this document,
# and they are placed directly before +before_node+. Returns self either way.
def insert_before(node_to_insert, before_node)
  position = @nodes.index(before_node)
  return self if position.nil?

  incoming = self.class.nodes_from_doc_or_string(node_to_insert)
  incoming.each { |node| node.parent = self }
  @nodes.insert(position, *incoming)

  self
end
prepend(doc_or_string) click to toggle source

Prepends to this document.

Accepts a StringDoc or XML String.

# File lib/string_doc.rb, line 352
# Prepends to this document.
#
# The argument is normalized into nodes, each node is re-parented to this
# document, and the nodes are added before the existing ones. Returns self.
def prepend(doc_or_string)
  incoming = self.class.nodes_from_doc_or_string(doc_or_string)
  incoming.each { |node| node.parent = self }
  @nodes.unshift(*incoming)

  self
end
remove()
Alias for: clear
remove_empty_nodes() click to toggle source
# File lib/string_doc.rb, line 479
# Removes empty nodes from the document, depth-first.
#
# The children of every node are cleaned first; afterwards the nodes of this
# document that report themselves as empty are deleted in place.
def remove_empty_nodes
  @nodes.each { |node| node.children.remove_empty_nodes }

  @nodes.delete_if { |node| node.empty? } unless empty?
end
remove_node(node_to_delete) click to toggle source

Removes a node from the document.

# File lib/string_doc.rb, line 398
# Removes a node from the document.
#
# Nodes are matched by object identity, so an equal-looking but distinct node
# is left in place. Returns self.
def remove_node(node_to_delete)
  @nodes.reject! { |candidate| candidate.equal?(node_to_delete) }

  self
end
render(output = String.new, context: nil) click to toggle source
# File lib/string_doc.rb, line 423
# Renders the document into +output+ and returns it.
#
# A collapsed doc without nodes contributes its stored collapsed string.
# Otherwise each Node or MetaNode renders itself into +output+ with the given
# context, and any other entry is appended via to_s.
def render(output = String.new, context: nil)
  return output << collapsed if collapsed && empty?

  nodes.each do |node|
    if node.is_a?(MetaNode) || node.is_a?(Node)
      node.render(output, context: context)
    else
      output << node.to_s
    end
  end

  output
end
Also aliased as: to_html, to_xml
replace(doc_or_string) click to toggle source

Replaces the current document.

Accepts a StringDoc or XML String.

# File lib/string_doc.rb, line 310
# Replaces the current document.
#
# Accepts a StringDoc or XML String. The argument is normalized into nodes and
# each node is re-parented to this document.
#
# The node list is stored as a copy: when a StringDoc is passed, the
# normalized list is that doc's own node array, and keeping a reference to it
# would make later changes to either document (clear, append, ...) show up in
# the other one.
#
# @param doc_or_string [Object] the replacement content
# @return [self]
def replace(doc_or_string)
  tap do
    nodes = self.class.nodes_from_doc_or_string(doc_or_string)

    nodes.each do |node|
      node.parent = self
    end

    # Copy the array (not the nodes) so the two documents stop sharing state.
    @nodes = nodes.dup
  end
end
replace_node(node_to_replace, replacement_node) click to toggle source

Replaces a node in the document with another node.

# File lib/string_doc.rb, line 408
# Replaces a node in the document with the given replacement.
#
# Nothing happens when +node_to_replace+ is not found. Otherwise the
# replacement is normalized into nodes, each is re-parented to this document,
# and they take the position of the replaced node. Returns self either way.
def replace_node(node_to_replace, replacement_node)
  position = @nodes.index(node_to_replace)
  return self if position.nil?

  replacements = self.class.nodes_from_doc_or_string(replacement_node)
  replacements.each { |node| node.parent = self }

  # Insert behind the old node first, then drop the old node itself.
  @nodes.insert(position + 1, *replacements)
  @nodes.delete_at(position)

  self
end
significance?(*significance) click to toggle source
# File lib/string_doc.rb, line 473
# Returns true when any node in the document, or the children of any node,
# reports the given significance.
def significance?(*significance)
  @nodes.any? do |node|
    next true if node.significance?(*significance)

    node.children.significance?(*significance)
  end
end
soft_copy() click to toggle source

@api private

# File lib/string_doc.rb, line 160
# Builds a copy of the document from soft copies of its nodes.
#
# The copy is allocated directly (the initializer is not run). Each node
# contributes its own soft copy, re-parented to the new document, and the
# collapsed value is carried over unchanged.
def soft_copy
  self.class.allocate.tap do |copy|
    copied_nodes = @nodes.map do |node|
      node.soft_copy.tap { |node_copy| node_copy.parent = copy }
    end

    copy.instance_variable_set(:@nodes, copied_nodes)
    copy.instance_variable_set(:@collapsed, @collapsed)
  end
end
to_html(output = String.new, context: nil)
Alias for: render
to_s() click to toggle source

Returns the document as an xml string, without transforming.

# File lib/string_doc.rb, line 446
def to_s
  if collapsed && empty?
    collapsed
  else
    @nodes.each_with_object(String.new) do |node, string|
      string << node.to_s
    end
  end
end
to_xml(output = String.new, context: nil)
Alias for: render
transforms?() click to toggle source
# File lib/string_doc.rb, line 493
# Returns true when at least one node in the document reports transforms.
def transforms?
  @nodes.any? { |node| node.transforms? }
end

Private Instance Methods

attributes_hash(element) click to toggle source
# File lib/string_doc.rb, line 555
# Builds a hash of the element's attributes, keyed by the attribute name as a
# symbol. Values are converted to strings and HTML-escaped.
def attributes_hash(element)
  escaped = {}

  StringDoc.attributes(element).each do |attribute|
    escaped[attribute.name.to_sym] = CGI.escape_html(attribute.value.to_s)
  end

  escaped
end
build_significant_node(element, significance) click to toggle source
# File lib/string_doc.rb, line 571
# Builds the Node for an oga node that was found to be significant.
#
# element      - the oga node being converted
# significance - the names of the significant types that matched the node
#
# Oga elements become a Node holding the opening tag text, an Attributes
# object, and labels derived from the element's attributes. Anything else is
# kept as its xml and labeled with a name extracted from its text. In both
# cases the node is afterwards handed to every matching significant object
# that responds to `decorate`. Returns the node.
def build_significant_node(element, significance)
  node = if element.is_a?(Oga::XML::Element)
    # Rebuild the attributes: drop the deleted ones, rename through
    # ATTR_MAPPING, and prefix the data attributes with "data-".
    attributes = attributes_hash(element).each_with_object({}) { |(key, value), remapped_attributes|
      unless DELETED_ATTRS.include?(key)
        remapped_key = ATTR_MAPPING.fetch(key, key)

        if DATA_ATTRS.include?(key)
          remapped_key = :"data-#{remapped_key}"
        end

        remapped_attributes[remapped_key] = value || ""
      end
    }

    labels = labels_hash(element)

    # Bindings get extra labels (and a data-b attribute) derived from the
    # binding value; both hashes are modified in place.
    if labels.include?(:binding)
      post_process_binding!(element, attributes, labels)
    end

    # NOTE(review): the registry is read through self.class here but through
    # StringDoc in the decorate loop below — confirm both are meant to hit the
    # same registry when subclassing.
    significance_options = significance.map { |significant_type|
      self.class.significant_types[significant_type]
    }

    # Descending is only allowed when every matching type was registered with
    # descend: true.
    labels[:descend] = significance_options.all? { |options| options[:descend] == true }

    Node.new("<#{element.name}", Attributes.new(attributes), significance: significance, labels: labels, parent: self)
  else
    # The name is the run of word characters/dashes that follows the first
    # "@..." token in the stripped text.
    # NOTE(review): `match` returns nil when the text has no "@" token, which
    # would raise here — presumably significance guarantees a match; confirm.
    name = element.text.strip.match(/@[^\s]*\s*([a-zA-Z0-9\-_]*)/)[1]
    labels = significance.each_with_object({}) { |significant_type, labels_hash|
      # FIXME: remove this special case logic
      labels_hash[significant_type] = if name.empty? && significant_type == :container
        Pakyow::Presenter::Views::Page::DEFAULT_CONTAINER
      else
        name.to_sym
      end
    }

    Node.new(element.to_xml, significance: significance, parent: self, labels: labels)
  end

  # Give each matching significant object a chance to decorate the new node.
  significance.each do |significant_type|
    object = StringDoc.significant_types.dig(significant_type, :object)
    if object && object.respond_to?(:decorate)
      object.decorate(node)
    end
  end

  node
end
labels_hash(element) click to toggle source
# File lib/string_doc.rb, line 561
# Builds the labels for an element from its attributes.
#
# Only attributes listed in LABEL_ATTRS contribute. The label key is the
# attribute name, renamed through LABEL_MAPPING when a mapping exists, and the
# label value is the attribute value converted to a symbol.
def labels_hash(element)
  labels = {}

  StringDoc.attributes(element).dup.each do |attribute|
    key = attribute.name.to_sym
    next unless LABEL_ATTRS.include?(key)

    labels[LABEL_MAPPING.fetch(key, key)] = attribute.value.to_s.to_sym
  end

  labels
end
parse(doc) click to toggle source

Parses an Oga document into an array of Node objects.

# File lib/string_doc.rb, line 501
# Parses an Oga document into an array of Node objects.
#
# A doctype node is emitted first when +doc+ is not an element and carries a
# doctype. Each node yielded for +doc+ is then converted:
#
# * nothing significant in or below it: collapsed into one Node of its xml
# * significant itself: built through build_significant_node
# * text or comment: kept as its xml
# * any other node: a Node holding the opening tag and its attributes
#
# Elements that were not collapsed are closed with their recursively parsed
# children.
def parse(doc)
  parsed = []

  if !doc.is_a?(Oga::XML::Element) && doc.respond_to?(:doctype) && !doc.doctype.nil?
    parsed << Node.new("<!DOCTYPE html>")
  end

  self.class.breadth_first(doc) do |element|
    significance = self.class.find_significance(element)
    significant = significance.any?

    if !significant && !self.class.contains_significant_child?(element)
      # Nothing inside of the node is significant, so collapse it to a single node.
      parsed << Node.new(element.to_xml)
      next
    end

    node =
      if significant
        build_significant_node(element, significance)
      elsif element.is_a?(Oga::XML::Text) || element.is_a?(Oga::XML::Comment)
        Node.new(element.to_xml)
      else
        Node.new("<#{element.name}#{self.class.attributes_string(element)}")
      end

    node.close(element.name, parse(element)) if element.is_a?(Oga::XML::Element)

    parsed << node
  end

  parsed
end
post_process_binding!(element, attributes, labels) click to toggle source
# File lib/string_doc.rb, line 622
# Expands the raw :binding label into the full set of binding labels.
#
# element    - the element carrying the binding
# attributes - the element's attributes hash; receives the :"data-b" entry
# labels     - the element's labels hash; read for :binding and updated in place
#
# The binding value has the shape "name.prop:channel:..." and may be prefixed
# with a plug reference, "@plug." or "@plug(instance).".
def post_process_binding!(element, attributes, labels)
  channel = semantic_channel_for_element(element)
  binding = labels[:binding].to_s

  if binding.start_with?("@")
    # Split the plug reference off the front; the remainder is the binding.
    # NOTE(review): a value such as "@plug" without a "." leaves `binding` nil
    # and the split(":") below would raise — confirm such values cannot occur.
    plug, binding = binding.split(".", 2)
    plug_name, plug_instance = plug.split("(", 2)

    if plug_instance
      # Drop the last character (the closing parenthesis of "instance)").
      plug_instance = plug_instance[0..-2]
    else
      plug_instance = :default
    end

    labels[:plug] = {
      # Strip the leading "@" from the plug name.
      name: plug_name[1..-1].to_sym,
      instance: plug_instance.to_sym,
    }

    labels[:plug][:key] = if labels[:plug][:instance] == :default
      "@#{labels[:plug][:name]}"
    else
      "@#{labels[:plug][:name]}.#{labels[:plug][:instance]}"
    end
  end

  # The first ":"-separated part is "name" or "name.prop"; the rest are
  # channel parts.
  binding_parts = binding.split(":").map(&:to_sym)
  binding_name, binding_prop = binding_parts[0].to_s.split(".", 2).map(&:to_sym)
  plural_binding_name = Pakyow::Support.inflector.pluralize(binding_name).to_sym
  singular_binding_name = Pakyow::Support.inflector.singularize(binding_name).to_sym

  labels[:binding] = binding_name
  labels[:plural_binding] = plural_binding_name
  labels[:singular_binding] = singular_binding_name

  if binding_prop
    labels[:binding_prop] = binding_prop
  end

  # `channel` is extended in place, so every label below sees the semantic
  # channel (if any) followed by the channel parts from the binding value.
  channel.concat(binding_parts[1..-1])
  labels[:channeled_binding] = [binding_name].concat(channel).join(":").to_sym
  labels[:plural_channeled_binding] = [plural_binding_name].concat(channel).join(":").to_sym
  labels[:singular_channeled_binding] = [singular_binding_name].concat(channel).join(":").to_sym
  attributes[:"data-b"] = [binding_parts[0]].concat(channel).join(":")
end
semantic_channel_for_element(element, channel = []) click to toggle source
# File lib/string_doc.rb, line 672
# Returns +channel+, with the element's tag name appended as a symbol when the
# tag is listed in SEMANTIC_TAGS. The given array is modified in place.
def semantic_channel_for_element(element, channel = [])
  channel << element.name.to_sym if SEMANTIC_TAGS.include?(element.name)

  channel
end