class TruncatedSaxDocument
Constants
- IGNORABLE_TAGS
- VOID_TAGS
These don't have to be closed (which also impacts ongoing length calculations). See www.456bereastreet.com/archive/201005/void_empty_elements_and_self-closing_start_tags_in_html/
Attributes
ignored_levels[R]
max_length[R]
tail[R]
truncated[R]
truncated_string[R]
Public Class Methods
new(options)
click to toggle source
FIXME: Call super to initialize state of the parent class.
# File lib/abbreviato/truncated_sax_document.rb, line 20
# Sets up a handler with an empty output buffer and zeroed counters.
#
# @param options [Hash] values read from it:
#   :max_length - stored in @max_length
#   :tail       - stored in @tail; falls back to '' when missing or nil
#   :fragment   - stored in @fragment_mode
def initialize(options)
  # Explicit empty parentheses so that `options` is NOT forwarded to the
  # parent initializer; this lets the parent set up its own state.
  super()

  @html_coder = HTMLEntities.new
  @max_length = options[:max_length]
  @tail = options[:tail] || ''
  @fragment_mode = options[:fragment]

  @truncated_string = ''
  @closing_tags = []
  @estimated_length = 0
  @ignored_levels = 0
  @truncated = false
end
Public Instance Methods
cdata_block(string)
click to toggle source
This method is called when the parser encounters cdata. In practice, this also gets called for this style of comment inside an element:
<style><!-- /* Font Definitions */ @font-face {font-family:Wingdings; panose-1:5 0 0 0 0 0 0 0 0 0;} --></style>
# File lib/abbreviato/truncated_sax_document.rb, line 103
# Handles a CDATA block: the raw string is appended only when it fits
# entirely within the remaining byte budget; otherwise the output is
# flagged as truncated and the block is dropped.
#
# @param string [String] the CDATA content
def cdata_block(string)
  if string.bytesize > remaining_length
    @truncated = true
  else
    append_to_truncated_string(string)
  end
end
characters(decoded_string)
click to toggle source
This method is called when the parser encounters characters between tags
# File lib/abbreviato/truncated_sax_document.rb, line 64
# Handles a run of text. Text is skipped (and the output flagged as
# truncated) once the limit is reached or while in ignore mode. Otherwise
# the text is appended whole when its encoded form fits, or cut down to
# what fits when it does not.
#
# @param decoded_string [String] text with entities already decoded
def characters(decoded_string)
  if max_length_reached? || ignore_mode?
    @truncated = true
    return
  end

  # Measure the encoded form, so that `&gt;` counts as 4 bytes rather than
  # the single byte of a bare '>'.
  text = @html_coder.encode(decoded_string, :named)

  if text.bytesize > remaining_length
    # Truncate character by character so an HTML entity is never split:
    # each entity is treated as a single unit.
    text = truncate_string(decoded_string)
    # Only add the tail when there is still room for it.
    text << tail if remaining_length - text.bytesize >= tail.bytesize
  end

  append_to_truncated_string(text)
end
comment(string)
click to toggle source
This method is called when the parser encounters a comment
# File lib/abbreviato/truncated_sax_document.rb, line 84
# Handles a comment: the comment is re-wrapped in its delimiters and
# appended only when the whole thing fits in the remaining byte budget;
# otherwise the output is flagged as truncated and the comment is dropped.
#
# @param string [String] the comment text without delimiters
def comment(string)
  rendered = comment_tag(string)

  if rendered.bytesize > remaining_length
    @truncated = true
  else
    append_to_truncated_string(rendered)
  end
end
end_document()
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 130
# Closes every tag still recorded as open, innermost first. The closing
# tags are appended with a counted length of 0.
def end_document
  @closing_tags.reverse_each do |open_tag|
    append_to_truncated_string(closing_tag(open_tag), 0)
  end
end
end_element(name)
click to toggle source
This method is called when the parser encounters a closing tag
# File lib/abbreviato/truncated_sax_document.rb, line 112
# Handles a closing tag.
#
# While in ignore mode the tag only unwinds one ignored level. Once the
# limit is reached, or for ignorable tags, nothing is emitted here; any
# end tags still outstanding are added automatically in `end_document`
# as the document is closed.
#
# @param name [String] the element name
def end_element(name)
  if ignore_mode?
    exit_ignored_level(name)
    return
  end

  return if max_length_reached? || ignorable_tag?(name)
  # Single-tag (void) elements were never pushed, so there is nothing to close.
  return if single_tag_element?(name)

  @closing_tags.pop
  # Counted length is 0: the closing tag was already accounted for when
  # the tag was opened.
  append_to_truncated_string(closing_tag(name), 0)
end
start_element(name, attributes)
click to toggle source
This method is called when the parser encounters an open tag
# File lib/abbreviato/truncated_sax_document.rb, line 35
# Handles an opening tag.
#
# @param name [String] the element name
# @param attributes [Enumerable] key/value pairs for the element
def start_element(name, attributes)
  limit_hit = max_length_reached?
  if limit_hit || ignorable_tag?(name)
    @truncated = true if limit_hit
    return
  end

  # Already inside an ignored element: just descend one level further.
  if ignore_mode?
    enter_ignored_level(name)
    return
  end

  tag_markup = opening_tag(name, attributes)

  # Reserve room for the opening tag together with its (potential) closing
  # tag; if that does not fit, skip this element and everything inside it.
  reserved_bytes = overridden_tag_length(name, tag_markup)
  if reserved_bytes > remaining_length
    @truncated = true
    enter_ignored_level(name)
    return
  end

  # Remember the tag so that it can be closed later on.
  @closing_tags.push(name) unless single_tag_element?(name)
  append_to_truncated_string(tag_markup, reserved_bytes)
end
Private Instance Methods
append_to_truncated_string(string, overridden_length = nil)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 161
# Appends `string` to the output buffer and advances the running length.
#
# @param string [String] markup or text to append
# @param overridden_length [Integer, nil] length to count instead of the
#   string's own byte size (callers pass 0 for closing tags)
# @return [Integer] the updated estimated length
def append_to_truncated_string(string, overridden_length = nil)
  counted_bytes = overridden_length || string.bytesize
  @truncated_string << string
  @estimated_length += counted_bytes
end
attributes_to_string(attributes)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 166
# Renders attribute pairs as ` key='value' key2='value2'`, with a leading
# space and each value passed through the entity encoder. Returns an empty
# string when there are no attributes.
#
# @param attributes [Enumerable] key/value pairs
# @return [String]
def attributes_to_string(attributes)
  rendered = attributes.each_with_object(+' ') do |(key, value), markup|
    markup << "#{key}='#{@html_coder.encode(value)}' "
  end
  # Drops the trailing separator (or the lone leading space when empty).
  rendered.rstrip
end
closing_tag(name)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 149
# Builds the closing tag markup for an element name.
#
# @param name [String] the element name
# @return [String] e.g. "</p>"
def closing_tag(name)
  format('</%s>', name)
end
comment_tag(comment)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 145
# Wraps comment text in HTML comment delimiters.
#
# @param comment [String] the comment text
# @return [String] e.g. "<!-- note -->"
def comment_tag(comment)
  format('<!--%s-->', comment)
end
enter_ignored_level(name)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 204
# Goes one level deeper into ignored content. Single-tag elements do not
# change the depth.
#
# @param name [String] the element name
def enter_ignored_level(name)
  return if single_tag_element?(name)

  @ignored_levels += 1
end
exit_ignored_level(name)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 208
# Comes one level back out of ignored content. Single-tag elements do not
# change the depth.
#
# @param name [String] the element name
def exit_ignored_level(name)
  return if single_tag_element?(name)

  @ignored_levels -= 1
end
ignorable_tag?(name)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 200
# Tells whether a tag should be left out of the output: only possible in
# fragment mode, and only for names listed (case-insensitively) in
# IGNORABLE_TAGS.
#
# @param name [String] the element name
# @return [Boolean, nil] falsy whenever fragment mode is off
def ignorable_tag?(name)
  return @fragment_mode unless @fragment_mode

  IGNORABLE_TAGS.include?(name.downcase)
end
ignore_mode?()
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 212
# Tells whether parsing is currently inside at least one ignored level.
#
# @return [Boolean]
def ignore_mode?
  @ignored_levels > 0
end
max_length_reached?()
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 173
# Tells whether the running length has reached (or passed) the limit.
#
# @return [Boolean]
def max_length_reached?
  limit = max_length
  @estimated_length >= limit
end
opening_tag(name, attributes)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 136
# Builds the opening tag markup, including attributes. Single-tag elements
# are rendered self-closed (`<br/>`), all others as a plain start tag.
#
# @param name [String] the element name
# @param attributes [Enumerable] key/value pairs
# @return [String]
def opening_tag(name, attributes)
  rendered_attributes = attributes_to_string(attributes)
  terminator = single_tag_element?(name) ? '/>' : '>'

  "<#{name}#{rendered_attributes}#{terminator}"
end
overridden_tag_length(tag_name, rendered_tag_with_attributes)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 191
# Computes the byte length to reserve for an element: its rendered opening
# tag plus, unless it is a single-tag element, its closing tag.
#
# @param tag_name [String] the element name
# @param rendered_tag_with_attributes [String] the opening tag markup
# @return [Integer]
def overridden_tag_length(tag_name, rendered_tag_with_attributes)
  opening_bytes = rendered_tag_with_attributes.bytesize
  closing_bytes = single_tag_element?(tag_name) ? 0 : closing_tag(tag_name).bytesize

  opening_bytes + closing_bytes
end
remaining_length()
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 153
# Computes how much of the limit is still unused by the running length.
#
# @return [Integer] the limit minus the estimated length so far
def remaining_length
  limit = max_length
  limit - @estimated_length
end
single_tag_element?(name)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 157
# Tells whether the name is listed in VOID_TAGS. The name is matched as
# given; no case normalisation is applied here.
#
# @param name [String] the element name
# @return [Boolean]
def single_tag_element?(name)
  VOID_TAGS.include?(name)
end
truncate_string(decoded_string)
click to toggle source
# File lib/abbreviato/truncated_sax_document.rb, line 177
# Cuts text down to what fits in the remaining budget once room for the
# tail has been set aside, and flags the output as truncated.
#
# Characters are encoded one at a time and a character is only kept when
# its whole encoded form fits, so an entity is never split in the middle.
#
# @param decoded_string [String] text with entities already decoded
# @return [String] the encoded, truncated text (without the tail)
def truncate_string(decoded_string)
  @truncated = true
  budget = remaining_length - tail.bytesize

  # Unfrozen buffer, grown in place rather than reallocated per character.
  buffer = +''
  decoded_string.each_char do |char|
    encoded_char = @html_coder.encode(char)
    break if encoded_char.bytesize > budget

    buffer << encoded_char
    budget -= encoded_char.bytesize
  end

  # Drop any invalid byte sequences from the result.
  buffer.scrub('')
end