class HTMLValidationResult

Attributes

exceptions[RW]
html[RW]
options[RW]
resource[RW]

Public Class Methods

load_from_files(filepath) click to toggle source

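Takes the base file path of a previously saved result (the path without the ".resource.txt" / ".html.txt" suffix), reads the stored resource and HTML from those two files, and builds a new HTMLValidationResult from them.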

# File lib/html_validation/html_validation_result.rb, line 34
# Builds a result object from data previously saved under +filepath+.
#
# filepath - base path of the saved data, without the ".resource.txt" /
#            ".html.txt" suffix.
#
# Reads "<filepath>.resource.txt" and "<filepath>.html.txt" and passes their
# contents, together with +filepath+ as the data path, to
# HTMLValidationResult.new. Raises Errno::ENOENT if either file is missing.
def self.load_from_files(filepath)
  # File.read opens, reads and closes in one call; the previous
  # File.open(...).read left the handle open until garbage collection.
  resource = File.read("#{filepath}.resource.txt")
  html = File.read("#{filepath}.html.txt")
  HTMLValidationResult.new(resource, html, filepath)
end
new(resource, html, datapath, tidy_flags = [], options = {}) click to toggle source

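tidy_flags ex: tidy_flags = ['--show-warnings false']. These are added to HTMLValidation.default_tidy_flags, with duplicates removed. options is a Hash of filter settings, ex: options = {ignore_proprietary: true}.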

# File lib/html_validation/html_validation_result.rb, line 16
# Stores the inputs, merges the caller's tidy flags with the defaults and
# immediately runs a validation pass.
#
# resource   - stored as-is in @resource.
# html       - the markup to validate.
# datapath   - base path used by data_path to name the stored files.
# tidy_flags - extra flags, appended to HTMLValidation.default_tidy_flags
#              (duplicates are dropped).
# options    - stored as-is in @options; read by the filtering code.
def initialize(resource, html, datapath, tidy_flags = [], options = {})
  @resource = resource
  @html = html
  @exceptions = ''
  @datapath = datapath

  combined_flags = HTMLValidation.default_tidy_flags + tidy_flags
  @tidy_flags = combined_flags.uniq
  @options = options

  # Validate eagerly so @exceptions is populated once construction finishes.
  valid?
end

Public Instance Methods

accept!() click to toggle source

Saves the exception string for the given url or file path. When next run, if the exception string is identical, valid? will return true. Note that exceptions will still list the exception string, though, even if it is an accepted exception string.

# File lib/html_validation/html_validation_result.rb, line 56
# Writes the current exception string to the "accepted" data file,
# replacing any previous contents. Returns the number of bytes written.
def accept!
  File.write(data_path("accepted"), @exceptions)
end
reject!() click to toggle source
# File lib/html_validation/html_validation_result.rb, line 60
# Removes the stored "accepted" exception string, if there is one.
# Does nothing when no accepted file exists.
def reject!
  accepted_path = data_path("accepted")
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  File.delete(accepted_path) if File.exist?(accepted_path)
end
valid?() click to toggle source

Validates an HTML string using HTML Tidy. If there are no warnings or exceptions, or there is a previously accepted exception string that matches exactly, valid? returns true. Line numbers of exceptions are likely to change with any edit, so our validation compares the exception strings with the lines and columns removed. The name can be a filename, file system path, or URL, so long as it is uniquely associated with the passed-in HTML.

# File lib/html_validation/html_validation_result.rb, line 45
# Runs the validator, stores its output in @exceptions and reports whether
# the document is acceptable.
#
# Returns true when the filtered output is empty or matches a previously
# accepted exception string; false otherwise. As side effects, a stale
# "accepted" file is deleted when the raw output is empty, and the html,
# resource and exceptions are saved on every call.
def valid?
  @exceptions = validate

  # A completely clean run makes any earlier acceptance obsolete.
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  accepted_path = data_path("accepted")
  File.delete(accepted_path) if @exceptions == '' && File.exist?(accepted_path)

  valid = filter(@exceptions) == '' || accepted?(@exceptions)
  save_html_and_exceptions
  valid
end

Private Instance Methods

accepted?(exception_str) click to toggle source

have we previously accepted this exact string for this path?

# File lib/html_validation/html_validation_result.rb, line 91
# Reports whether +exception_str+ equals the previously accepted exception
# string for this data path. Both strings are compared after filtering.
#
# Returns false when no "accepted" file exists.
def accepted?(exception_str)
  accepted_path = data_path('accepted')
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return false unless File.exist?(accepted_path)

  # File.read closes the handle; File.open(...).read left it open.
  filter(File.read(accepted_path)) == filter(exception_str)
end
data_path(filetype) click to toggle source

get the filename for storing a type of data

# File lib/html_validation/html_validation_result.rb, line 80
# Builds the file name used to store one kind of data for this result:
# "<datapath>.<filetype>.txt".
def data_path(filetype)
  suffix = ".#{filetype}.txt"
  "#{@datapath}#{suffix}"
end
filter(str) click to toggle source

Line numbers of exceptions are likely to change with any minor edit, so our validation compares the result strings with the lines and columns removed. This means that if the errors change position in the file (up or down b/c you add or remove code), accepted exception strings will remain valid.

# File lib/html_validation/html_validation_result.rb, line 100
# Normalizes validator output so it can be compared across edits.
#
# Drops whole lines the caller chose to ignore (driven by +options+), then
# strips the "line N column M -" prefix from whatever remains. Returns a new
# string; +str+ is not modified.
def filter(str)
  # "trimming empty" messages are overzealous and do not indicate invalid markup.
  cleaned = str.gsub(/^line.*trimming empty.*\n/, '')

  # Proprietary attributes (e.g. IE-only wrap or spellcheck) are dropped on request.
  cleaned = cleaned.gsub(/^line.*proprietary.*\n/, '') if options[:ignore_proprietary]

  ignored_tags = options[:ignored_tag_errors]
  if ignored_tags && ignored_tags.any?
    cleaned = cleaned.gsub(/^line.*(?:Error|Warning):.*<\/?(?:#{ignored_tags.join('|')})>.*\n/, '')
  end

  ignored_attributes = options[:ignored_attribute_errors]
  if ignored_attributes && ignored_attributes.any?
    cleaned = cleaned.gsub(/^line.*(?:Error|Warning):.* attribute \"(?:#{ignored_attributes.join('|')})\".*\n/, '')
  end

  ignored_messages = options[:ignored_errors]
  if ignored_messages && ignored_messages.any?
    message_pattern = ignored_errors_regex
    # Only build the line-removal regex when some message body actually matches.
    if cleaned.gsub(/^line.*(?:Error|Warning):/, '') =~ message_pattern
      line_pattern = Regexp.new(/^line.*(?:Error|Warning):/.source + '.*' + message_pattern.source + '.*' + /\n/.source)
      cleaned = cleaned.gsub(line_pattern, '')
    end
  end

  # Positions shift with any edit, so they are removed before comparison.
  cleaned.gsub(/line [0-9]+ column [0-9]+ -/, '')
end
ignored_errors_regex() click to toggle source
# File lib/html_validation/html_validation_result.rb, line 112
# Builds one non-capturing alternation regex from options[:ignored_errors].
# Entries are inserted verbatim, so they are treated as regex fragments.
def ignored_errors_regex
  alternatives = options[:ignored_errors].join('|')
  /(?:#{alternatives})/
end
save_html_and_exceptions() click to toggle source
# File lib/html_validation/html_validation_result.rb, line 84
# Writes the current html, resource and exception string to their data
# files, overwriting earlier contents. Returns the byte count of the last
# write (the exceptions file).
def save_html_and_exceptions
  File.write(data_path("html"), @html)
  File.write(data_path("resource"), @resource)
  File.write(data_path("exceptions"), @exceptions)
end
tidy_command() click to toggle source

We used to specifically prefer /usr/bin/tidy by default on *nix, as there is another “tidy” program that could end up earlier on the PATH, and Tidy was installed at this location for me by default. The norm is now to custom-install the Tidy fork for HTML 5, though, and respecting the PATH is better philosophically. We now expect the PATH to be correct; i.e., if the wrong tidy is being picked up, put the right tidy first on the PATH.

# File lib/html_validation/html_validation_result.rb, line 72
# Builds the shell command used to run tidy: the executable name
# ("tidy.exe" on Windows-like hosts, "tidy" elsewhere) followed by the
# configured flags separated by spaces. The executable is resolved via PATH.
def tidy_command
  executable =
    if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
      'tidy.exe'
    else
      'tidy'
    end
  "#{executable} #{@tidy_flags.join(' ')}"
end
validate() click to toggle source
# File lib/html_validation/html_validation_result.rb, line 116
# Pipes @html through the tidy command and returns whatever tidy wrote to
# stderr (its diagnostics) as a String. Tidy's stdout is discarded.
#
# Note: @html is re-encoded to UTF-8 in place before being sent.
def validate
  html = @html.encode!("UTF-8", invalid: :replace, undef: :replace).force_encoding("utf-8")
  # Keep the input newline-terminated, as the former stdin.puts did.
  html_input = html.end_with?("\n") ? html : "#{html}\n"

  # capture3 drains stdout and stderr while feeding stdin and waits for the
  # child to exit. The previous manual popen3 handling closed stdout before
  # the child finished (a child writing to stdout could hit a broken pipe
  # and lose diagnostics) and never reaped the process.
  _stdout, diagnostics, _status = Open3.capture3(tidy_command, stdin_data: html_input)
  diagnostics
end