class HTMLDOMDiff::Differ
Public Instance Methods
diff(ldoc, rdoc)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 15
# Runs the whole matching pipeline on two document roots.
#
# The steps run in a fixed order: state is reset, elements are paired by id,
# weights/signatures are computed for both trees, then the top-down,
# queue-driven, bottom-up and final top-down matching passes are applied.
#
# ldoc - root node of the left document.
# rdoc - root node of the right document.
#
# Returns the builder created by #reset, which holds the recorded matches.
def diff(ldoc, rdoc)
  reset(ldoc, rdoc)
  match_by_ids(ldoc, rdoc)

  # Left tree first, then right tree, each into its own signature table.
  [[@lsignatures, ldoc], [@rsignatures, rdoc]].each do |signatures, doc|
    prep_with(signatures, doc)
  end

  perform_initial_top_down_matching([ldoc], [rdoc])

  # The queue-driven pass starts from the right root.
  @matchqueue.push(rdoc)
  perform_initial_matching

  match_bottom_up(ldoc)
  match_top_down(ldoc)

  @builder
end
diff_fragments(left, right)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 11
# Diffs two HTML fragment strings.
#
# Each string is parsed with #parse_fragments and the `child` of the
# resulting fragment is handed to #diff.
#
# left  - HTML fragment source for the left side.
# right - HTML fragment source for the right side.
#
# Returns whatever #diff returns.
def diff_fragments(left, right)
  left_root = parse_fragments(left).child
  right_root = parse_fragments(right).child
  diff(left_root, right_root)
end
diff_strings(left, right)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 7
# Diffs two complete HTML document strings.
#
# Each string is parsed with #parse and the `root` of the resulting
# document is handed to #diff.
#
# left  - HTML source for the left side.
# right - HTML source for the right side.
#
# Returns whatever #diff returns.
def diff_strings(left, right)
  left_root = parse(left).root
  right_root = parse(right).root
  diff(left_root, right_root)
end
Private Instance Methods
find_best_match(element)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 134
# Looks for the left-side node that should be paired with a right-side node.
#
# Candidates are the not-yet-matched left nodes whose signature equals the
# signature stored for `element`. A single candidate wins outright. Several
# candidates are narrowed to those whose parent is matched with
# `element.parent`; the result is only used if exactly one remains.
#
# element - right-side node to find a partner for.
#
# Returns the chosen left-side node, or nil when there is no candidate or
# the choice is ambiguous.
def find_best_match(element)
  wanted = @rsignatures[element]

  candidates = @lsignatures.each_with_object([]) do |(left, sig), found|
    found << left if !left_matched?(left) && sig == wanted
  end

  # Zero candidates -> nil (first of an empty array); one candidate -> that node.
  return candidates.first if candidates.size <= 1

  with_matching_parent = candidates.select do |left|
    left_matches?(left.parent, element.parent)
  end

  with_matching_parent.size == 1 ? with_matching_parent.first : nil
end
hash_for(array)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 100
# Computes the signature hash for a list of signature parts.
#
# The parts are joined with ";" and the raw (binary) SHA-256 digest of the
# joined bytes is returned. Every part is converted to a binary string
# before joining: the parts mix text (UTF-8 names) with raw digests, and
# joining a non-ASCII UTF-8 string with a binary string that contains high
# bytes raises Encoding::CompatibilityError. The digest only depends on the
# bytes, so the result is unchanged for inputs that joined cleanly before.
#
# NOTE: parts are not escaped, so a part that itself contains ";" is not
# distinguished from two separate parts.
#
# array - list of signature parts (strings).
#
# Returns a 32-byte binary String.
def hash_for(array)
  bytes = array.flatten.map { |part| part.to_s.b }.join(";")
  Digest::SHA256.digest(bytes)
end
match_all_children(left, right)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 159
# Records `left` and `right` as a match and then pairs their children
# position by position, recursing into every pair.
#
# Array#zip pads with nil when `right` has fewer children than `left`.
# Those padded pairs are skipped; recursing into them would record a match
# against nil and then fail on `nil.children`.
#
# left  - left-side node.
# right - right-side node paired with `left`.
def match_all_children(left, right)
  record_matching(left, right)

  left.children.zip(right.children).each do |left_child, right_child|
    # No counterpart on the right side for this position.
    next unless right_child

    match_all_children(left_child, right_child)
  end
end
match_bottom_up(element)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 174
# Post-order pass over the left tree that tries to pair still-unmatched
# nodes using their parent's match.
#
# Children are processed first. Afterwards, if `element` is unmatched but
# its parent is matched, the first not-yet-matched child of the parent's
# partner with the same name as `element` is recorded as its match.
#
# element - left-side node to process (together with its subtree).
def match_bottom_up(element)
  element.children.each { |child| match_bottom_up(child) }

  return if left_matched?(element)
  return unless element.respond_to?(:parent) && left_matched?(element.parent)

  # Children of the parent's partner that are still free on the right side.
  available = left_match(element.parent).children.reject { |c| right_matched?(c) }
  partner = available.find { |c| c.name == element.name }

  record_matching(element, partner) if partner
end
match_by_ids(ldoc, rdoc)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 57
# Pairs elements of the two documents that carry the same `id` attribute.
#
# Right-side elements are grouped by id once, keeping document order inside
# each group. Every left-side element then takes the first unused right-side
# element with the same id, so each right-side element is matched at most
# once. This produces the same pairs, in the same order, as scanning the
# right-side list for every left element, without the quadratic rescans.
#
# ldoc - left document root (must respond to `css`).
# rdoc - right document root (must respond to `css`).
def match_by_ids(ldoc, rdoc)
  unused_by_id = rdoc.css("[id]").group_by { |e| e[:id] }

  ldoc.css("[id]").each do |element|
    bucket = unused_by_id[element[:id]]
    next if bucket.nil? || bucket.empty?

    # shift consumes the partner so it cannot be matched a second time.
    record_matching(element, bucket.shift)
  end
end
match_parents(left, right)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 166
# Pairs the parents of two already-paired nodes when neither parent has
# been matched yet and both parents have the same name.
#
# left  - left-side node.
# right - right-side node paired with `left`.
def match_parents(left, right)
  # TODO implement multi-ancestor matching
  return if left_matched?(left.parent) || right_matched?(right.parent)

  left_parent = left.parent
  right_parent = right.parent
  record_matching(left_parent, right_parent) if left_parent.name == right_parent.name
end
match_top_down(element)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 186
# Pre-order pass over the left tree that tries to pair still-unmatched
# nodes using the matches of their children.
#
# For an unmatched `element`, the parents of its children's partners are
# collected (without duplicates) and those already matched on the right
# side are dropped. If exactly one remains and it has the same name as
# `element`, the two are recorded as a match. The children are visited
# afterwards in every case.
#
# element - left-side node to process (together with its subtree).
def match_top_down(element)
  unless left_matched?(element)
    matched_children = element.children.select { |c| left_matched?(c) }
    partner_parents = matched_children.map { |c| left_match(c).parent }.uniq
    free_parents = partner_parents.reject { |e| right_matched?(e) }

    if free_parents.size == 1 && free_parents.first.name == element.name
      record_matching(element, free_parents.first)
    end
  end

  element.children.each { |child| match_top_down(child) }
end
parse(string)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 41
# Parses a complete HTML document string with Nokogiri.
#
# The parse options are the library's HTML defaults with NOBLANKS added.
# Option flags are bit masks and must be combined with bitwise OR; a
# bitwise AND keeps only the bits the two constants have in common, so it
# can neither preserve the defaults nor add NOBLANKS.
#
# string - HTML source.
#
# Returns the document produced by Nokogiri::HTML.
def parse(string)
  options = Nokogiri::XML::ParseOptions::DEFAULT_HTML |
            Nokogiri::XML::ParseOptions::NOBLANKS
  Nokogiri::HTML(string, nil, nil, options)
end
parse_fragments(string)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 45
# Parses an HTML fragment string (markup without a surrounding document).
#
# string - HTML fragment source.
#
# Returns the result of Nokogiri::HTML::DocumentFragment.parse.
def parse_fragments(string)
  fragment_parser = Nokogiri::HTML::DocumentFragment
  fragment_parser.parse(string)
end
perform_initial_matching()
click to toggle source
# File lib/html-dom-diff/differ.rb, line 122
# Drains the match queue, pairing whole subtrees by signature.
#
# Each popped right-side node that is still unmatched is looked up with
# #find_best_match. On success its entire subtree is matched and the
# parents are paired where possible. Otherwise (already matched, or no
# usable candidate) its children are queued so they are tried on their own.
def perform_initial_matching
  until @matchqueue.size.zero?
    element = @matchqueue.pop
    match = right_matched?(element) ? nil : find_best_match(element)

    if match
      match_all_children(match, element)
      match_parents(match, element)
    else
      element.children.each { |child| @matchqueue.push(child) }
    end
  end
end
perform_initial_top_down_matching(lnodes, rnodes)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 108
# Pairs sibling nodes whose name is unambiguous on both sides, then
# repeats the process for the children of every pair.
#
# Text nodes are ignored. A left node is paired only when no other left
# sibling shares its name and exactly one right sibling has that name.
# Name counts and the right-side lookup are built once per call instead of
# being recomputed for every left node.
#
# lnodes - sibling nodes from the left tree.
# rnodes - sibling nodes from the right tree.
def perform_initial_top_down_matching(lnodes, rnodes)
  left_nodes = lnodes.reject(&:text?)
  left_counts = left_nodes.each_with_object(Hash.new(0)) { |node, counts| counts[node.name] += 1 }
  right_by_name = rnodes.reject(&:text?).group_by(&:name)

  left_nodes.each do |lnode|
    candidates = right_by_name.fetch(lnode.name, [])
    next unless left_counts[lnode.name] == 1 && candidates.size == 1

    rnode = candidates.first
    record_matching(lnode, rnode)
    perform_initial_top_down_matching(lnode.children, rnode.children)
  end
end
prep_with(sig_hash, element, level=0)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 68
# Recursively computes weight, signature and depth for a subtree.
#
# The weight of a node is its own weight plus the weights of all its
# children; its signature is the hash of its own signature part followed by
# its children's signatures. The weight is registered with the builder, the
# signature is stored in `sig_hash`, and the depth in @depths.
#
# sig_hash - Hash that receives node => signature entries.
# element  - root of the subtree to process.
# level    - depth of `element`; callers normally rely on the default of 0.
#
# Returns a two-element Array: [total weight, signature].
def prep_with(sig_hash, element, level=0)
  own_weight = weight_for(element)
  own_part = signature_part_for(element)

  # Children are fully processed before this node is registered.
  child_results = element.children.map do |child|
    prep_with(sig_hash, child, level + 1)
  end

  total_weight = child_results.inject(own_weight) { |sum, (weight, _)| sum + weight }
  parts = [own_part] + child_results.map { |_, signature| signature }

  @builder.add_weight(element, total_weight)
  signature = hash_for(parts)
  sig_hash[element] = signature
  @depths[element] = level

  [total_weight, signature]
end
record_matching(left, right)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 104
# Registers a left/right pair with the builder.
#
# left  - left-side node.
# right - right-side node.
#
# Returns whatever the builder's `match` returns.
def record_matching(left, right)
  @builder.match(left, right)
end
reset(ldoc, rdoc)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 49
# Discards all state from a previous run and prepares a new one.
#
# Creates a fresh builder for the two documents, empties the depth and
# signature tables, and creates a new priority queue whose comparison block
# compares the weights the builder has stored for two nodes.
#
# ldoc - root node of the left document.
# rdoc - root node of the right document.
def reset(ldoc, rdoc)
  @builder = DeltaTreeBuilder.new(ldoc, rdoc)

  @depths = {}
  @lsignatures = {}
  @rsignatures = {}

  @matchqueue = PQueue.new do |a, b|
    @builder.weight(a) > @builder.weight(b)
  end
end
signature_part_for(element)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 92
# Returns the piece of a node's signature that comes from the node itself.
#
# Text and CDATA nodes contribute their text content; every other node
# contributes its name.
#
# element - node responding to `text?`, `cdata?`, `text` and `name`.
#
# Returns a String.
def signature_part_for(element)
  if element.text? || element.cdata?
    element.text
  else
    element.name
  end
end
weight_for(element)
click to toggle source
# File lib/html-dom-diff/differ.rb, line 84
# Returns the weight a node contributes on its own (children excluded).
#
# Text and CDATA nodes weigh 1 + ln(text length); every other node weighs 1.
# The length is clamped to at least 1 because Math.log(0) is -Infinity:
# a single empty text/CDATA node would otherwise make the accumulated
# weight of every ancestor -Infinity as well.
#
# element - node responding to `text?`, `cdata?` and `text`.
#
# Returns a Numeric (Float for text/CDATA nodes, 1 otherwise).
def weight_for(element)
  return 1 unless element.text? || element.cdata?

  length = [element.text.size, 1].max
  1 + Math.log(length)
end