class Boilerpipe::SAX::TagActions::AnchorText
Public Instance Methods
append_anchor_text_end(handler)
click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 35
# Writes the anchor-text end marker to the handler.
#
# Calls handler.append_space first, then appends the ANCHOR_TEXT_END
# token followed by a single-space token.
#
# handler - object responding to append_space and append_token
#           (presumably the Boilerpipe::SAX::HTMLContentHandler; confirm
#           against callers).
#
# Returns whatever the final handler.append_token call returns.
def append_anchor_text_end(handler)
  handler.append_space
  end_marker = Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_END
  handler.append_token(end_marker)
  handler.append_token(' ')
end
append_anchor_text_start(handler)
click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 29
# Writes the anchor-text start marker to the handler.
#
# Calls handler.append_space first, then appends the ANCHOR_TEXT_START
# token followed by a single-space token.
#
# handler - object responding to append_space and append_token
#           (presumably the Boilerpipe::SAX::HTMLContentHandler; confirm
#           against callers).
#
# Returns whatever the final handler.append_token call returns.
def append_anchor_text_start(handler)
  handler.append_space
  start_marker = Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_START
  handler.append_token(start_marker)
  handler.append_token(' ')
end
changes_tag_level?()
click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 25
# Reports whether this tag action changes the tag level.
#
# Always returns true for anchor text.
def changes_tag_level?
  true
end
end_tag(handler, name)
click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 19
# Handles the closing of an anchor tag.
#
# Decrements handler.in_anchor_tag, then emits the anchor-text end marker
# unless the handler still reports being inside an anchor tag or inside an
# ignorable element.
#
# handler - object exposing in_anchor_tag (read/write), in_anchor_tag? and
#           in_ignorable_element?.
# name    - tag name; not used by this method.
#
# Always returns false.
def end_tag(handler, name)
  handler.in_anchor_tag -= 1

  # Short-circuit order matters: in_ignorable_element? is only consulted
  # when the handler is no longer inside an anchor.
  suppress_marker = handler.in_anchor_tag? || handler.in_ignorable_element?
  append_anchor_text_end(handler) unless suppress_marker

  false
end
nested_achor_tag_error_recovering(handler, name)
click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 41
# Recovers from a nested anchor start by delegating to end_tag.
#
# handler - passed through to end_tag unchanged.
# name    - passed through to end_tag unchanged.
#
# Returns the result of end_tag.
def nested_achor_tag_error_recovering(handler, name)
  # Unclear whether Nokogiri can produce this situation at all.
  # Nested A elements are not allowed per specification, so reaching this
  # branch most likely means the XML/HTML parser has a bug.
  #
  # Warning kept disabled, as in the original:
  # puts "Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."
  end_tag(handler, name)
end
start(handler, name, attrs)
click to toggle source
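Marks this tag as “anchor” (this should usually only be set for the <A> tag). Anchor tags may not be nested. There is a bug in certain versions of NekoHTML which still allows nested tags. If boilerpipe encounters such a nesting, this implementation does not raise a SAXException; it calls nested_achor_tag_error_recovering to recover and returns without emitting the anchor-text start marker.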
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 6
# Handles the opening of an anchor tag.
#
# Increments handler.in_anchor_tag. If the handler was already inside an
# anchor before the increment (a nested anchor), recovery is delegated to
# nested_achor_tag_error_recovering and no start marker is emitted.
# Otherwise the anchor-text start marker is emitted unless the handler is
# inside an ignorable element.
#
# handler - object exposing in_anchor_tag (read/write), in_anchor_tag? and
#           in_ignorable_element?.
# name    - tag name; forwarded to the recovery routine.
# attrs   - tag attributes; not used by this method.
#
# Always returns false. The nested-anchor path previously returned nil via
# a bare `return`; it now returns false like every other path.
def start(handler, name, attrs)
  # Nesting must be checked before the counter is bumped.
  already_in_anchor = handler.in_anchor_tag?
  handler.in_anchor_tag += 1

  if already_in_anchor
    nested_achor_tag_error_recovering(handler, name)
    return false
  end

  append_anchor_text_start(handler) unless handler.in_ignorable_element?
  false
end