class Boilerpipe::SAX::TagActions::AnchorText

Public Instance Methods

append_anchor_text_end(handler) click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 35
# Emits the anchor-text end marker into the handler's token stream.
#
# Sequence: a space via +append_space+, then the ANCHOR_TEXT_END constant,
# then a literal single-space token.
#
# handler - object responding to +append_space+ and +append_token+.
#
# Returns whatever the final +append_token+ call returns.
def append_anchor_text_end(handler)
  handler.append_space

  closing_marker = Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_END
  handler.append_token(closing_marker)

  # Trailing separator so the marker does not run into the next token.
  handler.append_token(' ')
end
append_anchor_text_start(handler) click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 29
# Emits the anchor-text start marker into the handler's token stream.
#
# Sequence: a space via +append_space+, then the ANCHOR_TEXT_START constant,
# then a literal single-space token.
#
# handler - object responding to +append_space+ and +append_token+.
#
# Returns whatever the final +append_token+ call returns.
def append_anchor_text_start(handler)
  handler.append_space

  opening_marker = Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_START
  handler.append_token(opening_marker)

  # Trailing separator so the marker does not run into the next token.
  handler.append_token(' ')
end
changes_tag_level?() click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 25
# Predicate for this tag action; unconditionally true.
#
# NOTE(review): presumably consulted by the content handler to decide
# whether this tag affects its nesting-depth bookkeeping -- confirm
# against the caller.
#
# Returns true.
def changes_tag_level?
  true
end
end_tag(handler, name) click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 19
# Handles a closing anchor tag.
#
# Decrements the handler's anchor counter, then emits the anchor-text end
# marker only when the handler reports it is no longer inside an anchor
# and is not inside an ignorable element.
#
# handler - object exposing +in_anchor_tag+ (read/write), +in_anchor_tag?+
#           and +in_ignorable_element?+.
# name    - tag name; not used by this method.
#
# Returns false in every case.
def end_tag(handler, name)
  handler.in_anchor_tag -= 1

  # Still nested inside another anchor: nothing to close yet.
  return false if handler.in_anchor_tag?
  # Content inside ignorable elements never receives markers.
  return false if handler.in_ignorable_element?

  append_anchor_text_end(handler)
  false
end
nested_achor_tag_error_recovering(handler, name) click to toggle source
# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 41
# Recovery path for an anchor start tag seen while already inside an anchor.
#
# Recovers by delegating to +end_tag+ with the same arguments.
#
# handler - the content handler, forwarded unchanged to +end_tag+.
# name    - the tag name, forwarded unchanged to +end_tag+.
#
# Returns whatever +end_tag+ returns.
def nested_achor_tag_error_recovering(handler, name)
  # Nested A elements are not allowed per specification, so landing here
  # most likely means the upstream parser has a bug (the comment this
  # replaces cited NekoHTML bug #2909310 and advised cleaning the HTML
  # externally before feeding it to boilerpipe again).
  # NOTE(review): unclear whether Nokogiri can trigger this at all.
  # A warning used to be printed here; it is intentionally left disabled.
  end_tag(handler, name)
end
start(handler, name, attrs) click to toggle source

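Marks this tag as “anchor” (this should usually only be set for the <A> tag). Anchor tags may not be nested. There is a bug in certain versions of NekoHTML which still allows nested tags. If boilerpipe encounters such nestings, a SAXException is thrown. (Note: the implementation shown below does not raise; on a nested anchor it calls nested_achor_tag_error_recovering and tries to recover instead.)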

# File lib/boilerpipe/sax/tag_actions/anchor_text.rb, line 6
# Handles an opening anchor tag.
#
# The handler's anchor counter is always incremented. If the handler was
# already inside an anchor beforehand, the nested-anchor recovery routine
# runs and no start marker is emitted. Otherwise the anchor-text start
# marker is emitted unless the handler is inside an ignorable element.
#
# handler - object exposing +in_anchor_tag+ (read/write), +in_anchor_tag?+
#           and +in_ignorable_element?+.
# name    - tag name, passed on to the recovery routine.
# attrs   - tag attributes; not used by this method.
#
# Returns nil on the nested-anchor path, false otherwise.
def start(handler, name, attrs)
  # Capture the state *before* bumping the counter.
  already_inside_anchor = handler.in_anchor_tag?
  handler.in_anchor_tag += 1

  if already_inside_anchor
    nested_achor_tag_error_recovering(handler, name)
    return
  end

  append_anchor_text_start(handler) unless handler.in_ignorable_element?
  false
end