module XMLhelper
15-Jan-2022: bug fix: Related to previous bug fix; Detecting
Attributes::Value is now performed instead of a String object
14-Jan-2022: bug fix: Related to previous bug fix; Unescape is
now only applied to objects of type Attributes::Value
01-Jan-2022: bug fix: Attribute values are no longer unescaped when
called from Rexle#xml
03-Apr-2021: bug fix: When using to_a, a CDATA element, if present, is now output
20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
30-Jul-2020: minor improvement: plaintext now unescapes &amp; to &
11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
23-Apr-2020: feature: Added public method plaintext.
04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
18-Sep-2019: minor bug fix: &apos; is now unescaped properly
09-Jul-2019: minor improvement: A comment tag now has a new line when pretty printed
02-Feb-2019: feature: A comment tag can now have nested elements
03-Nov-2018: feature: Debug messages can now use coloured text
02-Oct-2018: feature: Added Rexle::Elements#last
18-Jan-2018: bug fix: An Element’s attributes are now cloned too
16-Sep-2017: improvement: Multiple results are now returned if the
xpath contains an *and* operator
14-Sep-2017: improvement: An and operator can now be
used between xpath statements
10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id]
10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep
track of the working document when elements are passed to different documents
10-Aug-2017: bug fix: Element prefixes are now only processed if they exist
13-Apr-2017: bug fix: Rexle::Elements#index
was implemented which fixes the
Rexle::Element#next_sibling and Rexle::Element#previous_sibling bugs
25-Feb-2017: improvement:
An input rexle array can now have an empty array for children e.g. doc = Rexle.new(["records", {}, "", []])
25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer
Public Instance Methods
# File lib/rexle.rb, line 83
#
# Builds the indented XML text for the whole document: the root element
# (name plus attributes) wrapped around the pretty-printed children.
#
# children    - the nodes handed to pretty_print (may be nil).
# declaration - when truthy, and instructions are present, the processing
#               instructions are placed in front of the XML.
#
# Returns the XML as a String.
def doc_pretty_print(children, declaration=true)

  # pretty_print returns '' (a String, not an Array) when it is given nil,
  # and String has no #join; Array() makes both return shapes joinable.
  body = Array(pretty_print(children, 2)).join

  # NOTE(review): doc_print renders Attributes::Value with
  # to_s(unescape: false); plain to_s is used here -- confirm whether the
  # pretty-printed output is meant to differ.
  attrs = self.root.attributes.to_a.map do |k, v|
    "%s='%s'" % [k, (v.is_a?(Array) ? v.join(' ') : v.to_s)]
  end

  ind = "\n "
  attr_text = attrs.empty? ? '' : ' ' + attrs.join(' ')
  xml = "<%s%s>%s%s%s</%s>" % [self.root.name, attr_text, ind, body,
                               "\n", self.root.name]

  if self.instructions && declaration
    processing_instructions("") + xml
  else
    xml
  end
end
# File lib/rexle.rb, line 56
#
# Builds the compact (non-indented) XML text for the whole document.
#
# children    - the nodes handed to scan_print; nil, an empty collection or
#               an "empty string" collection produce an empty body.
# declaration - when truthy, and instructions are present, the processing
#               instructions are placed in front of the XML.
#
# Returns the XML as a String.
def doc_print(children, declaration=true)

  nothing_to_print = children.nil? || children.empty? ||
                     children.is_an_empty_string?

  body = if nothing_to_print
    ''
  else
    scan_print(children).join.force_encoding("utf-8")
  end

  pairs = self.root.attributes.to_a.map do |key, value|

    rendered = if value.is_a?(Array)
      value.join(' ')
    elsif value.is_a?(Attributes::Value)
      # keep the attribute text escaped in the emitted XML
      value.to_s(unescape: false)
    else
      value
    end

    "%s='%s'" % [key, rendered]
  end

  attr_text = pairs.empty? ? '' : ' ' + pairs.join(' ')
  xml = "<%s%s>%s</%s>" % [self.root.name, attr_text, body, self.root.name]

  return xml unless self.instructions && declaration

  processing_instructions() + xml
end
# File lib/rexle.rb, line 102
#
# Short identification string built from the receiver's object_id.
def inspect
  "#<Rexle:%s>" % object_id
end
# File lib/rexle.rb, line 176
#
# Renders a list of nodes as indented XML fragments.
#
# nodes  - collection of nodes (Rexle::Element, String, Rexle::CData,
#          Rexle::Comment); nodes whose to_s is blank are skipped.
# indent - indentation level, converted with to_i (default '0').
#
# Returns '' when nodes is nil; otherwise an Array with one entry per
# remaining node: a (possibly nested) Array for an element, a String for
# text, CDATA and comments, nil for any other node type.
def pretty_print(nodes, indent='0')

  indent = indent.to_i
  return '' unless nodes

  # String#* raises ArgumentError for a negative count, which the default
  # indent of 0 used to trigger for every element after the first one.
  outer_pad = "\n" + ' ' * [indent - 1, 0].max

  visible = nodes.select { |x| x.to_s.strip.length > 0 }

  visible.map.with_index do |x, i|

    case x
    when Rexle::Element

      attrs = x.attributes.to_a.map do |k, v|
        "%s='%s'" % [k, (v.is_a?(Array) ? v.join(' ') : v)]
      end

      tag = x.name + (attrs.empty? ? '' : ' ' + attrs.join(' '))
      # only nodes after the first start on a fresh line
      start = i > 0 ? outer_pad : ''

      # script, textarea and iframe are always written with a closing tag
      has_content = (x.value && x.value.length > 0) ||
        (x.children && x.children.length > 0 &&
          !x.children.is_an_empty_string?) ||
        x.name == 'script' || x.name == 'textarea' || x.name == 'iframe'

      if has_content

        has_child_elements = x.children &&
          x.children.grep(Rexle::Element).length > 0

        # break after the opening tag only when child elements follow
        ind1 = has_child_elements ? ("\n" + ' ' * indent) : ''

        out = ["%s<%s>%s" % [start, tag, ind1]]
        out << pretty_print(x.children, (indent + 1).to_s)
        ind2 = ind1.empty? ? '' : outer_pad
        out << "%s</%s>" % [ind2, x.name]
      else
        ["%s<%s/>" % [start, tag]]
      end

    when String then x.sub(/^[\n\s]+$/, '')
    when Rexle::CData then x.print
    when Rexle::Comment then "\n" + (' ' * indent) + x.print
    end
  end
end
# File lib/rexle.rb, line 106
#
# Renders every entry of self.instructions as a "<?...?>" line.
#
# s - separator placed between the rendered instructions (default '').
#
# Returns the joined String.
def processing_instructions(s='')

  rendered = self.instructions.map do |parts|
    "<?%s?>\n" % parts.join(' ')
  end

  rendered.join(s)
end
# File lib/rexle.rb, line 112
#
# Renders a list of nodes as compact XML fragments.
#
# nodes - collection of nodes (Rexle::Element, String, Rexle::CData,
#         Rexle::Comment).
#
# Returns an Array with one entry per node: a nested Array of fragments for
# an element, the String itself for text, the result of #print for CDATA
# and comments, and nil for any other node type.
def scan_print(nodes)

  # tags that are always written as <x></x>, never as <x/>
  keep_open = %w(script textarea iframe div object a)

  nodes.map do |node|

    case node
    when Rexle::Element

      pairs = node.attributes.to_a.map do |key, value|
        "%s='%s'" % [key, (value.is_a?(Array) ? value.join(' ') : value)]
      end

      tag = node.name + (pairs.empty? ? '' : ' ' + pairs.join(' '))

      kids = node.children
      has_kids = kids && kids.length > 0 && !kids.is_an_empty_string?

      if has_kids || keep_open.include?(node.name)
        ["<%s>" % tag, scan_print(node.children), "</%s>" % node.name]
      else
        ["<%s/>" % tag]
      end

    when String then node
    when Rexle::CData then node.print
    when Rexle::Comment then node.print
    end
  end
end
# File lib/rexle.rb, line 151
#
# Converts a list of nodes into nested plain Arrays.
#
# nodes - collection of nodes; Rexle::Element and String nodes are
#         converted, every other node type is skipped.
#
# Returns an Array in which each element node becomes
# [name, attributes, value, *cdata entries, *converted children]
# and each String node becomes a copy of that String.
def scan_to_a(nodes)

  # each_with_object keeps the accumulator even when a node matches neither
  # branch; with inject the block's nil result replaced the accumulator and
  # the next `<<` (or the caller's concat) raised.
  nodes.each_with_object([]) do |x, r|

    if x.is_a? Rexle::Element

      # Hash.new(obj) builds an EMPTY hash whose default value is obj, so
      # the attributes were lost; take a real copy instead.
      a = [String.new(x.name), x.attributes.to_h.dup, x.value.to_s]

      if x.cdatas.any?
        a.concat(x.cdatas.map { |cdata| ['![', {}, cdata] })
      end

      # NOTE(review): children are only appended when there is more than
      # one of them -- presumably the first child is the text already
      # captured by x.value; confirm against Rexle::Element.
      a.concat(scan_to_a(x.children)) if x.children.length > 1

      r << a

    elsif x.is_a? String

      r << String.new(x)

    end
  end
end