module XMLhelper

03-Apr-2021: bug fix: When using to_a, a CDATA element, if present, is now output
20-Feb-2021: bug fix: The @instructions accessor is now ignored if nil.
11-Sep-2020: feature: Rexle::Element#text now has unescaped HTML using CGI
30-Jul-2020: minor improvement: plaintext now unescapes &amp; to &
11-May-2020: bug fix: Rexle#css now responds correctly to valid selectors
23-Apr-2020: feature: Added public method plaintext.
04-Feb-2020: minor bug fix: Element A is now defined as a non self-closing tag
18-Sep-2019: minor bug fix: &apos; is now unescaped properly
09-Jul-2019: minor improvement: A comment tag now has a new line when pretty printed

02-Feb-2019: feature: A comment tag can now have nested elements 03-Nov-2018: feature: Debug messages can now use coloured text 02-Oct-2018: feature: Added Rexle::Elements#last 18-Jan-2018: bug fix: An Element's attributes are now cloned too 16-Sep-2017: improvement: Multiple results are now returned if the

xpath contains an *and* operator

14-Sep-2017: improvement: An and operator can now be

used between xpath statements

10-Sep-2017: bug fix: The following XPath has now been tested => //.[@id] 10-Aug-2017: feature: Rexle now has a member variable (@rexle) to keep

track of the working document when elements are passed to 
                  different documents
bug fix: Element prefixes are now only processed if they exist

13-Apr-2017: bug fix: Rexle::Elements#index was implemented which fixes the

Rexle::Element#next_sibling and Rexle::Element#previous_sibling  bugs

25-Feb-2017: improvement:

An input rexle array can now have an empty array for 
children e.g. doc = Rexle.new(["records", {}, "", []])

25-Dec-2016: revision for Ruby 2.4: Replaced Fixnum with Integer

Public Instance Methods

doc_pretty_print(children, declaration=true) click to toggle source
# File lib/rexle.rb, line 69
# Renders the root element, wrapped around the pretty-printed +children+, as
# an indented XML string.
#
# children    - child nodes handed to pretty_print; may be nil.
# declaration - when truthy (the default) and the document has processing
#               instructions, they are prepended to the output.
#
# Returns the XML String.
def doc_pretty_print(children, declaration=true)

  # pretty_print returns '' (a String, which has no #join) when children is
  # nil and an Array otherwise; Array() makes both cases joinable.
  body = Array(pretty_print(children, 2)).join

  a = self.root.attributes.to_a.map do |k,v|
    # Multi-valued attributes (e.g. class) are stored as Arrays.
    "%s='%s'" % [k, (v.is_a?(Array) ? v.join(' ') : v)]
  end

  ind = "\n  "
  attributes = a.empty? ? '' : ' ' + a.join(' ')
  xml = "<%s%s>%s%s%s</%s>" % [self.root.name, attributes, ind, body, "\n",
    self.root.name]

  if self.instructions && declaration
    processing_instructions("") + xml
  else
    xml
  end
end
doc_print(children, declaration=true) click to toggle source
# File lib/rexle.rb, line 49
# Serialises the root element and +children+ to a compact (unindented) XML
# string.
#
# children    - child nodes passed to scan_print; nil, empty, or a single
#               empty string all produce an empty body.
# declaration - when truthy (the default) and the document has processing
#               instructions, they are prepended to the output.
#
# Returns the XML String.
def doc_print(children, declaration=true)

  blank = children.nil? || children.empty? || children.is_an_empty_string?
  body = blank ? '' : scan_print(children).join.force_encoding("utf-8")

  pairs = root.attributes.to_a.map do |key, value|
    # Multi-valued attributes (e.g. class) are stored as Arrays.
    rendered = value.is_a?(Array) ? value.join(' ') : value.to_s
    "#{key}='#{rendered}'"
  end

  attribute_text = pairs.empty? ? '' : ' ' + pairs.join(' ')
  xml = "<#{root.name}#{attribute_text}>#{body}</#{root.name}>"

  return xml unless instructions && declaration

  processing_instructions + xml
end
inspect() click to toggle source
# File lib/rexle.rb, line 88
# Short, fixed-format description of the receiver: the literal "Rexle" label
# followed by the object's id, instead of a dump of its internal state.
def inspect
  "#<Rexle:#{object_id}>"
end
pretty_print(nodes, indent='0') click to toggle source
# File lib/rexle.rb, line 162
# Recursively renders +nodes+ as indented XML fragments.
#
# nodes  - collection of child nodes (Rexle::Element, String, Rexle::CData or
#          Rexle::Comment). Nodes whose to_s is blank are skipped.
# indent - nesting depth as an Integer or numeric String (default '0').
#
# Returns an Array with one entry per rendered node: a nested Array of
# fragments for an element, a String for text, CDATA and comments, and nil
# for any other node type. Returns '' when nodes is nil.
def pretty_print(nodes, indent='0')

  indent = indent.to_i
  return '' unless nodes

  # Following siblings and closing tags sit one level above the children.
  # Clamp at zero: String#* raises ArgumentError for a negative count, which
  # the default indent of '0' would otherwise trigger.
  outer_pad = "\n" + '  ' * [indent - 1, 0].max

  nodes.select {|x| x.to_s.strip.length > 0}
      .map.with_index do |x, i|

    if x.is_a? Rexle::Element then

      a = x.attributes.to_a.map do |k,v|
        # Multi-valued attributes (e.g. class) are stored as Arrays.
        "%s='%s'" % [k, (v.is_a?(Array) ? v.join(' ') : v)]
      end

      tag = x.name + (a.empty? ? '' : ' ' + a.join(' '))
      start = i > 0 ? outer_pad : ''

      # script, textarea and iframe are always written with an explicit
      # closing tag, even when empty.
      has_content = (x.value && x.value.length > 0) ||
          (x.children && x.children.length > 0 &&
              !x.children.is_an_empty_string?) ||
          %w(script textarea iframe).include?(x.name)

      if has_content then

        # Only break onto new lines when there are nested elements; text-only
        # content stays on the same line as its tags.
        has_child_elements = x.children &&
            x.children.grep(Rexle::Element).length > 0
        ind1 = has_child_elements ? ("\n" + '  ' * indent) : ''
        ind2 = has_child_elements ? outer_pad : ''

        ["%s<%s>%s" % [start, tag, ind1],
         pretty_print(x.children, indent + 1),
         "%s</%s>" % [ind2, x.name]]
      else
        ["%s<%s/>" % [start, tag]]
      end

    elsif x.is_a? String then x.sub(/^[\n\s]+$/,'')
    elsif x.is_a? Rexle::CData then x.print
    elsif x.is_a? Rexle::Comment then "\n" + (' ' * indent) + x.print
    end
  end

end
processing_instructions(s='') click to toggle source
# File lib/rexle.rb, line 92
# Formats every processing instruction as a "<?...?>" line.
#
# s - separator String placed between consecutive instruction lines
#     (default ''). Each line already ends with a newline.
#
# Returns the concatenated String.
def processing_instructions(s='')
  lines = instructions.map { |parts| "<?#{parts.join(' ')}?>\n" }
  lines.join(s)
end
scan_print(nodes) click to toggle source
# File lib/rexle.rb, line 98
# Recursively converts +nodes+ into compact (unindented) XML fragments.
#
# nodes - collection of Rexle::Element, String, Rexle::CData and
#         Rexle::Comment nodes.
#
# Returns an Array with one entry per node: a nested Array of fragments for
# an element, the String itself for text, the printed form for CDATA and
# comments, and nil for any other node type.
def scan_print(nodes)

  nodes.map do |node|

    case node
    when Rexle::Element

      pairs = node.attributes.to_a.map do |key, value|
        # Multi-valued attributes (e.g. class) are stored as Arrays.
        "#{key}='#{value.is_a?(Array) ? value.join(' ') : value}'"
      end

      tag = node.name + (pairs.empty? ? '' : ' ' + pairs.join(' '))

      kids = node.children
      populated = kids && kids.length > 0 && !kids.is_an_empty_string?

      # These tags are always written with an explicit closing tag, even
      # when they have no content.
      always_paired = %w(script textarea iframe div object a)

      if populated || always_paired.include?(node.name)
        ["<#{tag}>", scan_print(node.children), "</#{node.name}>"]
      else
        ["<#{tag}/>"]
      end

    when String then node
    when Rexle::CData then node.print
    when Rexle::Comment then node.print
    end
  end

end
scan_to_a(nodes) click to toggle source
# File lib/rexle.rb, line 137
# Recursively converts +nodes+ into the nested Array form
# [name, attributes, value, *children].
#
# nodes - collection of nodes. Rexle::Element and String nodes are converted;
#         any other node type is skipped.
#
# Returns an Array holding one Array per element and one String copy per
# text node.
def scan_to_a(nodes)

  # each_with_object keeps the accumulator intact when a node matches neither
  # branch; with inject the block's nil result would replace the accumulator.
  nodes.each_with_object([]) do |x, r|

    if x.is_a? Rexle::Element then

      # Hash[...] copies the attribute pairs. Hash.new(attrs) would instead
      # create an empty Hash that merely uses attrs as its default value.
      a = [String.new(x.name), Hash[x.attributes.to_h], x.value.to_s]

      if x.cdatas.any? then
        a.concat x.cdatas.map {|cdata| ['![', {}, cdata] }
      end

      # NOTE(review): children are only descended into when there is more
      # than one; presumably a lone child is the text already captured by
      # x.value - confirm against Rexle::Element.
      a.concat(scan_to_a(x.children)) if x.children.length > 1
      r << a

    elsif x.is_a? String then

      r << String.new(x)
    end

  end

end