class Rexle::Element
Attributes
child_elements[R]
doc_id[R]
instructions[R]
name[RW]
parent[RW]
value[RW]
Public Class Methods
new(name=nil, value: nil, attributes: Attributes.new, rexle: self)
click to toggle source
Calls superclass method
# File lib/rexle.rb, line 334
# Builds an element node.
#
# name       - element name; stored after conversion with to_s
# value      - optional initial text, appended through add_text when truthy
# attributes - attribute container, stored as given
# rexle      - reference kept in @rexle (used by doc_root)
#
# Raises RuntimeError when name is nil or false.
def initialize(name=nil, value: nil, attributes: Attributes.new, rexle: self)
  @rexle = rexle
  super()

  @name = name.to_s
  @attributes = attributes
  raise "Element name must not be blank" unless name

  @child_elements = []
  add_text(value) if value
end
Public Instance Methods
add(item)
click to toggle source
# File lib/rexle.rb, line 891
# Appends item to this element.
#
# When item is a Rexle::Element whose doc_id equals this element's doc_id,
# a deep clone is appended, the original is deleted from its current
# position, and the clone is returned. Every other item is handed straight
# to add_element and that call's result is returned.
def add(item)
  same_document = item.is_a?(Rexle::Element) && doc_id == item.doc_id
  return add_element(item) unless same_document

  copy = item.deep_clone
  add_element(copy)
  item.delete
  copy
end
add_attribute(*x)
click to toggle source
# File lib/rexle.rb, line 920
# Merges attributes into @attributes and returns the merged container.
#
# Accepts either a single Hash, or a flat key/value list whose first item is
# a String, Symbol or Attributes::Value. The converter is chosen from the
# class name of the first argument; any other first-argument class has no
# converter and the call fails with NoMethodError.
def add_attribute(*x)
  pairs_to_hash = ->(args) { Hash[*args] }
  converters = {
    'Hash' => ->(args) { args[0] || {} },
    'String' => pairs_to_hash,
    'Symbol' => pairs_to_hash,
    'Attributes::Value' => pairs_to_hash
  }

  converter = converters[x[0].class.to_s]
  @attributes.merge!(converter.call(x))
end
add_element(item)
click to toggle source
# File lib/rexle.rb, line 869
# Appends item to @child_elements according to its type.
#
# String          - wrapped in Value before being appended
# CData / Comment - appended unchanged
# Rexle::Element  - appended, its parent set to this element; returns item
# Rexle           - its root is appended via a recursive call
#
# Any other type is ignored and nil is returned.
def add_element(item)
  case item
  when String
    @child_elements << Value.new(item)
  when Rexle::CData, Rexle::Comment
    @child_elements << item
  when Rexle::Element
    @child_elements << item
    # the child keeps a back-reference to this element as its parent
    item.parent = self
    item
  when Rexle
    add_element(item.root)
  end
end
add_text(s)
click to toggle source
# File lib/rexle.rb, line 938
# Appends s to the child list without any wrapping and returns self so
# calls can be chained.
def add_text(s)
  child_elements.push(s)
  self
end
at_css(selector)
click to toggle source
# File lib/rexle.rb, line 379
# Translates a CSS selector to XPath with RexleCSS and returns the result
# of root.element for that XPath.
def at_css(selector)
  xpath_expression = RexleCSS.new(selector).to_xpath
  root.element(xpath_expression)
end
attribute(key)
click to toggle source
# File lib/rexle.rb, line 944
# Returns the value stored under key in @attributes.
#
# key - String or Symbol; a String key is converted to a Symbol first.
#
# String values are returned with the escaped angle brackets (&lt; and
# &gt;) decoded. Values of any other type, including nil for a missing
# key, are returned unchanged.
def attribute(key)
  key = key.to_sym if key.is_a? String
  raw = @attributes[key]
  return raw unless raw.is_a? String

  # The entity names are required here: replacing '<' with '<' is a no-op.
  raw.gsub('&lt;', '<').gsub('&gt;', '>')
end
attributes()
click to toggle source
# File lib/rexle.rb, line 955
# Returns the attribute container itself (not a copy).
def attributes
  @attributes
end
backtrack(use_attributes: true)
click to toggle source
# File lib/rexle.rb, line 347
# Returns a new BacktrackXPath built for this element.
#
# use_attributes - forwarded unchanged to BacktrackXPath.new
def backtrack(use_attributes: true)
  options = { use_attributes: use_attributes }
  BacktrackXPath.new(self, **options)
end
cdata?()
click to toggle source
# File lib/rexle.rb, line 351
# True when this node is a CData instance (or an instance of a subclass).
def cdata?
  kind_of?(CData)
end
cdatas()
click to toggle source
# File lib/rexle.rb, line 957
# Returns the string form (to_s) of every direct child that is a
# Rexle::CData node, in document order.
def cdatas
  children.select { |node| node.is_a?(Rexle::CData) }.map(&:to_s)
end
children()
click to toggle source
# File lib/rexle.rb, line 961
# Returns the child list itself (not a copy), after attaching a singleton
# helper is_an_empty_string? to it. The helper is true when the list holds
# exactly one item and that item equals ''.
def children
  kids = @child_elements
  kids.define_singleton_method(:is_an_empty_string?) do
    length == 1 && first == ''
  end
  kids
end
children=(a)
click to toggle source
# File lib/rexle.rb, line 972
# Replaces the child list with a. Anything that is not an Array is silently
# ignored and the current list is kept.
def children=(a)
  return unless a.is_a?(Array)

  @child_elements = a
end
clone()
click to toggle source
# File lib/rexle.rb, line 976
# Returns a new Element with the same name and a deep copy of the
# attributes (copied through a Marshal round trip). Only the name and the
# attributes are passed to the new element; children are not.
def clone
  copied_attributes = Marshal.load(Marshal.dump(@attributes))
  Element.new(@name, attributes: copied_attributes)
end
Also aliased as: original_clone
contains(raw_args)
click to toggle source
# File lib/rexle.rb, line 355
# Backs the XPath contains() function.
#
# raw_args - "path, value" as one string; split on the first comma. The
#            value may be wrapped in single or double quotes.
#
# Returns true/false depending on whether any text collected by
# scan_contents from the first node matched by path matches the value.
# Returns [false] (an Array) when the path matches nothing.
# NOTE(review): the value is interpolated into a Regexp unescaped, and the
# [false] early return is truthy - confirm both are intended.
def contains(raw_args)
  path, raw_val = raw_args.split(',',2)
  val = raw_val.strip[/^["']?.*["']?$/]

  matches = query_xpath(path)
  return [false] if matches.nil? || matches.empty?

  haystack = scan_contents(matches.first)
  pattern = /#{val.sub(/^["'](.*)["']$/,'\1')}/
  haystack.grep(pattern).length > 0
end
content(options={})
click to toggle source
# File lib/rexle.rb, line 1116
# Returns the output of xml(options) with HTML entities decoded by
# CGI.unescapeHTML.
def content(options={})
  markup = xml(options)
  CGI.unescapeHTML(markup)
end
count(path)
click to toggle source
# File lib/rexle.rb, line 370
# Number of non-nil results produced by query_xpath(path), counted after
# flattening nested result arrays.
def count(path)
  query_xpath(path).flatten.compact.length
end
css(selector)
click to toggle source
# File lib/rexle.rb, line 383
# Evaluates a comma-separated list of CSS selectors. Each selector is
# converted to XPath with RexleCSS and run through root.xpath; the results
# are concatenated into a single array.
def css(selector)
  selector.split(',').flat_map do |single_selector|
    root.xpath(RexleCSS.new(single_selector).to_xpath)
  end
end
current()
click to toggle source
# File lib/rexle.rb, line 375
# Returns the receiver itself.
def current
  self
end
deep_clone()
click to toggle source
# File lib/rexle.rb, line 974
# Returns a copy of this element made by serialising it with xml and
# parsing that markup into a new Rexle document, whose root is returned.
def deep_clone
  markup = xml
  Rexle.new(markup).root
end
delete(obj=nil)
click to toggle source
# File lib/rexle.rb, line 980
# Removes nodes.
#
# obj - nil (default): this element asks its parent to delete it; nothing
#                      happens when there is no parent.
#       String:        treated as an XPath; every match deletes itself.
#       anything else: looked up in the child list and removed when found.
#
# The return value is incidental: the matched set for a String, the child
# list wrapped in an array after a removal, nil when nothing was removed.
def delete(obj=nil)
  unless obj
    owner = parent
    return owner ? owner.delete(self) : nil
  end

  return xpath(obj).each { |match| match.delete } if obj.is_a?(String)

  position = @child_elements.index(obj)
  return unless position

  @child_elements.delete_at(position)
  [@child_elements]
end
Also aliased as: remove
doc_root()
click to toggle source
# File lib/rexle.rb, line 1016
# Returns the root of the object held in @rexle.
def doc_root
  @rexle.root
end
each(&blk)
click to toggle source
# File lib/rexle.rb, line 1017
# Yields every direct child (of any node type) to the block.
def each(&blk)
  children.each(&blk)
end
each_recursive(&blk)
click to toggle source
# File lib/rexle.rb, line 1018
# Walks the subtree below this element by passing the direct children and
# the block to recursive_scan.
def each_recursive(&blk)
  recursive_scan(children, &blk)
end
Also aliased as: traverse
element(s)
click to toggle source
# File lib/rexle.rb, line 1001
# Runs xpath(s) and returns the first item when the result is an Array;
# any other result is returned as-is.
def element(s)
  found = xpath(s)
  return found unless found.is_a?(Array)

  found.first
end
elements(s=nil)
click to toggle source
# File lib/rexle.rb, line 1006
# With no argument, returns an Elements collection holding only the direct
# children that are Rexle::Element nodes.
#
# With a String argument the child list is indexed with that string.
# NOTE(review): indexing an Array with a String raises TypeError, so the
# String form looks unusable as written - confirm the intended lookup.
#
# Any other argument class has no handler and fails with NoMethodError.
def elements(s=nil)
  handlers = {
    'NilClass' => proc {
      Elements.new(@child_elements.select { |node| node.kind_of? Rexle::Element })
    },
    'String' => proc { |key| @child_elements[key] }
  }

  handlers[s.class.to_s].call(s)
end
filter_xpath(raw_path, rlist=[], &blk)
click to toggle source
# Dispatches an XPath expression to the right evaluator.
#
# raw_path - the XPath string; it is copied before being modified.
# rlist    - accepted for signature compatibility; the body does not read it.
# blk      - forwarded to query_xpath.
#
# Three shapes are recognised, in this order:
#   1. A trailing "[fn()]" predicate: it is sliced off, the remaining path is
#      run through xpath, and the method named fn is called with that result.
#   2. No function-call shape, or a call whose name starts with "attribute"
#      or "@": the path is split on "|" (and on the word "and" when it holds
#      no "[" or "(") and each part goes to query_xpath. The procs table then
#      normalises the combined result according to its class.
#   3. Any other "name(args)[index]" call: "text" is served from texts
#      (optionally picking the 1-based index); otherwise the method with that
#      name is invoked, with the argument string when it is non-empty.
#
# NOTE(review): method names are taken from the path and invoked through
# method(...).call - confirm paths never come from untrusted input.
# NOTE(review): the procs table is written out twice with identical content.
# File lib/rexle.rb, line 463 def filter_xpath(raw_path, rlist=[], &blk) #@log.debug 'inside filter_xpath : ' + raw_path.inspect path = String.new raw_path # is it a function fn_match = path.match(/^(\w+)\(["']?([^\)]*)["']?\)(?:\[(.*)\])?$/) #fn_match = path.match(/^(\w+)\(/) #@log.debug 'fn_match : ' + fn_match.inspect end_fn_match = path.slice!(/\[\w+\(\)\]$/) if end_fn_match then m = end_fn_match[1..-4] #@log.debug 'its a function' [method(m.to_sym).call(xpath path)] elsif (fn_match and fn_match.captures.first[/^(attribute|@)/]) procs = { Array: proc { |x| if block_given? then x.flatten(1) else rs = x.flatten rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id) end }, String: proc {|x| x}, Hash: proc {|x| x}, TrueClass: proc{|x| x}, FalseClass: proc{|x| x}, :"Rexle::Element" => proc {|x| [x]} } bucket = [] raw_results = path.split('|').map do |xp| query_xpath(xp.strip, bucket, &blk) end results = raw_results procs[results.class.to_s.to_sym].call(results) if results elsif fn_match.nil? procs = { Array: proc { |x| if block_given? then x.flatten(1) else rs = x.flatten rs.any?{|x| x == true or x == false} ? rs : rs.uniq(&:object_id) end }, String: proc {|x| x}, Hash: proc {|x| x}, TrueClass: proc{|x| x}, FalseClass: proc{|x| x}, :"Rexle::Element" => proc {|x| [x]} } bucket = [] results = if path =~ /[\[]|\(/ then raw_results = path.split(/\|/).map do |xp| query_xpath(xp.strip, bucket, &blk) end raw_results.flatten.index(true) ? [true] : [] else raw_results = path.split(/ *(?:\||\band\b) */).map do |xp| query_xpath(xp.strip, bucket, &blk) end if path =~ / and / then raw_results.flatten.select {|x| x == true or x == false} else raw_results.flatten.index(true) ? [true] : [] end end return results if !path[/[><]/] and results.any? results = raw_results # .flatten.select {|x| x} procs[results.class.to_s.to_sym].call(results) if results else m, xpath_value, index = fn_match.captures if m == 'text' then a = texts() return index ? 
a[index.to_i - 1].to_s : a end #@log.debug 'before function call' raw_results = xpath_value.empty? ? method(m.to_sym).call \ : method(m.to_sym).call(xpath_value) raw_results end end
has_elements?()
click to toggle source
# File lib/rexle.rb, line 1020
# True when elements (called without an argument) is not empty.
def has_elements?
  elements.empty? ? false : true
end
insert_after(node)
click to toggle source
# File lib/rexle.rb, line 1021
# Places node in the parent's child list one position after this element
# (insert with an offset of 1).
def insert_after(node)
  insert(node, 1)
end
insert_before(node)
click to toggle source
# File lib/rexle.rb, line 1022
# Places node in the parent's child list at this element's position
# (insert with its default offset), which pushes this element one slot on.
def insert_before(node)
  insert(node)
end
inspect()
click to toggle source
# File lib/rexle.rb, line 912 def inspect() if self.xml.length > 30 then "%s ... </>" % self.xml[/<[^>]+>/] else self.xml end end
last(a)
click to toggle source
# File lib/rexle.rb, line 1023
# Returns a.last, i.e. the final item of the collection passed in.
def last(a)
  a.last
end
lowercase(s)
click to toggle source
not yet implemented
# File lib/rexle.rb, line 390
# Placeholder: not yet implemented. Accepts s, does nothing and returns nil.
def lowercase(s)
end
map(&blk)
click to toggle source
# File lib/rexle.rb, line 1024
# Maps the block over every direct child and returns the collected results.
def map(&blk)
  children.map(&blk)
end
max(path)
click to toggle source
# File lib/rexle.rb, line 394
# Largest integer among the results of query_xpath(path). Only String and
# Rexle::Element::Attribute results are considered; each is converted with
# to_i. Returns nil when none qualify.
def max(path)
  candidates = query_xpath(path).flatten.select do |x|
    x.is_a?(String) || x.is_a?(Rexle::Element::Attribute)
  end

  candidates.map(&:to_i).max
end
next_element()
click to toggle source
# File lib/rexle.rb, line 415
# Returns the element that follows this one among the parent's elements,
# or nil when this is the last one.
#
# The position found with index is zero-based; 2 is added before the
# lookup, so this presumably relies on parent.elements being addressed
# from 1 - TODO confirm against Elements#[].
def next_element
  siblings = parent.elements
  own_id = object_id
  i = siblings.index { |x| x.object_id == own_id } + 2

  siblings[i] if i < siblings.length + 1
end
Also aliased as: next_sibling
not(bool)
click to toggle source
# File lib/rexle.rb, line 427
# Backs the XPath not() function: evaluates the expression with xpath and
# returns true when no result is truthy, false otherwise.
def not(bool)
  !xpath(bool).any?
end
plaintext()
click to toggle source
# File lib/rexle.rb, line 1026
# Returns the element's markup reduced to plain text:
#   1. every opening or closing tag is removed,
#   2. &nbsp; entities become ordinary spaces,
#   3. a newline followed by whitespace collapses to a single space,
#   4. the remaining HTML entities are decoded with CGI.unescapeHTML.
def plaintext
  # &nbsp; is replaced explicitly here rather than left to CGI.unescapeHTML;
  # replacing ' ' with ' ' would be a no-op.
  stripped = xml.gsub(/<\/?[^>]+>/, '').gsub('&nbsp;', ' ').gsub(/\n\s+/, ' ')
  CGI.unescapeHTML(stripped)
end
prepend(item)
click to toggle source
# File lib/rexle.rb, line 1120
# Inserts item at the front of the child list, sets item's parent to this
# element and returns item.
def prepend(item)
  @child_elements.unshift(item)
  # the child keeps a back-reference to this element as its parent
  item.parent = self
  item
end
previous_element()
click to toggle source
# File lib/rexle.rb, line 434
# Returns the element that precedes this one among the parent's elements,
# or nil when this is the first one.
#
# The zero-based position from index is used directly for the lookup, so
# this presumably relies on parent.elements being addressed from 1 -
# TODO confirm against Elements#[].
def previous_element
  siblings = parent.elements
  own_id = object_id
  i = siblings.index { |x| x.object_id == own_id }

  siblings[i] if i > 0
end
Also aliased as: previous_sibling
query_xpath(raw_xpath_value, rlist=[], &blk)
click to toggle source
# Evaluates a single XPath expression relative to this element.
#
# raw_xpath_value - the XPath string ("child::" is rewritten to "./").
# rlist           - accumulator for results; reassigned inside the body.
# blk             - forwarded to filter and to nested xpath calls.
#
# Outline of what the body does:
#   * A leading "name = value" comparison is wrapped into a predicate and
#     flag_func is set, which makes the final result a boolean.
#   * The expression is split into the first path step, an optional
#     "[...]" condition and the remaining path.
#   * Single-step shortcuts: "text()" returns texts, "@*"/"attribute::*"
#     returns @attributes, "@name" returns [Attribute] when the key exists.
#   * The condition is turned into a Ruby expression by format_condition.
#   * "//" paths are delegated to scan_match (with an optional trailing
#     "[n]" pick), ".." to the parent's xpath, "." to self.
#   * Otherwise the direct children are matched by name ("*" matches any),
#     optionally shifted by following-sibling / preceding-sibling, then
#     passed through filter and, if path remains, through xpath recursively.
#
# NOTE(review): several branches eval strings derived from the XPath
# (attr_search and the right-hand condition) - confirm the XPath never
# comes from untrusted input.
# NOTE(review): debug output is printed with puts when $debug is set.
# File lib/rexle.rb, line 571 def query_xpath(raw_xpath_value, rlist=[], &blk) #@log.debug 'query_xpath : ' + raw_xpath_value.inspect #@log.debug '++ ' + self.xml.inspect flag_func = false xpath_value = raw_xpath_value.sub('child::','./') if xpath_value[/^[\w\/]+\s*=.*/] then flag_func = true xpath_value.sub!(/^\w+\s*=.*/,'.[\0]') xpath_value.sub!(/\/([\w]+\s*=.*)/,'[\1]') end raw_path, raw_condition = xpath_value.sub(/^\.?\/(?!\/)/,'')\ .match(/([^\[]+)(\[[^\]]+\])?/).captures remaining_path = ($').to_s #@log.debug 'remaining_path: ' + remaining_path.inspect if remaining_path[/^contains\(/] then raw_condition = raw_condition ? raw_condition + '/' + remaining_path \ : remaining_path remaining_path = '' end r = raw_path[/^([^\/]+)(?=\/\/)/,1] if r then a_path = raw_path.split(/(?=\/\/)/,2) else a_path = raw_path.split('/',2) end condition = raw_condition if a_path.length <= 1 #and not raw_condition[/^\[\w+\(.*\)\]$/] if raw_path[0,2] == '//' then s = '' elsif raw_path == 'text()' a_path.shift #return @value return self.texts else attribute = xpath_value[/^(attribute::|@)(.*)/,2] return @attributes if attribute == '*' if attribute and @attributes and \ @attributes.has_key?(attribute.to_sym) then return [Attribute.new(@attributes[attribute.to_sym])] end s = a_path.shift end # isolate the xpath to return just the path to the current element elmnt_path = s[/^([a-zA-Z:\-\*]+\[[^\]]+\])|[\/]+{,2}[^\/]+/] element_part = elmnt_path[/(^@?[^\[]+)?/,1] if elmnt_path if element_part then unless element_part[/^(@|[@\.a-zA-Z]+[\s=])/] then element_name = element_part[/^[\w:\-\*\.]+/] if element_name and element_name[/^\d/] then element_name = nil end condition = raw_xpath_value if element_name.nil? 
else if xpath_value[/^\[/] then condition = xpath_value element_name = nil else condition = element_part attr_search = format_condition('[' + condition + ']') #@log.debug 'attr_search : ' + attr_search.inspect return [attribute_search(attr_search, \ self, self.attributes) != nil] end end end #element_name ||= '*' raw_condition = '' if condition attr_search = format_condition(condition) if condition \ and condition.length > 0 puts ('1. attr_search: ' + attr_search.inspect).debug if $debug #@log.debug 'attr_search2 : ' + attr_search.inspect attr_search2 = xpath_value[/^\[(.*)\]$/,1] if attr_search2 then #@log.debug 'before attribute_Search' r4 = attribute_search(attr_search, self, self.attributes) return r4 end return_elements = [] if raw_path[0,2] == '//' then regex = /\[(\d+)\]$/ n = xpath_value[regex,1] xpath_value.slice!(regex) rs = scan_match(self, xpath_value).flatten.compact return n ? rs[n.to_i-1] : rs else if element_name.is_a? String then ename, raw_selector = (element_name.split('::',2)).reverse selector = case raw_selector when 'following-sibling' then 1 when 'preceding-sibling' then -1 end else ename = element_name end if ename == '..' then remaining_xpath = raw_path[/\.\.\/(.*)/,1] # select the parent element r2 = self.parent.xpath(remaining_xpath) return r2 elsif ename == '.' remaining_xpath = raw_path[1..-1] if remaining_xpath.empty? then if xpath_value.length > 0 and xpath_value =~ /\[/ then r = eval(attr_search.sub(/^h/,'self.attributes')) return self if r else return self end else return self.xpath(remaining_xpath) end elsif element_name.nil? puts ('attr_search: ' + attr_search.inspect).debug if $debug return eval attr_search else if raw_selector.nil? and ename != element_part then right_cond = element_part[/#{ename}(.*)/,1] end return_elements = @child_elements.map.with_index.select do |x, i| next unless x.is_a? 
Rexle::Element #x.name == ename or (ename == '*') r10 = ((x.name == ename) or (ename == '*')) end if right_cond then r12 = return_elements.map do |x, i| if x.text then r11 = eval "'%s'%s" % [x.text.to_s, right_cond] else false end end return r12 end if selector then ne = return_elements.inject([]) do |r,x| i = x.last + selector if i >= 0 then r << i else r end end return_elements = ne.map {|x| [@child_elements[x], x] if x} end end end if return_elements.length > 0 then if (a_path + [remaining_path]).join.empty? then # pass in a block to the filter if it is function contains? rlist = return_elements.map.with_index do |x,i| r5 = filter(x, i+1, attr_search, &blk) r5 end.compact rlist = rlist[0] if rlist.length == 1 else rlist << return_elements.map.with_index do |x,i| rtn_element = filter(x, i+1, attr_search) do |e| r = e.xpath(a_path.join('/') + raw_condition.to_s \ + remaining_path, &blk) r = e if r.is_a?(Array) and r.first and r.first == true \ and a_path.empty? r end next if rtn_element.nil? or (rtn_element.is_a? Array \ and rtn_element.empty?) if rtn_element.is_a? Hash then rtn_element elsif rtn_element.is_a? Array then rtn_element elsif (rtn_element.is_a? String) || (rtn_element.is_a?(Array) \ and not(rtn_element[0].is_a? String)) rtn_element elsif rtn_element.is_a? Rexle::Element rtn_element elsif rtn_element == true true end end rlist = rlist.flatten(1) unless rlist.length > 1 \ and rlist[0].is_a? Array rlist end rlist.compact! if rlist.is_a? Array else # strip off the 1st element from the XPath new_xpath = xpath_value[/^\/\/[\w:\-]+\/(.*)/,1] if new_xpath then self.xpath(new_xpath + raw_condition.to_s + remaining_path, \ rlist,&blk) end end rlist = rlist.flatten(1) unless not(rlist.is_a? Array) \ or (rlist.length > 1 and rlist[0].is_a? Array) rlist = [rlist] if rlist.is_a? Rexle::Element rlist = (rlist.length > 0 ? true : false) if flag_func == true rlist end
root()
click to toggle source
# File lib/rexle.rb, line 1031
# Returns the receiver itself; an element acts as the root of its own
# subtree for at_css and css.
def root
  self
end
text(s='')
click to toggle source
# File lib/rexle.rb, line 1033
# Returns text for this element or for a descendant.
#
# s - XPath string; when empty (the default) this element's own value is
#     returned. Otherwise element(s) is looked up: a String result is
#     returned directly, an element's value is returned, and nil is
#     returned when nothing matched.
def text(s='')
  return value if s.empty?

  found = element(s)
  return found if found.is_a?(String)

  found.value if found
end
texts()
click to toggle source
# File lib/rexle.rb, line 1043
# Returns the direct children that are Strings or Rexle::CData nodes.
#
# Each returned object gains a singleton method unescape, which returns a
# copy of its text with the entities &lt; &gt; &amp; and &apos; decoded.
# The stored text itself is left untouched.
def texts
  found = @child_elements.select do |x|
    x.is_a?(String) || x.is_a?(Rexle::CData)
  end

  # each, not map: only the side effect of defining the method is wanted
  found.each do |x|
    def x.unescape
      s = to_s.clone
      # pairs of (entity, replacement); the entity names are required -
      # replacing '<' with '<' would do nothing
      %w(&lt; < &gt; > &amp; & &apos; ').each_slice(2) { |pair| s.gsub!(*pair) }
      s
    end
  end

  found
end
to_a()
click to toggle source
# File lib/rexle.rb, line 1088
# Returns this element as a nested array:
#   [name, attributes, *cdata_entries, *children_as_arrays]
#
# name and attributes are copies. Each CData child contributes an entry of
# the form ['![', {}, text]; the remaining children are converted by
# scan_to_a.
def to_a
  # Hash.new(obj) would create an EMPTY hash whose default value is obj,
  # dropping every attribute; copy the attribute pairs instead.
  head = [String.new(name), attributes.to_h.dup]

  cdata_texts = cdatas
  head.concat(cdata_texts.map { |cdata| ['![', {}, cdata] }) if cdata_texts.any?

  [*head, *scan_to_a(children)]
end
value=(raw_s)
click to toggle source
# File lib/rexle.rb, line 1074
# Sets the element's text.
#
# raw_s - any object; converted with to_s, copied, and XML-escaped
#         (& ' < > become &amp; &apos; &lt; &gt;) before being stored as a
#         Value. The original object is not modified.
#
# The escaped value replaces the first child when the child list has any
# truthy entry, otherwise it is appended.
def value=(raw_s)
  val = Value.new(raw_s.to_s.clone)

  # '&' must be escaped first so the ampersands introduced by the other
  # replacements are not escaped a second time.
  %w(& &amp; ' &apos; < &lt; > &gt;).each_slice(2) { |pair| val.gsub!(*pair) }

  if @child_elements.any?
    @child_elements[0] = val
  else
    @child_elements << val
  end
end
Also aliased as: text=
xml(options={})
click to toggle source
# File lib/rexle.rb, line 1099
# Serialises this element.
#
# options - Hash:   merged over {pretty: false}. When :pretty is exactly
#                   false the children go to doc_print, otherwise to
#                   doc_pretty_print.
#           String: treated as a path; the markup of the element found by
#                   element(path) is returned, or '' when nothing matches.
#
# Any other argument class has no handler and fails with NoMethodError.
def xml(options={})
  renderers = {
    'Hash' => lambda { |opts|
      settings = {pretty: false}.merge(opts)
      printer = settings[:pretty] == false ? :doc_print : :doc_pretty_print
      method(printer).call(children)
    },
    'String' => lambda { |path|
      target = element(path)
      target ? target.xml : ''
    }
  }

  renderers[options.class.to_s].call(options)
end
Also aliased as: to_s
xpath(path, rlist=[], &blk)
click to toggle source
# File lib/rexle.rb, line 446
# Evaluates an XPath expression against this element.
#
# path  - the XPath string
# rlist - result accumulator, passed through to filter_xpath
# blk   - optional block, passed through to filter_xpath
#
# Returns a Recordset of the non-nil results when filter_xpath yields an
# Array; any other result (String, Hash, boolean, nil) is returned as-is.
def xpath(path, rlist=[], &blk)
  # pass the caller's rlist on; writing `rlist=[]` in the call would
  # reassign the parameter and silently drop what the caller supplied
  r = filter_xpath(path, rlist, &blk)

  r.is_a?(Array) ? Recordset.new(r.compact) : r
end
Private Instance Methods
attribute_search(attr_search, e, h, i=nil, &blk)
click to toggle source
# Tests one candidate element against a predicate produced by
# format_condition.
#
# attr_search - Integer (a position) or a String holding a Ruby expression
# e           - the candidate element
# h           - the candidate's attributes; referenced by expressions that
#               start with "h["
# i           - the candidate's position, compared with an Integer
#               attr_search and referenced by "i <op> n" expressions
# blk         - optional; when given, blk.call(e) is returned for a match
#
# Returns e (or the block's result) when the predicate matches and nil
# otherwise. The branches are tried in order: position, "i" comparison,
# attribute expression, "(name == ...)" against each child element and then
# against the local scope, "e.value" comparison, "e.xpath" expression, and
# finally a function call resolved through e.element.
#
# NOTE(review): most branches eval attr_search - confirm the originating
# XPath is never untrusted input.
# NOTE(review): the "e.value" branch rewrites attr_search in place with
# sub!, so the caller's string is modified.
# File lib/rexle.rb, line 1291 def attribute_search(attr_search, e, h, i=nil, &blk) r2 = if attr_search.is_a? Integer then block_given? ? blk.call(e) : e if i == attr_search elsif attr_search[/i\s(?:<|>|==|%)\s\d+/] and eval(attr_search) then block_given? ? blk.call(e) : e elsif h and !h.empty? and attr_search[/^h\[/] and eval(attr_search) then block_given? ? blk.call(e) : e elsif attr_search[/^\(name ==/] and e.child_elements.select {|x| next unless x.is_a? Rexle::Element name, attributes, value = x.name, x.attributes, x.value.to_s b = eval(attr_search) b}.length > 0 block_given? ? blk.call(e) : e elsif attr_search[/^\(name ==/] and eval(attr_search) block_given? ? blk.call(e) : e elsif attr_search[/^e\.value/] v = attr_search[/[^\s]+$/] duck_type = %w(to_f to_i to_s).detect {|x| v == v.send(x).to_s} attr_search.sub!(/^e.value/,'e.value.' + duck_type) if eval(attr_search) then block_given? ? blk.call(e) : e end elsif attr_search[/e\.xpath/] and eval(attr_search) block_given? ? blk.call(e) : e elsif attr_search[/^\w*\(/] and e.element(attr_search) block_given? ? blk.call(e) : e end r2 end
filter(raw_element, i, attr_search, &blk)
click to toggle source
# File lib/rexle.rb, line 1270
# Applies an optional predicate to one matched child.
#
# raw_element - [element, index] pair; index addresses @child_elements
# i           - position passed on to attribute_search
# attr_search - predicate from format_condition, or nil for "no filter"
# blk         - optional; called with the element when it passes
#
# Returns nil when the child at index is not a Rexle::Element. With a
# predicate, returns whatever attribute_search returns; without one,
# returns the element (or the block's result).
def filter(raw_element, i, attr_search, &blk)
  x, index = raw_element
  e = @child_elements[index]
  return unless e.is_a?(Rexle::Element)

  name, value = e.name, e.value
  h = x.attributes # attributes of the matched element
  return attribute_search(attr_search, e, h, i, &blk) if attr_search

  block_given? ? blk.call(e) : e
end
format_condition(condition)
click to toggle source
# Translates an XPath predicate into the form attribute_search expects.
#
# condition - the predicate text, normally including its square brackets
#
# Returns, depending on the first token scanned from the predicate:
#   * digits         - the Integer position when condition starts with "[",
#                      otherwise condition unchanged
#   * position()     - a Ruby expression over "i" ("=" becomes "==",
#                      "mod" becomes "%")
#   * contains(...)  - that token unchanged
#   * not(...)       - that token unchanged
#   * @attr tests    - a Ruby expression over h[:'attr'], joined with any
#                      and/or tokens; "class" uses include? instead of ==
#   * anything else  - a "(name == ... and value ...)" expression, an
#                      "e.xpath(...)" lookup for paths containing "/", or an
#                      "e.value" comparison for "."
#
# NOTE(review): in the position() branch, gsub('<','<') and gsub('>','>')
# replace a character with itself; they look like entity decoding
# (&lt; / &gt;) whose entity names were lost - confirm against the
# original source.
# NOTE(review): both arms of the x[0][/@\w+$/] test build the same string.
# File lib/rexle.rb, line 1146 def format_condition(condition) raw_items = condition.sub(/\[(.*)\]/,'\1').scan(/\'[^\']*\'|\"[^\"]*\"|\ and|or|\d+|[!=<>%]+|position\(\)|contains\([^\)]+\)|not\([^\)]+\)|[@\w\.\/&;]+/) if raw_items[0][/^\d+$/] then if condition[0 ] == '[' then return raw_items[0].to_i else return condition end elsif raw_items[0] == 'position()' then rrr = condition[1..-2].gsub(/position\(\)/,'i').gsub('<','<')\ .gsub('>','>').gsub(/\s=\B/,' ==').gsub(/\bmod\b/,'%') return rrr elsif raw_items[0][/^contains\(/] return raw_items[0] elsif raw_items[0][/^not\(/] return raw_items[0] else andor_items = raw_items.map.with_index\ .select{|x,i| x[/\band\b|\bor\b/]}\ .map{|x| [x.last, x.last + 1]}.flatten indices = [0] + andor_items + [raw_items.length] if raw_items[0][0] == '@' then raw_items.each{|x| x.gsub!(/^@/,'')} cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)} items = cons_items.map do |x| if x.length >= 3 then if x[0] != 'class' then x[1] = '==' if x[1] == '=' "h[:'%s'] %s %s" % x else "h[:class] and h[:class].include? %s" % x.last end else x.join[/^(and|or)$/] ? x : ("h[:'%s']" % x) end end return items.join(' ') else cons_items = indices.each_cons(2).map{|x,y| raw_items.slice(x...y)} items = cons_items.map do |x| if x.length >= 3 then x[1] = '==' if x[1] == '=' if x[0] != '.' then if x[0][/\//] then path, value = x.values_at(0,-1) if x[0][/@\w+$/] then "r = e.xpath('#{path}').first; r and r.value == #{value}" else "r = e.xpath('#{path}').first; r and r.value == #{value}" end else "(name == '%s' and value %s \"%s\")" % [x[0], x[1], \ x[2].sub(/^['"](.*)['"]$/,'\1')] end else "e.value %s %s" % [x[1], x[2]] end else x end end return items.join(' ') end end end
insert(node,offset=0)
click to toggle source
# File lib/rexle.rb, line 1133
# Inserts node into the parent's child list relative to this element.
#
# node   - the node to insert
# offset - added to this element's index; 0 puts node before this element,
#          1 puts it after
#
# Also stamps both this element and node with the object_id of doc_root as
# their @doc_id. Returns self, or nil (without inserting) when this element
# is not found among the parent's children.
def insert(node,offset=0)
  siblings = parent.child_elements
  position = siblings.index(self)
  return unless position

  siblings.insert(position + offset, node)

  root_id = doc_root.object_id
  @doc_id = root_id
  node.instance_variable_set(:@doc_id, root_id)

  self
end
recursive_scan(nodes, &blk)
click to toggle source
# File lib/rexle.rb, line 1327
# Depth-first, pre-order walk: calls blk with every Rexle::Element found in
# nodes and then descends into that element's children. Nodes of any other
# type are skipped.
def recursive_scan(nodes, &blk)
  nodes.each do |x|
    next unless x.is_a?(Rexle::Element)

    blk.call(x)
    kids = x.children
    recursive_scan(kids, &blk) if kids.length > 0
  end
end
scan_contents(node)
click to toggle source
used by xpath function contains()
# File lib/rexle.rb, line 1258
# Collects text for the XPath contains() function: the text of node followed
# by the text of every descendant element, depth-first. Entries are whatever
# text returns, so nil entries are possible.
def scan_contents(node)
  contents = [node.text]
  node.elements.each { |child| contents.concat(scan_contents(child)) }
  contents
end
scan_match(node, path)
click to toggle source
# File lib/rexle.rb, line 1237
# Evaluates a "//"-style (descendant) path on node and on every element
# below it.
#
# node - the element to start from
# path - the path including its leading "//"
#
# For the bare path "//" the result is [node, node.text, nested] where
# nested holds the same triple for each child element. For any other path
# the result is [own, nested]: own is node.xpath applied to the path with
# the leading "//" removed, and nested holds the recursive results for the
# child elements (nil for children that are not Rexle::Element).
def scan_match(node, path)
  if path == '//'
    return [node, node.text,
            node.elements.map { |child| scan_match(child, path) }]
  end

  tail = path[2..-1]
  own_hits = node.xpath(tail)
  nested_hits = node.elements.map do |n|
    n.is_a?(Rexle::Element) ? scan_match(n, path) : nil
  end

  [own_hits, nested_hits]
end