# Parses an RSS or Atom document and fills in this object's feed fields.
#
# Sets @xml (the document's root element), @encoding, @type ("rss" or
# "atom"), @title, @link, @description, @creator and @items (one RSSItem or
# AtomItem per item/entry element found).
#
# @param str [String] raw feed XML; the caller's string is not modified
# @raise [UnknownFeedTypeException] if the document has no root element, or
#   the root contains neither an RSS channel nor an Atom feed
def parse(str)
  # Dirty hack: some feeds contain a bare "&" followed by whitespace, which
  # is not well-formed XML. Escape it before handing the text to REXML.
  # A non-destructive gsub is used so the caller's (possibly frozen) string
  # is left untouched.
  str = str.gsub(/&(\s+)/, '&amp;\1')
  doc = REXML::Document.new(str)
  root = doc.root
  @xml = root
  @encoding = doc.encoding
  @title = @link = @description = @creator = nil
  @items = []

  # A document without a root element cannot be a recognised feed.
  raise UnknownFeedTypeException if root.nil?

  if root.elements['channel'] || root.elements['rss:channel']
    @type = 'rss'

    e = root.elements['channel/title'] || root.elements['rss:channel/rss:title']
    @title = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace! if e && e.text

    e = root.elements['channel/link'] || root.elements['rss:channel/rss:link']
    @link = e.text.rmWhiteSpace! if e && e.text

    e = root.elements['channel/description'] ||
        root.elements['rss:channel/rss:description']
    @description = e.text.toUTF8(@encoding).rmWhiteSpace! if e && e.text

    # dc:creator wins when it carries text; otherwise fall back to author.
    e = root.elements['channel/dc:creator']
    unless e && e.text
      e = root.elements['channel/author'] ||
          root.elements['rss:channel/rss:author']
    end
    @creator = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace! if e && e.text

    # Items may sit inside the channel or directly under the root, with or
    # without the rss: prefix. Use the first layout that matches; 'rss:item'
    # is the last resort and is not probed beforehand.
    query = ['channel/item', 'item', 'rss:channel/rss:item'].find do |path|
      root.elements[path]
    end || 'rss:item'
    root.each_element(query) { |item| @items << RSSItem.new(item, self) }
  elsif root.elements['/feed']
    @type = 'atom'

    e = root.elements['/feed/title']
    @title = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace! if e && e.text

    # Only links advertising an (X)HTML type are considered; when several
    # match, the last one with an href is kept.
    html_types = ['text/html', 'application/xhtml', 'application/xhtml+xml']
    root.each_element('/feed/link') do |link|
      type = link.attribute('type')
      next unless type && html_types.include?(type.value)

      href = link.attribute('href')
      @link = href.value.rmWhiteSpace! if href
    end

    if (info = root.elements['/feed/info'])
      # Prefer the wrapped <div> when present; serialise the element itself.
      info = info.elements['div'] || info
      @description = info.to_s.toUTF8(@encoding).rmWhiteSpace!
    end

    root.each_element('/feed/entry') { |entry| @items << AtomItem.new(entry, self) }
  else
    raise UnknownFeedTypeException
  end
end