module Feedstock
Constants
- VERSION
Public Class Methods
feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
click to toggle source
# File lib/feedstock.rb, line 10
# Builds a feed from the page found at +url+.
#
# Steps, in order: download and parse the page, normalise the rules,
# extract the feed-level info, extract the entries, then render everything
# through the ERB template at +template_file+.
#
# @param url the location handed to download_page
# @param rules [Hash] extraction rules; the :info, :entry and (optionally)
#   :entries categories are the ones the extraction helpers read
# @param format [Symbol] :html or :xml (anything else makes download_page raise)
# @param template_file [String] path of the ERB template to render
# @return [String] the rendered template
def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
  document = download_page(url, format)
  normalised = normalise_rules(rules)

  create_feed(
    extract_info(document, normalised),
    extract_entries(document, normalised),
    template_file
  )
end
Private Class Methods
create_feed(info, entries, template_file)
click to toggle source
# File lib/feedstock.rb, line 20
# Renders the ERB template stored at +template_file+.
#
# The template is compiled with trim mode "-" and evaluated with two local
# variables available to it: +info+ and +entries+.
#
# @param info the value exposed to the template as +info+
# @param entries the value exposed to the template as +entries+
# @param template_file [String] path of the ERB template
# @return [String] the rendered output
def create_feed(info, entries, template_file)
  source = File.read(template_file)
  ERB.new(source, trim_mode: "-").result_with_hash(info: info, entries: entries)
end
download_page(url, format)
click to toggle source
# File lib/feedstock.rb, line 25
# Opens +url+ and parses the response with Nokogiri.
#
# The block form of URI.open is used so that the underlying IO object is
# closed as soon as Nokogiri has finished reading it, instead of being left
# open until it is garbage-collected.
#
# @param url the location passed to URI.open
# @param format [Symbol] :html to parse with Nokogiri::HTML, :xml to parse
#   with Nokogiri::XML
# @return the document returned by the Nokogiri parser
# @raise [RuntimeError] if +format+ is neither :html nor :xml
def download_page(url, format)
  case format
  when :html
    URI.open(url) { |io| Nokogiri::HTML(io) }
  when :xml
    URI.open(url) { |io| Nokogiri::XML(io) }
  else
    raise "Format not recognised"
  end
end
extract_content(node, rule)
click to toggle source
# File lib/feedstock.rb, line 36
# Pulls the raw content out of +node+ as directed by rule[:content].
#
# * a Hash with an :attribute key -> the value of that attribute on the node
# * "inner_html"                  -> node.inner_html
# * "html" or "xml"               -> node.to_s
# * anything else (including nil) -> node.content with surrounding
#   whitespace stripped
#
# @param node the matched node
# @param rule [Hash] the rule being applied
# @return the extracted content
def extract_content(node, rule)
  case rule[:content]
  in { attribute: attr_name } then node[attr_name]
  in "inner_html" then node.inner_html
  in "html" | "xml" then node.to_s
  else node.content.strip
  end
end
extract_entries(page, rules)
click to toggle source
# File lib/feedstock.rb, line 49
# Extracts the feed entries from +page+.
#
# When the rules contain an :entries category the wrapped strategy is used;
# otherwise the unwrapped strategy is used.
#
# @param page the parsed document
# @param rules [Hash] the normalised rules
# @return [Array<Hash>] whatever the selected strategy returns
def extract_entries(page, rules)
  return extract_entries_unwrapped(page, rules) unless rules[:entries]

  extract_entries_wrapped(page, rules)
end
extract_entries_unwrapped(page, rules)
click to toggle source
# File lib/feedstock.rb, line 57
# Extracts entries when there is no wrapping :entries rule.
#
# Every rule in rules[:entry] is handled in one of three ways:
# * :literal -> the literal value is shared by every entry
# * :repeat  -> the first match on the page is formatted once and shared by
#   every entry
# * otherwise -> each match of the rule's path on the page supplies the
#   field for the entry at the same position
#
# Shared values are merged into every entry at the end, so they overwrite a
# positional value stored under the same key.
#
# @param page the parsed document
# @param rules [Hash] the normalised rules
# @return [Array<Hash>] entries keyed by the stringified rule names
def extract_entries_unwrapped(page, rules)
  shared = {}
  rows = []

  rules[:entry].each do |name, rule|
    key = name.to_s

    if rule[:literal]
      shared[key] = rule[:literal]
    elsif rule[:repeat]
      shared[key] = format_content(page.at_css(rule[:path]), rule)
    else
      page.css(rule[:path]).each_with_index do |match, index|
        (rows[index] ||= {})[key] = format_content(match, rule)
      end
    end
  end

  rows.each { |row| row.merge!(shared) } unless shared.empty?
  rows
end
extract_entries_wrapped(page, rules)
click to toggle source
# File lib/feedstock.rb, line 81
# Extracts entries when an :entries rule selects one wrapper node per entry.
#
# For every wrapper node matched by rules[:entries][:path], each rule in
# rules[:entry] contributes one field:
# * :literal -> the literal value
# * :repeat  -> the first match of the rule's path on the whole page
# * otherwise -> the first match of the rule's path inside the wrapper node
#
# If rules[:entries][:filter] is a Proc, only the entries for which it
# returns a truthy value are kept.
#
# @param page the parsed document
# @param rules [Hash] the normalised rules
# @return [Array<Hash>] entries keyed by the stringified rule names
def extract_entries_wrapped(page, rules)
  rows = []

  page.css(rules[:entries][:path]).each_with_index do |node, index|
    rules[:entry].each do |name, rule|
      # The row is created lazily, so an empty rule set leaves the result empty.
      row = (rows[index] ||= {})
      row[name.to_s] =
        if rule[:literal]
          rule[:literal]
        elsif rule[:repeat]
          format_content(page.at_css(rule[:path]), rule)
        else
          format_content(node.at_css(rule[:path]), rule)
        end
    end
  end

  keep = rules[:entries][:filter]
  keep.is_a?(Proc) ? rows.filter(&keep) : rows
end
extract_info(page, rules)
click to toggle source
# File lib/feedstock.rb, line 106
# Extracts the feed-level information described by rules[:info].
#
# A rule with a truthy :literal contributes that literal; any other rule
# contributes the formatted first match of its path on the page.
#
# @param page the parsed document
# @param rules [Hash] the normalised rules
# @return [Hash] values keyed by the stringified rule names
def extract_info(page, rules)
  rules[:info].to_h do |name, rule|
    [name.to_s, rule[:literal] || format_content(page.at_css(rule[:path]), rule)]
  end
end
format_content(match, rule)
click to toggle source
# File lib/feedstock.rb, line 120
# Turns a matched node into the final value for a field.
#
# The content is extracted, run through the rule's processor (if any) and
# wrapped with the rule's :prepend/:append strings (if any). rule[:type]
# then decides the final shape:
# * "cdata"    -> the value wrapped in a CDATA section
# * "datetime" -> the value parsed by Timeliness and rendered with iso8601,
#   or an empty string when parsing yields nil
# * otherwise  -> the value unchanged
#
# @param match the matched node, or nil when nothing matched
# @param rule [Hash] the rule being applied
# @return the formatted value; "" when +match+ is nil
def format_content(match, rule)
  return "" if match.nil?

  text = extract_content(match, rule)
  processed = process_content(text, rule)
  wrapped = wrap_content(processed, rule)

  case rule[:type]
  when "cdata"
    # A literal "]]>" inside the content would close the section early and
    # produce malformed XML, so split it across two adjacent CDATA sections.
    "<![CDATA[#{wrapped.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>"
  when "datetime"
    "#{Timeliness.parse(wrapped)&.iso8601}"
  else
    wrapped
  end
end
normalise_rules(rules)
click to toggle source
# File lib/feedstock.rb, line 137
# Expands shorthand rules into their Hash form.
#
# * In the :info and :entry categories, every rule that is not already a
#   Hash becomes { path: rule }.
# * The :entries category itself becomes { path: rule } when it is not
#   already a Hash.
# * Any other category is passed through untouched.
#
# The caller's hash is never modified: a new Hash is built and returned, so
# frozen rule sets can be passed in and the same rules object can be reused
# safely.
#
# @param rules [Hash] the rules as supplied by the caller
# @return [Hash] a new Hash containing the normalised rules
def normalise_rules(rules)
  rules.to_h do |category, value|
    normalised =
      case category
      when :info, :entry
        value.transform_values { |rule| rule.is_a?(Hash) ? rule : { path: rule } }
      when :entries
        value.is_a?(Hash) ? value : { path: value }
      else
        value
      end

    [category, normalised]
  end
end
process_content(content, rule)
click to toggle source
# File lib/feedstock.rb, line 153
# Runs +content+ through the rule's processor, when one is configured.
#
# The processor is called with the content and the whole rule.
#
# @param content the extracted content
# @param rule [Hash] the rule being applied
# @return the processor's result, or +content+ unchanged when
#   rule[:processor] is nil or false
def process_content(content, rule)
  processor = rule[:processor]
  return content unless processor

  processor.call(content, rule)
end
wrap_content(content, rule)
click to toggle source
# File lib/feedstock.rb, line 161
# Surrounds +content+ with the rule's :prepend and :append values.
#
# When neither is set the content is returned untouched; otherwise a new
# String is built from the prefix, the content and the suffix.
#
# @param content the processed content
# @param rule [Hash] the rule being applied
# @return the wrapped String, or +content+ itself when there is nothing to add
def wrap_content(content, rule)
  prefix = rule[:prepend]
  suffix = rule[:append]

  if prefix || suffix
    "#{prefix}#{content}#{suffix}"
  else
    content
  end
end