class Pluto::Model::Feed

Constants

FIX_DATE_SLUG_RE

Tries to get the date from the slug in the URL.

e.g. /news/2019-10-17-growing-ruby-together

Public Class Methods

latest() click to toggle source
# File lib/pluto/models/feed.rb, line 32
def self.latest
  # Orders feeds newest first by the cached items_last_updated value.
  #
  # note: coalesce picks the first non-null argument
  #   - supported by sqlite (yes), postgres (yes)
  #
  # note: feeds without an items_last_updated value fall back to the
  #   hardcoded 1970-01-01 for now
  ## was: order( "coalesce(updated,published,'1970-01-01') desc" )
  newest_first = Arel.sql( "coalesce(feeds.items_last_updated,'1970-01-01') desc" )

  order( newest_first )
end

Public Instance Methods

data() click to toggle source

use a different name for data - why? why not?

e.g. inner, internal, readonly or r, raw, table, direct, or ???
# File lib/pluto/models/feed.rb, line 123
# Returns the memoized Data wrapper built around this record;
# the wrapper is created on first access and reused afterwards.
def data
  return @data if @data

  @data = Data.new( self )
end
date() click to toggle source
add convenience date attribute helpers / readers
- what should be returned if date is nil? - nil, an empty string, 'n/a' or '?' - why? why not?

date; date_iso | date_iso8601; date_822 | date_rfc2822 | date_rfc822

# File lib/pluto/models/feed.rb, line 93
# Convenience reader - the feed's date is whatever `updated` returns.
def date
  updated
end
date_822() click to toggle source
# File lib/pluto/models/feed.rb, line 98
# Returns `date` formatted via rfc822 or an empty string if there is no date.
def date_822
  return '' unless date

  date.rfc822
end
Also aliased as: date_rfc2822, date_rfc822
date_iso() click to toggle source
# File lib/pluto/models/feed.rb, line 95
# Returns `date` formatted via iso8601 or an empty string if there is no date.
def date_iso
  return '' unless date

  date.iso8601
end
Also aliased as: date_iso8601
date_iso8601()
Alias for: date_iso
date_rfc2822()
Alias for: date_822
date_rfc822()
Alias for: date_822
debug?() click to toggle source

logging w/ ActiveRecord

todo/check: is a logger instance method present by default,
  or is only the class method present?
What is the best way to add logging to ActiveRecord (use the "builtin" machinery??)
# File lib/pluto/models/feed.rb, line 16
# Tells whether debugging is switched on in the global Pluto configuration.
def debug?
  Pluto.config.debug?
end
deep_update_from_struct!( data ) click to toggle source
# File lib/pluto/models/feed.rb, line 126
##
# Updates this feed record AND its item records from a parsed feed struct.
#
#  steps (only if the feed has at least one item / entry):
#   1) fix-up "broken" dates via fix_dates
#   2) create or update an Item record for every feed item
#        (items NOT matching the include filters get skipped)
#   3) delete item records no longer listed in the feed
#   4) refresh the cached items_last_updated value
#
#  and finally (always) the feed attributes get updated via update_from_struct!
#
#  data - parsed feed struct; the code reads data.items and from every item
#           guid, title and published
#
#  returns the result of update_from_struct!
def deep_update_from_struct!( data )

  logger = LogUtils::Logger.root

  ## note: handle case with empty feed, that is, feed with NO items / entries
  ##                                                    (e.g. data.items.size == 0).
  if data.items.size > 0

    #####
    ## apply some fix-up for "broken" feed data
    fix_dates( data )


    ######
    ## check for filters (includes/excludes) if present
    ##  for now just check for includes
    ##
    includesFilter = includes.present? ? FeedFilter::IncludeFilters.new( includes ) : nil

    data.items.each do |item|
      if includesFilter && includesFilter.match_item?( item ) == false
        logger.info "** SKIPPING | #{item.title}"
        logger.info "  no include terms match: #{includes}"
        next   ## skip to next item
      end

      item_rec = Item.find_by_guid( item.guid )
      if item_rec.nil?
        item_rec  = Item.new
        logger.info "** NEW | #{item.title}"
      else
        ## todo: check if any attribs changed
        logger.info "UPDATE | #{item.title}"
      end

      item_rec.feed_id = id        # feed_rec.id - add feed_id fk_ref
      item_rec.fetched = fetched   # feed_rec.fetched

      item_rec.update_from_struct!( item )
    end  # each item


    ###
    #  delete (old) feed items if no longer in feed AND
    #   date range is in (latest/current) feed list
    #
    #  thanks to Harry Wood
    #   see https://github.com/feedreader/pluto/pull/16
    #    for more comments

    #  todo/fix: use a delete feature/command line flag to make it optional - why? why not?

    guids_in_feed = data.items.map {|item| item.guid }

    ## note: items without a published date get ignored here;
    ##   comparing nil with a date raises an ArgumentError (and
    ##   would abort the update of the complete feed)
    earliest_still_in_feed = data.items.map {|item| item.published }.compact.min

    ## note: without any published date there is no date range to check,
    ##   thus, nothing to delete
    if earliest_still_in_feed
      items_no_longer_present =
        Item
          .where(feed_id: id)
          .where.not(published: nil)
          .where("published > ?", earliest_still_in_feed)
          .where.not(guid: guids_in_feed)

      unless items_no_longer_present.empty?
        logger.info "#{items_no_longer_present.size} items no longer present in the feed (presumed removed at source). Deleting from planet db"
        items_no_longer_present.each do |item|
          logger.info "** DELETE | #{item.title}"
          item.destroy
        end
      end
    end


    #  update  cached value last published for item
    ##  todo/check: force reload of items - why? why not??
    last_item_rec = items.latest.limit(1).first  # note limit(1) will return relation/array - use first to get first element or nil from ary
    if last_item_rec
      if last_item_rec.updated?   ## note: checks for updated & published with attr_reader_w_fallback
        self.items_last_updated = last_item_rec.updated
        ## save!  ## note: will get save w/ update_from_struct!  - why? why not??
      else
        ## skip - no updated / published present
      end
    end
  end  # check for if data.items.size > 0  (that is, feed has feed items/entries)

  update_from_struct!( data )
end
feed_url() click to toggle source
# File lib/pluto/models/feed.rb, line 69
# Reads the feed_url attribute via read_attribute_w_fallbacks
# passing along auto_feed_url as the fallback.
def feed_url
  read_attribute_w_fallbacks( :feed_url, :auto_feed_url )
end
feed_url?() click to toggle source
# File lib/pluto/models/feed.rb, line 73
# Tells whether feed_url (incl. fallback) is present.
def feed_url?
  feed_url.present?
end
fix_dates( data ) click to toggle source

helpers to fix-up some “broken” feed data

# File lib/pluto/models/feed.rb, line 230
##
# Fixes up "broken" dates of the feed items in place (1st pass runs before the 2nd):
#
#   1) items with neither updated nor published get a published date
#        taken from the date slug in the item url (if there is one)
#   2) if every item has a published date and all updated dates are the same
#        the updated dates are assumed "fake" and get nullified
#
#  data - parsed feed struct; the code reads data.items and reads/writes
#           url, updated, updated_local, published and published_local
#           of every item
def fix_dates( data )

  ## check for missing / no dates
  ##   examples
  ##    - rubytogether feed @ https://rubytogether.org/news.xml
  data.items.each do |item|
    next unless item.updated.nil? && item.published.nil?

    ## try to get date from slug in url
    ##  e.g. /news/2019-10-17-growing-ruby-together
    m = FIX_DATE_SLUG_RE.match( item.url )
    next unless m

    year  = m[:year].to_i(10)
    month = m[:month].to_i(10)
    day   = m[:day].to_i(10)

    ## note: the slug might only look like a date (e.g. 2019-13-45);
    ##   skip it - DateTime.new raises on invalid dates (and
    ##   would abort the update of the complete feed)
    next unless Date.valid_date?( year, month, day )

    ## todo/fix: make sure DateTime gets utc (no timezone/offset +000)
    published = DateTime.new( year, month, day )
    item.published_local  = published
    item.published        = published
  end


  ## check if all updated dates are the same (uniq count is 1)
  ##   AND if all published dates are present
  ##  then assume "fake" updated dates and nullify updated dates
  ##   example real-world "messed-up" feeds include:
  ##   -  https://bundler.io/blog/feed.xml
  ##   -  https://dry-rb.org/feed.xml
  ##
  ##  todo/check - limit to atom feed format only - why? why not?

  count           = data.items.size
  count_published = data.items.count {|item| item.published }

  if count == count_published
    ## note: counts the changes from one item to the next (starting with nil)
    uniq_count_updated  = 0
    last_updated        = nil

    data.items.each do |item|
      uniq_count_updated += 1   if item.updated != last_updated
      last_updated = item.updated
    end

    if uniq_count_updated == 1
      puts "bingo!! nullify all updated dates"
      ## todo/fix: log report updated date fix!!!!
      data.items.each do |item|
        item.updated       = nil
        item.updated_local = nil
      end
    end
  end
end
published() click to toggle source
# File lib/pluto/models/feed.rb, line 80
# Reads the published attribute via read_attribute_w_fallbacks
# passing along updated as the fallback.
def published
  read_attribute_w_fallbacks( :published, :updated )
end
published?() click to toggle source
# File lib/pluto/models/feed.rb, line 83
# Tells whether published (incl. fallback) is present.
def published?
  published.present?
end
title() click to toggle source
# File lib/pluto/models/feed.rb, line 68
# Reads the title attribute via read_attribute_w_fallbacks
# passing along auto_title as the fallback.
def title
  read_attribute_w_fallbacks( :title, :auto_title )
end
title?() click to toggle source
# File lib/pluto/models/feed.rb, line 72
# Tells whether title (incl. fallback) is present.
def title?
  title.present?
end
update_from_struct!( data ) click to toggle source
# File lib/pluto/models/feed.rb, line 285
  ##
  # Copies the feed-level attributes from a parsed feed struct
  #  into this record and saves them via update!.
  #
  #  data - parsed feed struct; the code reads format, updated, published,
  #           summary, generator, title, url and feed_url
  #
  #  returns the result of update!
  def update_from_struct!( data )
    log = LogUtils::Logger.root

    ##
    # todo:
    ##  strip all tags from summary (subtitle)
    ##  limit to 255 chars
    ## e.g. summary (subtitle) such as this exist
    ##  This is a low-traffic announce-only list for people interested
    ##  in hearing news about Polymer (<a href="http://polymer-project.org">http://polymer-project.org</a>).
    ## The higher-traffic mailing list for all kinds of discussion is
    ##  <a href="https://groups.google.com/group/polymer-dev">https://groups.google.com/group/polymer-dev</a>

    attribs = {
      format:    data.format,
      updated:   data.updated,
      published: data.published,
      summary:   data.summary,
      ## note: generator gets stringified (to_s) for a single-line/string value
      generator: data.generator.to_s,
    }

    ## note: always auto-update auto_* fields for now
    attribs.merge!(
      auto_title:    data.title,
      auto_url:      data.url,
      auto_feed_url: data.feed_url
    )

    if debug?
      ## puts "*** dump feed_attribs:"
      ## pp feed_attribs
      log.debug "*** dump feed_attribs w/ class types:"
      attribs.each_pair do |name, val|
        log.debug "  #{name}: >#{val}< : #{val.class.name}"
      end
    end

    update!( attribs )
  end
updated() click to toggle source

note:

only use fallback for updated, that is, updated (or published)
 ~~do NOT use fallback for published / created    -- why? why not?~~
 add items_last_updated  to updated as last fall back - why? why not?
# File lib/pluto/models/feed.rb, line 79
# Reads the updated attribute via read_attribute_w_fallbacks
# passing along published as the fallback.
def updated
  read_attribute_w_fallbacks( :updated, :published )
end
updated?() click to toggle source
# File lib/pluto/models/feed.rb, line 82
# Tells whether updated (incl. fallback) is present.
def updated?
  updated.present?
end
url() click to toggle source

attributes with fallbacks or (auto-)backups - use feed.data.<attribute> for “raw” / “original” access

# File lib/pluto/models/feed.rb, line 67
# Reads the url attribute via read_attribute_w_fallbacks
# passing along auto_url as the fallback.
def url
  read_attribute_w_fallbacks( :url, :auto_url )
end
url?() click to toggle source
# File lib/pluto/models/feed.rb, line 71
# Tells whether url (incl. fallback) is present.
def url?
  url.present?
end