class UncleKryon::KryonAumYearParser

Attributes

artist[RW]
exclude_album[RW]
release[RW]
title[RW]
trainers[RW]
training[RW]
training?[RW]
updated_on[R]
url[RW]

Public Class Methods

fix_kryon_year_title(year) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 386
# Translates a release title into the year token used in kryon.com file names.
#
# Only the combined title '2002-2005' is rewritten (to '2002_05'); every other
# value, including nil, is handed back unchanged.
#
# @param year [String,nil] release title / year
# @return [String,nil] the year token to embed in a URL
def self.fix_kryon_year_title(year)
  (year == '2002-2005') ? '2002_05' : year
end
get_kryon_year_mirrors(year) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 392
# Builds the mirror table for a year's free-audio page.
#
# The year is first normalized with fix_kryon_year_title. The table currently
# has a single entry, keyed 'original', pointing at the kryon.com page.
#
# @param year [String] release title / year
# @return [Hash{String=>String}] mirror name => URL
def self.get_kryon_year_mirrors(year)
  page_year = fix_kryon_year_title(year)

  { 'original' => "https://www.kryon.com/freeAudio_folder/#{page_year}_freeAudio.html" }
end
get_kryon_year_url(year,url_version=2) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 402
# Builds the URL of the mobile free-audio page for a year.
#
# The year is first normalized with fix_kryon_year_title.
#
# @param year [String] release title / year
# @param url_version [Integer] accepted for callers, but not consulted by this method
# @return [String] URL of the "mobile_pages" listing for that year
def self.get_kryon_year_url(year,url_version=2)
  page_year = fix_kryon_year_title(year)

  "https://www.kryon.com/freeAudio_folder/mobile_pages/#{page_year}_freeAudio_m.html"
end
new(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil, updated_on: nil,**options) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 40
# Creates a parser for one year ("release") of the free-audio listing.
#
# @param title [String,nil] release title / year; also used to derive the URL
#   when +url+ is empty
# @param url [String,nil] page to parse; when empty, built with
#   get_kryon_year_url(title)
# @param artist [Object] artist record that parse_site reads and fills
# @param training [Boolean] stored as-is in @training
# @param train_filepath [String,nil] handed to Trainers.new
# @param updated_on [String,nil] timestamp to stamp on parsed data; when empty,
#   the current time (formatted by Util.format_datetime) is used
# @param options [Hash] extra keyword arguments; accepted and ignored here
def initialize(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil,
      updated_on: nil,**options)
  @artist = artist
  @exclude_album = false
  @title = title
  @trainers = Trainers.new(train_filepath)
  @training = training
  # Keep a caller-supplied timestamp; previously it was dropped and
  # @updated_on stayed nil whenever updated_on was given.
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now) : updated_on
  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(title) : url
end
parse_kryon_date(date,year=nil) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 51
# Parses one of the free-form date (or date-range) spellings found on the
# free-audio pages into a begin date and an end date.
#
# The text is cleaned, known misspellings are repaired, and the result is
# matched against a series of known layouts (examples are given next to each
# branch). For ranges, the end date is parsed with the full format and the
# begin date is then rebuilt from the leading month/day plus the end date's
# year; this relies on Date.strptime accepting text beyond what the format
# covers.
#
# @param date [String] raw date text, e.g. "SEPT 29 - OCT 9, 2017"; a copy is
#   worked on, the argument is not modified
# @param year [String,nil] year appended to layouts that carry no year of
#   their own (e.g. "OCT 25 - NOV 3"); ignored by layouts that include one
# @return [Array<String>] two elements, begin and end date, each passed
#   through Util.format_date, or '' when that date is absent
# @raise [ArgumentError] re-raised after logging when Date.strptime rejects
#   the text
def self.parse_kryon_date(date,year=nil)
  # Don't modify args and clean them up so can use /\s/ instead of /[[:space:]]/
  date = Util.clean_data(date.clone)
  # year defaults to nil and the branches below test year.nil?, so only clean
  # it when it was actually given.
  year = Util.clean_data(year.clone) unless year.nil?

  # Fix misspellings and/or weird shortenings
  date.gsub!(/Feburary/i,'February') # "Feburary 2-13, 2017"
  date.gsub!(/SEPT(\s+|\-)/i,'Sep\1') # "SEPT 29 - OCT 9, 2017", "Sept-Oct 2015"
  date.gsub!(/Septembe\s+/i,'September ') # "Septembe 4, 2016"
  date.gsub!(/Ocotber/i,'October') # "Ocotber 10, 2015"

  comma = date.include?(',') ? ',' : '' # "May 6 2017"
  r = Array.new(2) # [begin date, end date]

  begin
    if date.include?('-')
      case date
      # "Sept-Oct 2015"
      when /\A[[:alpha:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%b-%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "4/28/12 - 4/29/12"
      when %r{\A[[:digit:]]+\s*/\s*[[:digit:]]+\s*/\s*[[:digit:]]+\s*\-}
        date = date.split(/\s*-\s*/)

        r[0] = Date.strptime(date[0],'%m/%d/%y')
        r[1] = Date.strptime(date[1],'%m/%d/%y')
      # "10-17 to 11-18, 2012"
      when /\A[[:digit:]]+\s*\-\s*[[:digit:]]+\s+to\s+
              [[:digit:]]+\s*\-\s*[[:digit:]]+\s*,\s*
              [[:digit:]]+\z/xi
        date = date.split(/\s*to\s*/i)

        r[1] = Date.strptime(date[1],'%m-%d, %Y')
        r[0] = Date.strptime(date[0],'%m-%d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      else
        # Month-name ranges: each branch picks the end-date format (r1f);
        # the shared code after this case does the actual parsing.
        case date
        # "SEPT 29 - OCT 9, 2017", "May 31-June 1, 2014"
        when /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+[\,\s]+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = "%B %d-%B %d#{comma} %Y"
        # "OCT 25 - NOV 3" (2014)
        when /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          # dup: the format is appended to below, so it must not be a frozen literal
          r1f = '%B %d-%B %d'.dup

          if !year.nil?
            date << ", #{year}"
            r1f << ', %Y'
          end
        # "December 12-13"
        when /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')

          # "September 16 - 2018"
          if date =~ /-[[:digit:]]{4}\z/
            r1f = '%B %d-%Y'
          else
            r1f = '%B %d-%d'.dup

            if !year.nil?
              date << ", #{year}"
              r1f << ', %Y'
            end
          end
        # "June 30-July 1-2018"
        when /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*
                         [[:alpha:]]+\s+[[:digit:]]+\s*\-\s*
                         [[:digit:]]+\z/x
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d-%Y'
        # "September 7 & 9-2018"
        when /\A[[:alpha:]]+\s+[[:digit:]]+\s+\&\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d & %d-%Y'
        else
          # "OCT 27 - 28 - 29, 2017"; remove spaces around dashes
          date.gsub!(/\s+\-\s+/,'-')

          # "June 7-9-16-17" & "June 9-10-11-12"
          if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\z/
            # dup: the format is appended to below, so it must not be a frozen literal
            r1f = '%B %d-%d-%d-%d'.dup

            if !year.nil?
              date << ", #{year}"
              r1f << ', %Y'
            end
          else
            # "MAY 15-16-17, 2017" and "January 7-8, 2017"
            r1f = (date =~ /\-.*\-/) ? "%B %d-%d-%d#{comma} %Y" : "%B %d-%d#{comma} %Y"
          end
        end

        # End date from the full format; begin date from the leading
        # "month day", moved into the end date's year.
        r[1] = Date.strptime(date,r1f)
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    elsif date.include?('/')
      # "1/7/2012"
      if date =~ %r{\A[[:digit:]]+\s*/\s*[[:digit:]]+\s*/\s*[[:digit:]]+\z}
        date = date.gsub(/\s+/,'')

        r[0] = Date.strptime(date,'%m/%d/%Y')
        r[1] = nil
      else
        # "JULY/AUG 2017"
        r[1] = Date.strptime(date,'%b/%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    else
      case date
      # "April 11, 12, 2015"
      when /\A[[:alpha:]]+\s*[[:digit:]]+\s*,\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%B %d, %d, %Y')
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "March, 2014"
      when /\A[[:alpha:]]+\s*,\s*[[:digit:]]+\z/
        r[0] = Date.strptime(date,'%B, %Y')
        r[1] = nil
      else
        # Single date, e.g. "May 6, 2017" or "May 6 2017"
        r[0] = Date.strptime(date,"%B %d#{comma} %Y")
        r[1] = nil
      end
    end
  rescue ArgumentError => e
    Log.instance.fatal("Invalid Date: '#{date}'",error: e)
    raise
  end

  r[0] = (!r[0].nil?) ? Util.format_date(r[0]) : ''
  r[1] = (!r[1].nil?) ? Util.format_date(r[1]) : ''

  return r
end

Public Instance Methods

parse_date_cell(cells,album) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 267
# Fills the album's begin/end dates and URL from the second cell of a row.
#
# The URL is taken from the date link because a row sometimes has no topic.
# The first link in the cell that has both text and an href is used; its text
# is parsed with parse_kryon_date, using @title as the fallback year.
#
# @param cells [#length,#[]] the row's cells; index 1 is read
# @param album [Object] receives date_begin, date_end and url
# @return [Boolean] true when a begin date and a URL were both found
def parse_date_cell(cells,album)
  return false unless cells.length > 1

  date_cell = cells[1]
  return false if date_cell.nil?

  links = date_cell.css('a')
  return false if links.nil? || links.length < 1

  # For 2014 albums: pick the first link with text and an href
  link = links.find do |a|
    !a.nil? && !Util.empty_s?(a.content) && !a['href'].nil?
  end
  return false if link.nil?

  dates = self.class.parse_kryon_date(Util.clean_data(link.content),@title)
  album.date_begin = dates[0]
  album.date_end = dates[1]
  album.url = Util.clean_link(@release.url,link['href'])

  !album.date_begin.empty? && !album.url.empty?
end
parse_language_cell(cells,album) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 297
# Fills the album's languages from the fifth cell of a row.
#
# The cell text is cleaned and looked up through Iso.languages; English is
# always requested in addition, because the official site always has it.
#
# @param cells [#length,#[]] the row's cells; index 4 is read
# @param album [Object] receives languages
# @return [Boolean] true when at least one language was found
def parse_language_cell(cells,album)
  return false unless cells.length > 4

  language_cell = cells[4]
  return false if language_cell.nil?

  text = language_cell.content
  return false if text.nil?

  # For the official site, they always have English, so add it if not present
  album.languages = Iso.languages.find_by_kryon(Util.clean_data(text),add_english: true)

  !album.languages.nil? && !album.languages.empty?
end
parse_location_cell(cells,album) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 310
# Fills the album's locations from the fourth cell of a row.
#
# Cells whose text contains "RADIO SHOW" (seen in 2014) and cells that are
# empty after cleaning are rejected; otherwise the text is resolved with
# Iso.find_kryon_locations.
#
# @param cells [#length,#[]] the row's cells; index 3 is read
# @param album [Object] receives locations
# @return [Boolean] true when at least one location was found
def parse_location_cell(cells,album)
  return false unless cells.length > 3

  location_cell = cells[3]
  return false if location_cell.nil?

  text = location_cell.content
  return false if text.nil?
  return false if text.match?(/[[:space:]]*RADIO[[:space:]]+SHOW[[:space:]]*/) # 2014

  text = Util.clean_data(text)
  return false if text.empty?

  album.locations = Iso.find_kryon_locations(text)

  !album.locations.nil? && !album.locations.empty?
end
parse_site(title=nil,url=nil,artist=nil) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 190
# Downloads one year's free-audio page and records every album row found on
# it in the artist and in that year's release.
#
# Arguments that are given override the values set at construction time.
#
# @param title [String,nil] release title / year; replaces @title when given
# @param url [String,nil] page to parse; when empty, rebuilt from @title
# @param artist [Object,nil] artist record; replaces @artist when given
# @return [Object] the release for @title (an existing one from the artist, or
#   a newly created ReleaseData)
# @raise [ArgumentError] when the artist is nil, or the title or URL is empty
def parse_site(title=nil,url=nil,artist=nil)
  @artist = artist unless artist.nil?
  @title = title unless title.nil?

  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(@title) : url

  # The title and URL checks also strip the stored values in place
  raise ArgumentError,'Artist cannot be nil' if @artist.nil?
  raise ArgumentError,'Title cannot be empty' if @title.nil? || (@title = @title.strip).empty?
  raise ArgumentError,'URL cannot be empty' if @url.nil? || (@url = @url.strip).empty?

  @release = @artist.releases[@title]
  @trainers.load_file

  # First time this release is seen: create it and register it on the artist
  if @release.nil?
    @release = ReleaseData.new
    @release.mirrors = self.class.get_kryon_year_mirrors(@title)
    @release.title = @title
    @release.updated_on = @updated_on
    @release.url = @url

    @artist.releases[@title] = @release
  end

  # Note: the page is fetched from the release's URL, which for an existing
  # release may differ from @url
  doc = Nokogiri::HTML(URI(@release.url).open,nil,'utf-8') # Force utf-8 encoding
  row_pos = 1 # Only used in the "Excluding album" warning
  rows = doc.css('table tr tr')

  rows.each do |row|
    next if row.nil?
    next if (cells = row.css('td')).nil?

    album = AlbumData.new
    album.updated_on = @updated_on
    @exclude_album = false # May be set to true by parse_topic_cell

    # There is always a date cell
    # (it must be parsed first: parse_topic_cell reads the album.url set here)
    has_date_cell = parse_date_cell(cells,album)

    # Sometimes there is not a topic, location, or language cell, but not all 3!
    # - Put || last because of short-circuit ||!
    # - "or" short-circuits as well, so it cannot be used to force every parser to run
    has_other_cell = parse_topic_cell(cells,album)
    has_other_cell = parse_location_cell(cells,album) || has_other_cell
    has_other_cell = parse_language_cell(cells,album) || has_other_cell

    if !has_date_cell || !has_other_cell || @exclude_album
      # - If it doesn't have any cells, it is probably javascript or something else, so don't log it
      # - If @exclude_album, then it has already been logged, so don't log it
      if (!has_date_cell && has_other_cell) || (has_date_cell && !@exclude_album)
        log.warn("Excluding album: #{row_pos},#{album.date_begin},#{album.date_end},#{album.title}," \
                 "#{album.locations},#{album.languages}")
        row_pos += 1
      end

      next
    end

    # Is it actually old or new?
    # (an unchanged album keeps its previous updated_on)
    # NOTE(review): this lookup uses album.url before Util.fix_link, while the
    # album is stored below under the fixed URL - confirm both keys agree.
    if @artist.albums.key?(album.url) && album == @artist.albums[album.url]
      album.updated_on = @artist.albums[album.url].updated_on
    end

    album.url = Util.fix_link(album.url)

    @artist.albums[album.url] = album

    # A newly listed album also marks the release itself as updated
    if !@release.albums.include?(album.url)
      @release.albums.push(album.url)
      @release.updated_on = @updated_on
    end

    row_pos += 1
  end

  return @release
end
parse_topic_cell(cells,album) click to toggle source
# File lib/unclekryon/parsers/kryon_aum_year_parser.rb, line 324
# Fills the album's title from the third cell of a row and, when needed,
# replaces the album's URL with the topic link's href.
#
# The first link in the cell that has text supplies the title. Titles matching
# "GROUP PHOTO" or "PLEASE READ" are logged, flag the row through
# @exclude_album, and are rejected. The URL already on the album (set from the
# date cell) is replaced by the topic link's href when it does not end in
# .htm/.html, or when the topic href is non-empty and differs from it.
#
# @param cells [#length,#[]] the row's cells; index 2 is read
# @param album [Object] receives title and possibly url
# @return [Boolean] true when a usable, non-empty title was found
# @raise [RuntimeError] in dev mode, when the resulting URL is empty
def parse_topic_cell(cells,album)
  return false unless cells.length > 2

  topic_cell = cells[2]
  return false if topic_cell.nil?

  links = topic_cell.css('a')
  return false if links.nil? || links.length < 1

  # For 2017 "San Jose, California (3)": pick the first link that has text
  link = links.find { |a| !a.nil? && !Util.empty_s?(a.content) }
  return false if link.nil?

  album.title = Util.fix_shortwith_text(Util.clean_data(link.content))

  exclude_topics = /
    GROUP[[:space:]]+PHOTO|
    PLEASE[[:space:]]+READ
  /ix

  if album.title =~ exclude_topics
    log.warn("Excluding album: Topic[#{album.title}]")
    @exclude_album = true
    return false
  end

  # Sometimes, the date cell's href is an image (See 2016 'Las Vegas, NV - "Numerology" - (3)')
  good_urls = /
    \.html?[[:space:]]*\z
  /ix

  date_url = album.url
  topic_url = Util.clean_link(@release.url,link['href'])

  # Sometimes, the date cell's href is wrong (See 2016 '"Five Concepts for the New Human" (2)')
  prefer_topic_url = album.url !~ good_urls || (!Util.empty_s?(topic_url) && date_url != topic_url)

  if prefer_topic_url
    album.url = topic_url
    log.warn("Using topic cell's href for URL: #{File.basename(date_url)}=>#{File.basename(album.url)}")

    if Util.empty_s?(album.url)
      msg = "Date and topic cells' hrefs are empty: Topic[#{album.title}]"

      # Fail loudly while developing; otherwise just log and skip the row
      raise msg if DevOpts.instance.dev?

      log.warn(msg)
      return false
    end
  end

  !album.title.empty?
end