require 'csv'

namespace :gblsci do

namespace :sample_data do
  desc 'Ingests a directory of geoblacklight.json files'
  task seed: :environment do
    # Index every example GeoBlacklight JSON document into Solr, then
    # commit once at the end so the batch becomes visible atomically.
    example_docs = Dir.glob(Rails.root.join('solr', 'geoblacklight', 'example_docs', '**', '*.json'))
    example_docs.each do |path|
      puts "Ingesting #{path}"
      begin
        doc = JSON.parse(File.read(path))
        Blacklight.default_index.connection.add(doc)
      rescue StandardError => e
        # Best-effort: report the bad document and keep ingesting the rest.
        puts "Failed to ingest #{path}: #{e.inspect}"
      end
    end
    puts 'Committing changes to Solr'
    Blacklight.default_index.connection.commit
  end
end

namespace :images do
  desc 'Harvest image for specific document'
  task :harvest_doc_id, [:doc_id] => [:environment] do |_task, args|
    # Enqueue a background job that fetches and stores the image for the
    # single Solr document identified by the :doc_id task argument.
    GeoblacklightSidecarImages::StoreImageJob.perform_later(args[:doc_id])
  end

  desc 'Harvest all images'
  task harvest_all: :environment do
    # Queue a StoreImageJob for every document currently in the Solr index.
    # The huge `rows` value is a deliberate "give me everything" query.
    index = Geoblacklight::SolrDocument.index
    results = index.send_and_receive(index.blacklight_config.solr_path,
                                     q: '*:*',
                                     fl: '*',
                                     rows: 100_000_000)
    # Surface the match count (previously computed but never reported).
    puts "Harvesting images for #{results.response[:numFound]} documents"
    results.docs.each do |document|
      # Throttle enqueueing so the job backend / Solr is not hammered.
      sleep(1)
      begin
        GeoblacklightSidecarImages::StoreImageJob.perform_later(document.id)
      rescue Blacklight::Exceptions::RecordNotFound
        next
      end
    end
  end

  desc 'Hash of SolrDocumentSidecar image state counts'
  task harvest_states: :environment do
    # Tally how many sidecar records sit in each image-harvest state and
    # print one "state - count" line per state.
    states = %i[
      initialized
      queued
      processing
      succeeded
      failed
      placeheld
    ]

    counts = states.each_with_object({}) do |state, memo|
      memo[state] = SolrDocumentSidecar.in_state(state).size
    end

    counts.each { |state, count| puts "#{state} - #{count}" }
  end

  desc 'Re-queues incomplete states for harvesting'
  task harvest_retry: :environment do
    # Every state except :succeeded counts as incomplete; re-enqueue a
    # StoreImageJob for each sidecar still in one of those states.
    incomplete_states = %i[initialized queued processing failed placeheld]

    incomplete_states.each do |state|
      pending = SolrDocumentSidecar.in_state(state)

      puts "#{state} - #{pending.size}"

      pending.each do |sidecar|
        begin
          doc = Geoblacklight::SolrDocument.find(sidecar.document_id)
          GeoblacklightSidecarImages::StoreImageJob.perform_later(doc.id)
        rescue StandardError
          # Sidecar whose document vanished from the index — report and move on.
          puts "orphaned / #{sidecar.document_id}"
        end
      end
    end
  end

  desc 'Write harvest state report (CSV)'
  task harvest_report: :environment do
    # Dump one CSV row per sidecar with its harvest state and the metadata
    # from its last state transition. Written under public/ so the report
    # can be downloaded directly from the app server.
    file = "#{Rails.root}/public/#{Time.now.strftime('%Y-%m-%d_%H-%M-%S')}.sidecar_report.csv"

    CSV.open(file, 'w') do |writer|
      writer << [
        'Sidecar ID',
        'Document ID',
        'Current State',
        'Doc Data Type',
        'Doc Title',
        'Doc Institution',
        'Error',
        'Viewer Protocol',
        'Image URL',
        'GBLSI Thumbnail URL'
      ]

      SolrDocumentSidecar.all.each do |sc|
        begin
          document = Geoblacklight::SolrDocument.find(sc.document_id)
          metadata = sc.image_state.last_transition.metadata
          writer << [
            sc.id,
            sc.document_id,
            sc.image_state.current_state,
            document._source['layer_geom_type_s'],
            document._source['dc_title_s'],
            document._source['dct_provenance_s'],
            metadata['exception'],
            metadata['viewer_protocol'],
            metadata['image_url'],
            metadata['gblsi_thumbnail_uri']
          ]
        rescue StandardError => e
          # Was `rescue Exception`, which also swallows SignalException and
          # SystemExit; StandardError still covers orphaned documents and
          # missing-transition errors, which is all this guard is for.
          puts "Exception: #{e.inspect}"
          puts "orphaned / #{sc.document_id}"
          next
        end
      end
    end
  end

  desc 'Destroy all harvested images and sidecar AR objects'
  task harvest_purge_all: :environment do
    # Purge every attached image file first, then drop the state-machine
    # transition rows and finally the sidecar records themselves.
    SolrDocumentSidecar.all.each { |sidecar| sidecar.image.purge }

    SidecarImageTransition.destroy_all
    SolrDocumentSidecar.destroy_all
  end

  desc 'Destroy orphaned images and sidecar AR objects'
  # When a SolrDocumentSidecar AR object exists,
  # but its corresponding SolrDocument is no longer in the Solr index.
  task harvest_purge_orphans: :environment do
    SolrDocumentSidecar.all.each do |sc|
      begin
        # Probe the index; only the success/failure of the lookup matters,
        # so the returned document is deliberately discarded.
        Geoblacklight::SolrDocument.find(sc.document_id)
      rescue StandardError
        # Lookup failed — the sidecar is orphaned; destroying it also
        # purges its attached image via the model's dependent handling.
        sc.destroy
        puts "orphaned / #{sc.document_id} / destroyed"
      end
    end
  end

  desc 'Destroy select sidecar AR objects by CSV file'
  task harvest_destroy_batch: :environment do
    # Expects a CSV file in Rails.root/tmp/destroy_batch.csv
    #
    # From your local machine, copy it up to production server like this:
    # scp destroy_batch.csv swadm@geoprod:/swadm/var/www/geoblacklight/current/tmp/
    CSV.foreach("#{Rails.root}/tmp/destroy_batch.csv", headers: true) do |row|
      sc = SolrDocumentSidecar.find_by(document_id: row[0])
      if sc
        sc.destroy
        puts "document_id - #{row[0]} - destroyed"
      else
        # find_by returns nil for unknown ids; previously this raised
        # NoMethodError on `sc.destroy` and aborted the whole batch.
        puts "document_id - #{row[0]} - not found"
      end
    end
  end

  desc 'Inspect failed state objects'
  task harvest_failed_state_inspect: :environment do
    # Print the last-transition metadata for every sidecar in the :failed
    # state — useful for diagnosing why image harvesting failed.
    # (Previously a one-element states array whose `each` return value was
    # assigned to an unused local.)
    state = :failed
    SolrDocumentSidecar.in_state(state).each do |sc|
      puts "#{state} - #{sc.document_id} - #{sc.image_state.last_transition.metadata.inspect}"
    end
  end
end

end