class Eco::API::UseCases::DefaultCases::AnalysePeople

Attributes

options[R]
people[R]
session[R]

Public Instance Methods

main(people, session, options, usecase) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 7
# Entry point of the use case: runs the analysis selected in the case options
# and, when that analysis returns something truthy, optionally exports it.
#
# Sets `options[:end_get]` to `false` and stores `session`, `options` and
# `people` in instance variables so the private helpers can read them.
#
# @param people  the people collection to analyse
# @param session the session object; its `logger` is used here
# @param options [Hash] command options; mutated (`:end_get` is set to `false`)
# @param usecase not used by this method
# @return the value returned by the selected analysis, or `nil` when no
#   analysis operation was specified
def main(people, session, options, usecase)
  options[:end_get] = false
  @session = session
  @options = options
  @people  = people

  people_involved =
    if case_options[:identify_duplicates]
      identify_duplicates
    elsif case_options[:identify_unnamed]
      identify_unnamed
    else
      session.logger.info("No analysis operation was specified")
      # The logger's return value is not a people collection: yield nil so it
      # can never reach the export / backup steps below.
      nil
    end
  return people_involved unless people_involved

  to_csv(people_involved) if to_csv?
  create_people_backup(people_involved) if results_people_backup?
  people_involved
end

Private Instance Methods

attribute_access(person, expression) click to toggle source

Resolves a dot-separated expression given on the command line against a person and returns the resulting String (or nil).

> i.e. person.details[target-alt_id]

# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 183
# Resolves a dot-separated `expression` against `person`.
#
# The expression is split on "." and each part is resolved in turn through
# `parts_to_value`.
#
# @param person the object the expression starts from
# @param expression [String] dot-separated expression
# @return [String, nil, false] the resolved value when it is a String or falsy
# @raise [RuntimeError] when the resolved value is truthy but not a String
def attribute_access(person, expression)
  parts    = expression.split(".")
  resolved = parts_to_value(person, parts)
  return resolved if !resolved || resolved.is_a?(String)

  raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{resolved.class}"
end
case_options() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 146
# Options that belong to this use case, read from
# `options[:usecase][:analyse_people]`.
#
# @return the stored settings, or an empty Hash when the entry is missing
def case_options
  settings = options.dig(:usecase, :analyse_people)
  settings || {}
end
create_people_backup(cut = people, file = results_people_backup) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 84
# Saves `cut` as JSON through the session's file manager.
#
# @param cut  the data to save; defaults to `people`
# @param file the target passed to `save_json`; defaults to `results_people_backup`
# @return whatever `save_json` returns
def create_people_backup(cut = people, file = results_people_backup)
  manager = session.file_manager
  manager.save_json(cut, file)
end
csv_file() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 130
# The `:csv_file` entry of the case options.
#
# @return the configured value, or `nil` when it is not set
def csv_file
  case_options[:csv_file]
end
facet_field() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 122
# The `:facet_field` entry of the case options.
#
# @return the configured value, or `nil` when it is not set
def facet_field
  case_options[:facet_field]
end
facet_field?() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 126
# Whether a facet field was configured.
#
# @return [Boolean] `true` when `facet_field` is truthy, `false` otherwise
def facet_field?
  facet_field ? true : false
end
facet_field_proc() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 110
# Builds the value reader for the configured facet field by handing the
# `facet_field` expression to `proc_value_access`.
#
# @return whatever `proc_value_access` returns for that expression
def facet_field_proc
  expression = facet_field
  proc_value_access(expression)
end
field_similarity() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 101
# Chooses what the similarity analysis compares on.
#
# @return `use_field_proc` when a use-field was configured, `:name` otherwise
def field_similarity
  if use_field?
    use_field_proc
  else
    :name
  end
end
get_attr(obj, part) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 198
# Resolves one `part` of an expression against `obj`.
#
# Resolution order:
# 1. falsy `obj` -> `nil`
# 2. `part` is a Symbol, or `obj` responds to it -> the method is called
# 3. `part` starts with ":" -> resolved again without the leading ":"
# 4. `part` starts with "details[" -> looks the bracketed field up in `obj.details`
# 5. `part` starts with "account" -> `obj.account` (when `obj` responds to it)
# 6. `part` starts with "person" -> `obj` itself
#
# @param obj  the object to read from
# @param part [String, Symbol] one segment of the expression
# @return the resolved value, or `nil`
# @raise [RuntimeError] when `part` is not recognised, or a "details[" part
#   has no closing bracket
def get_attr(obj, part)
  return nil unless obj

  if part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
    # NOTE(review): `send` is not restricted to public methods when `part`
    # is a Symbol - confirm callers never pass untrusted Symbols.
    obj.send(part.to_sym)
  elsif part.start_with?(":")
    get_attr(obj, part[1..-1])
  elsif part.start_with?("details[")
    details = obj.respond_to?(:details) ? obj.details : nil
    return nil unless details

    found = part.match(/details\[(?<field>.*)\]/)
    raise "Review your -use-field expression. It should read: person.details[target-alt_id]" unless found

    details[found[:field]]
  elsif part.start_with?("account")
    obj.respond_to?(:account) ? obj.account : nil
  elsif part.start_with?("person")
    obj
  else
    raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
  end
end
identify_duplicates() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 36
# Runs the duplicates analysis.
#
# Screens the people with `similarity_screening`, optionally refines the
# result with `strict_similarity` (case option `:ignore_matching_words`), and
# builds the related-people collection with `newSimilarity`. When that
# collection is empty an info message is logged; otherwise a `:txt` report
# is generated and handed to `save!`.
#
# @return the collection returned by `similarity_analytics.newSimilarity`
def identify_duplicates
  analysed = similarity_screening
  if case_options[:ignore_matching_words]
    puts "Fine tune results by ignoring matching words..."
    analysed = strict_similarity(analysed)
  end

  related_people = similarity_analytics.newSimilarity(analysed)
  if related_people.empty?
    session.logger.info("There were no possible duplicates identified!!")
  else
    save!(similarity_analytics.report(analysed, format: :txt))
  end
  related_people
end
identify_unnamed() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 28
# Fetches `similarity_analytics.unnamed` and logs an info message when that
# collection is empty.
#
# @return the collection returned by `similarity_analytics.unnamed`
def identify_unnamed
  nameless = similarity_analytics.unnamed
  session.logger.info("There were no people with no name!!") if nameless.empty?
  nameless
end
output_file() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 150
# Path of the report file, memoized in `@output_file`.
#
# @return `options[:output][:file]` when present, "analytics.txt" otherwise
def output_file
  return @output_file if @output_file

  @output_file = options.dig(:output, :file) || "analytics.txt"
end
parts_to_value(obj, parts) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 192
# Walks `parts` from left to right, resolving each one with `get_attr`
# against the value produced by the previous step.
#
# @param obj   the starting object
# @param parts the expression segments, in order
# @return the value reached after the last part (`obj` itself when `parts`
#   is empty)
def parts_to_value(obj, parts)
  parts.inject(obj) { |current, segment| get_attr(current, segment) }
end
proc_value_access(expression) click to toggle source

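Builds a Proc from a command-line expression made of one or more attribute expressions separated by " AND "; the Proc joins the non-nil values with a single space.

> i.e. person.details[field-a] AND person.details[field-b]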

# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 172
# Builds a Proc that reads a composite value from a person.
#
# `expression` is split on " AND "; when called, the Proc resolves every
# sub-expression with `attribute_access`, drops `nil` results and joins the
# rest with a single space.
#
# @param expression [String] one or more expressions separated by " AND "
# @return [Proc] a non-lambda Proc taking the person as its argument
def proc_value_access(expression)
  terms = expression.split(" AND ")
  proc do |person|
    terms.map { |term| attribute_access(person, term) }.compact.join(" ")
  end
end
results_people_backup() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 138
# The `:backup_people` entry of the case options.
#
# @return the configured value, or `nil` when it is not set
def results_people_backup
  case_options[:backup_people]
end
results_people_backup?() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 142
# Whether a people backup target was configured.
#
# @return [Boolean] `true` when `results_people_backup` is truthy
def results_people_backup?
  results_people_backup ? true : false
end
save!(data) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 154
# Writes `data` to `output_file`.
#
# The format is taken from the file extension (case-insensitive). "html" and
# "json" are not supported: a message is printed and the process exits with
# status 1 *before* the target is opened, so an existing file at that path
# is not truncated.
#
# NOTE(review): for any other extension than "txt" the file is still opened
# (created/truncated) and nothing is written, as before - confirm whether
# that is intended.
#
# @param data the report content appended to the file for "txt" targets
# @return the value of the `File.open` block
def save!(data)
  ext = File.extname(output_file).downcase.delete(".")
  if %w[html json].include?(ext)
    puts "#{ext} is still not supported"
    exit(1)
  end

  session.logger.info("Generating file '#{output_file}'")
  File.open(output_file, "w") do |fd|
    fd << data if ext == "txt"
  end
end
similarity_analytics() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 80
# The analytics helper obtained from `people.similarity`, memoized in
# `@analytics` so that every caller shares the same object.
#
# @return the memoized `people.similarity` object
def similarity_analytics
  return @analytics if @analytics

  @analytics = people.similarity
end
similarity_screening() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 60
# Runs the similarity screening and, unless the case option
# `:only_screening` is set, rearranges its results.
#
# Steps:
# 1. sets the analytics `attribute` to `field_similarity`;
# 2. calls `analyse` with threshold 0.4 and order `[:average, :dice]`, plus
#    `needle_read:` (when a facet field is configured) and
#    `unique_words: true` (when requested);
# 3. calls `rearrange` with threshold 0.5 and order `[:average]`.
#
# Progress is printed to stdout.
#
# @return the result of `analyse` when only screening, otherwise the result
#   of `rearrange`
def similarity_screening
  similarity_analytics.attribute = field_similarity

  screening = {threshold: 0.4, order: [:average, :dice]}
  screening[:needle_read]  = facet_field_proc if facet_field?
  screening[:unique_words] = true if unique_words?

  analysed = similarity_analytics.analyse(**screening)
  puts "Got #{analysed.count} results after basic screening with #{screening}"
  return analysed if case_options[:only_screening]

  rearranging = {threshold: 0.5, order: [:average]}
  puts "Going to rearrange results... with #{rearranging}"
  rearranged = similarity_analytics.rearrange(analysed, **rearranging)
  puts "... got #{rearranged.count} results after rearranging"
  rearranged
end
strict_similarity(analysed) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 53
# Refines `analysed` through `similarity_analytics.ignore_matching_words`,
# using threshold 0.5 and order `[:ngrams]`.
#
# @param analysed the results of a previous screening
# @return whatever `ignore_matching_words` returns
def strict_similarity(analysed)
  similarity_analytics.ignore_matching_words(analysed, threshold: 0.5, order: [:ngrams])
end
to_csv(data = people, file = csv_file) click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 88
# Exports `data` by running the "to-csv" export case on the session.
#
# The export settings built here (target file name, `:csv` format,
# `nice_header` and `internal_names`) are merged with the `:export` entry of
# `options`, if any.
#
# NOTE(review): that merge is shallow, so an `:export` entry in `options`
# replaces the settings built here wholesale - confirm this is intended.
#
# @param data the people to export; defaults to `people`
# @param file the target file name; defaults to `csv_file`
# @return whatever `session.process_case` returns
def to_csv(data = people, file = csv_file)
  export_defaults = {
    export: {
      file:    {name: file, format: :csv},
      options: {nice_header: true, internal_names: true}
    }
  }
  case_opts = export_defaults.merge(options.slice(:export))
  session.process_case("to-csv", type: :export, people: data, options: case_opts)
end
to_csv?() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 134
# Whether a CSV export target was configured.
#
# @return [Boolean] `true` when `csv_file` is truthy, `false` otherwise
def to_csv?
  csv_file ? true : false
end
unique_words?() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 97
# Whether the `:unique_words` case option is enabled.
#
# Coerces the raw option to a strict boolean, like the other predicates of
# this class (`facet_field?`, `use_field?`, `to_csv?`).
#
# @return [Boolean] `true` when the option is truthy, `false` otherwise
def unique_words?
  !!case_options[:unique_words]
end
use_field() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 114
# The `:use_field` entry of the case options.
#
# @return the configured value, or `nil` when it is not set
def use_field
  case_options[:use_field]
end
use_field?() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 118
# Whether a use-field expression was configured.
#
# @return [Boolean] `true` when `use_field` is truthy, `false` otherwise
def use_field?
  use_field ? true : false
end
use_field_proc() click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 106
# Builds the value reader for the configured use-field by handing the
# `use_field` expression to `proc_value_access`.
#
# @return whatever `proc_value_access` returns for that expression
def use_field_proc
  expression = use_field
  proc_value_access(expression)
end