class Eco::API::UseCases::DefaultCases::AnalysePeople
Attributes
options[R]
people[R]
session[R]
Public Instance Methods
main(people, session, options, usecase)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 7
# Entry point of the use case: runs the analysis selected in `case_options`
# and, when it yields a result, optionally exports it.
#
# @param people   the people collection to analyse (stored in `@people`)
# @param session  the current session (stored in `@session`)
# @param options  [Hash] launch options (stored in `@options`); `:end_get` is set to `false` on it
# @param usecase  not used by this method
# @return the result of the selected analysis, or `nil` when no analysis was selected
def main(people, session, options, usecase)
  options[:end_get] = false
  @session = session
  @options = options
  @people  = people

  people_involved =
    if case_options[:identify_duplicates]
      identify_duplicates
    elsif case_options[:identify_unnamed]
      identify_unnamed
    else
      session.logger.info("No analysis operation was specified")
      # Yield nil explicitly: the logger's return value must not be mistaken
      # for an analysis result and handed to the exporters below.
      nil
    end

  if people_involved
    to_csv(people_involved) if to_csv?
    create_people_backup(people_involved) if results_people_backup?
  end

  people_involved
end
Private Instance Methods
attribute_access(person, expression)
click to toggle source
A way to use the command line to specify which part of a person to read
> i.e. person.details[target-alt_id]
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 183
# Resolves a dot-separated `expression` against `person`.
#
# @param person      the object the expression is evaluated on
# @param expression  [String] dot-separated parts, each resolved via `parts_to_value`
# @return [String, nil, false] the resolved value
# @raise [RuntimeError] when the resolved value is truthy but not a String
def attribute_access(person, expression)
  parts = expression.split(".")
  value = parts_to_value(person, parts)
  # Falsy values and Strings are the only accepted outcomes.
  return value if !value || value.is_a?(String)

  raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
end
case_options()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 146
# Options scoped to this use case, read from `options[:usecase][:analyse_people]`.
#
# @return the configured value, or an empty Hash when nothing is configured
def case_options
  configured = options.dig(:usecase, :analyse_people)
  configured || {}
end
create_people_backup(cut = people, file = results_people_backup)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 84
# Hands `cut` and `file` to `save_json` on the session's file manager.
#
# @param cut   data to save; defaults to `people`
# @param file  target passed to `save_json`; defaults to `results_people_backup`
def create_people_backup(cut = people, file = results_people_backup)
  manager = session.file_manager
  manager.save_json(cut, file)
end
csv_file()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 130
# The `:csv_file` entry of `case_options`, if any.
def csv_file
  case_options[:csv_file]
end
facet_field()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 122
# The `:facet_field` entry of `case_options`, if any.
def facet_field
  case_options[:facet_field]
end
facet_field?()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 126
# @return [Boolean] whether `facet_field` holds a truthy value
def facet_field?
  facet_field ? true : false
end
facet_field_proc()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 110
# Builds the value reader for the `facet_field` expression.
#
# @return [Proc] the result of `proc_value_access` for `facet_field`
def facet_field_proc
  expression = facet_field
  proc_value_access(expression)
end
field_similarity()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 101
# What the similarity analysis should compare on.
#
# @return [Proc, Symbol] `use_field_proc` when a `use_field` is set, `:name` otherwise
def field_similarity
  use_field? ? use_field_proc : :name
end
get_attr(obj, part)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 198
# Resolves a single `part` of an access expression against `obj`.
#
# Resolution order:
#  1. a falsy `obj` resolves to `nil`;
#  2. a Symbol `part`, or a `part` naming a method `obj` responds to, is
#     invoked on `obj` (public methods only);
#  3. a leading ":" is stripped and the remainder resolved again;
#  4. "details[<field>]" reads `<field>` from `obj.details` (`nil` when `obj`
#     has no `details` or it is falsy);
#  5. a part starting with "account" yields `obj.account` when available;
#  6. a part starting with "person" yields `obj` itself.
#
# @param obj   the object to read from
# @param part  [String, Symbol] one segment of the expression
# @return the resolved value, or `nil`
# @raise [RuntimeError] when the part is malformed or not recognised
# @raise [NoMethodError] when a Symbol `part` names no public method of `obj`
def get_attr(obj, part)
  return nil unless obj

  if part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
    # public_send keeps dynamic dispatch away from private methods.
    obj.public_send(part.to_sym)
  elsif part.start_with?(":")
    get_attr(obj, part[1..-1])
  elsif part.start_with?("details[")
    details = obj.details if obj.respond_to?(:details)
    return nil unless details

    match = part.match(/details\[(?<field>.*)\]/)
    raise "Review your -use-field expression. It should read: person.details[target-alt_id]" unless match

    details[match[:field]]
  elsif part.start_with?("account")
    obj.account if obj.respond_to?(:account)
  elsif part.start_with?("person")
    obj
  else
    raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
  end
end
identify_duplicates()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 36
# Screens `people` for possible duplicates and saves a text report when any
# are found.
#
# When `case_options[:ignore_matching_words]` is set, the screening result is
# further refined with `strict_similarity`.
#
# @return the value of `similarity_analytics.newSimilarity` for the screened results
def identify_duplicates
  screened = similarity_screening

  if case_options[:ignore_matching_words]
    puts "Fine tune results by ignoring matching words..."
    screened = strict_similarity(screened)
  end

  related_people = similarity_analytics.newSimilarity(screened)

  if related_people.empty?
    session.logger.info("There were no possible duplicates identified!!")
  else
    save!(similarity_analytics.report(screened, format: :txt))
  end

  related_people
end
identify_unnamed()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 28
# Fetches `similarity_analytics.unnamed` and logs a notice when it is empty.
#
# @return the value of `similarity_analytics.unnamed`
def identify_unnamed
  unnamed = similarity_analytics.unnamed
  session.logger.info("There were no people with no name!!") if unnamed.empty?
  unnamed
end
output_file()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 150
# Path of the report file: `options[:output][:file]` when given, otherwise
# "analytics.txt". The value is cached in `@output_file`.
def output_file
  return @output_file if @output_file

  configured = options.dig(:output, :file)
  @output_file = configured || "analytics.txt"
end
parts_to_value(obj, parts)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 192
# Walks `parts` in order, feeding each intermediate result into `get_attr`.
#
# @param obj    starting object
# @param parts  [Array] segments to resolve one after another
# @return the value reached after the last part (`obj` itself when `parts` is empty)
def parts_to_value(obj, parts)
  current = obj
  parts.each { |part| current = get_attr(current, part) }
  current
end
proc_value_access(expression)
click to toggle source
A way to use the command line to specify several parts, joined with " AND "
> i.e. details[...] AND details[...]
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 172
# Turns an expression into a reader for a person.
#
# The expression is split on " AND "; the returned proc resolves every
# sub-expression with `attribute_access`, drops the `nil` results and joins
# the remaining values with a single space.
#
# @param expression [String]
# @return [Proc] a proc taking one person and returning a String
def proc_value_access(expression)
  # return expression.to_sym if expression.start_with?(":")
  subexpressions = expression.split(" AND ")

  proc do |person|
    resolved = subexpressions.map { |subexpression| attribute_access(person, subexpression) }
    resolved.compact.join(" ")
  end
end
results_people_backup()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 138
# The `:backup_people` entry of `case_options`, if any.
def results_people_backup
  case_options[:backup_people]
end
results_people_backup?()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 142
# @return [Boolean] whether `results_people_backup` holds a truthy value
def results_people_backup?
  results_people_backup ? true : false
end
save!(data)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 154
# Writes `data` to `output_file`.
#
# The format is taken from the file extension (case-insensitive):
#  - "txt" writes `data` to the file;
#  - "html" and "json" are not supported: a message is printed and the
#    process exits with status 1 *before* the file is opened, so an existing
#    file is left untouched;
#  - any other extension opens (and thereby empties/creates) the file without
#    writing anything to it.
#
# @param data  content appended to the file with `<<`
def save!(data)
  ext = File.extname(output_file).downcase.delete(".")

  # Reject unsupported formats up front rather than after truncating the file.
  if %w[html json].include?(ext)
    puts "#{ext} is still not supported"
    exit(1)
  end

  session.logger.info("Generating file '#{output_file}'")
  File.open(output_file, "w") do |fd|
    fd << data if ext == "txt"
  end
end
similarity_analytics()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 80
# The `people.similarity` object, cached in `@analytics` after the first call.
def similarity_analytics
  return @analytics if @analytics

  @analytics = people.similarity
end
similarity_screening()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 60
# Runs the similarity screening in up to two passes.
#
# 1. `analyse` with threshold 0.4 ordered by `:average` and `:dice`, adding
#    `needle_read` when a facet field is set and `unique_words` when requested.
# 2. Unless `case_options[:only_screening]` is set, `rearrange` the first-pass
#    results with threshold 0.5 ordered by `:average`.
#
# Progress is printed to standard output.
#
# @return the first-pass results when only screening, the rearranged results otherwise
def similarity_screening
  similarity_analytics.attribute = field_similarity

  screening_opts = {threshold: 0.4, order: [:average, :dice]}
  screening_opts[:needle_read]  = facet_field_proc if facet_field?
  screening_opts[:unique_words] = true if unique_words?

  screened = similarity_analytics.analyse(**screening_opts)
  puts "Got #{screened.count} results after basic screening with #{screening_opts}"
  return screened if case_options[:only_screening]

  rearrange_opts = {threshold: 0.5, order: [:average]}
  puts "Going to rearrange results... with #{rearrange_opts}"

  rearranged = similarity_analytics.rearrange(screened, **rearrange_opts)
  puts "... got #{rearranged.count} results after rearranging"
  rearranged
end
strict_similarity(analysed)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 53
# Refines `analysed` through `similarity_analytics.ignore_matching_words`,
# using threshold 0.5 and `:ngrams` ordering.
#
# @param analysed  results of a previous screening
# @return the value returned by `ignore_matching_words`
def strict_similarity(analysed)
  similarity_analytics.ignore_matching_words(analysed, threshold: 0.5, order: [:ngrams])
end
to_csv(data = people, file = csv_file)
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 88
# Exports `data` by running the "to-csv" export case on the session.
#
# @param data  people to export; defaults to `people`
# @param file  target file name; defaults to `csv_file`
# @return the value returned by `session.process_case`
def to_csv(data = people, file = csv_file)
  export_defaults = {}
  [
    {file: {name: file, format: :csv}},
    {options: {nice_header: true}},
    {options: {internal_names: true}}
    # {options: {split_schemas: true}} is intentionally left disabled
  ].each { |section| export_defaults.deep_merge!(export: section) }

  # NOTE(review): `merge` is shallow, so an `:export` entry in the launch
  # options replaces the defaults built above wholesale — confirm this is intended.
  export_options = export_defaults.merge(options.slice(:export))

  session.process_case("to-csv", type: :export, people: data, options: export_options)
end
to_csv?()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 134
# @return [Boolean] whether `csv_file` holds a truthy value
def to_csv?
  csv_file ? true : false
end
unique_words?()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 97
# The raw `:unique_words` entry of `case_options` (not coerced to a Boolean).
def unique_words?
  flag = case_options[:unique_words]
  flag
end
use_field()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 114
# The `:use_field` entry of `case_options`, if any.
def use_field
  case_options[:use_field]
end
use_field?()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 118
# @return [Boolean] whether `use_field` holds a truthy value
def use_field?
  use_field ? true : false
end
use_field_proc()
click to toggle source
# File lib/eco/api/usecases/default_cases/analyse_people_case.rb, line 106
# Builds the value reader for the `use_field` expression.
#
# @return [Proc] the result of `proc_value_access` for `use_field`
def use_field_proc
  expression = use_field
  proc_value_access(expression)
end