class VChainClient::VectorBasedDecisionAlgorithm
Constants
- CREDENTIALS_FIELD_WEIGHT
- NON_CREDENTIALS_FIELD_WEIGHT
- PROBABILITY_THRESHOLD
Public Class Methods
new(app_config)
click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 13
#
# Sets up the decision algorithm: stores the given application config in
# @config and looks up the Log4r logger registered as "vchain_client".
#
# app_config:: configuration object, kept as-is
def initialize(app_config)
  @log = Log4r::Logger["vchain_client"]
  @config = app_config
end
Public Instance Methods
get_vector_weight(vector, credentials_fields)
click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 19
#
# Sums the weighted validations of a single vector.
#
# vector::             Hash of hashed field name => [value, field name, weight].
#                      The metadata entries "names_parts" and "resolutions"
#                      are not field triples and are ignored.
# credentials_fields:: field names whose weight is multiplied by
#                      CREDENTIALS_FIELD_WEIGHT; every other field uses
#                      NON_CREDENTIALS_FIELD_WEIGHT.
#
# Returns the total weight (0 for an empty vector).
def get_vector_weight(vector, credentials_fields)
  vector.inject(0) do |total, (field_hash, values)|
    # Metadata entries hold a Hash / list of resolutions, not [value, field, weight].
    next total if field_hash == "names_parts" || field_hash == "resolutions"

    field = values[1]
    weight = values[2]
    multiplier = credentials_fields.include?(field) ? CREDENTIALS_FIELD_WEIGHT : NON_CREDENTIALS_FIELD_WEIGHT

    total + weight * multiplier
  end
end
make_decision(sent_document, res, validated_data_points)
click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 39
#
# Decides whether the sent document matches the documents found in the
# response.
#
# Steps:
# 1. cut every response document down to the input fields and rebuild its hashes;
# 2. build one vector per cut document and load the validated weights into it;
# 3. mark vectors that can be combined (a smaller vector is absorbed by a
#    larger one, equal vectors are merged);
# 4. execute the marked resolutions and drop the absorbed vectors;
# 5. compare each remaining vector with the sent document and decide.
#
# sent_document::         Hash of field name => value; "type" selects the
#                         credentials fields, "names" (optional) is compared
#                         against the names parts of a vector.
# res::                   response Hash; res["docs"] is REPLACED by the cut
#                         documents, and "names_parts" is removed from the
#                         original document hashes.
# validated_data_points:: Hash of SHA512(full doc hash) =>
#                         { hashed field name => weight }.
#
# Returns a Hash with "decision_made" => true and "decision" set to "MATCH"
# or "POSSIBLE_MISTAKES". A decision taken on a single vector also carries
# "validations"; "possible_mistakes" lists the differing fields (one list
# when all vectors agree, otherwise one list per vector; nil when there are
# no vectors at all).
def make_decision(sent_document, res, validated_data_points)
  sent_type_credentials_fields = VChainClient::Client.get_credentials_fields(sent_document["type"])

  vectors, cut_res_docs = build_decision_vectors(sent_document, res["docs"],
                                                 sent_type_credentials_fields,
                                                 validated_data_points)
  res["docs"] = cut_res_docs

  mark_vector_resolutions(vectors)
  execute_vector_resolutions(vectors)

  # Weigh every vector once; the total is the denominator of each share.
  vector_weights = vectors.map { |vector| get_vector_weight(vector, sent_type_credentials_fields) }
  total_response_weight = vector_weights.inject(0) { |sum, weight| sum + weight }

  diff_variants = []

  vectors.each_with_index do |vector, index|
    diff_fields, validated = compare_vector_with_document(vector, sent_document)

    # Float division: Integer weights would otherwise floor the share to 0.
    # With nothing validated (total of 0) no vector can be decisive.
    decisive = !total_response_weight.zero? &&
               (vector_weights[index].to_f / total_response_weight) * 100 > PROBABILITY_THRESHOLD

    if decisive
      if diff_fields.empty?
        return {
          "decision_made" => true,
          "decision" => "MATCH",
          "validations" => validated
        }
      end

      return {
        "decision_made" => true,
        "decision" => "POSSIBLE_MISTAKES",
        "possible_mistakes" => diff_fields,
        "validations" => validated
      }
    end

    diff_variants.push(diff_fields)
  end

  # No single vector was decisive: report one list if all vectors differ in
  # the same fields, otherwise report every variant.
  all_the_same = diff_variants.each_cons(2).all? do |previous, current|
    previous.uniq.sort == current.uniq.sort
  end

  {
    "decision_made" => true,
    "decision" => "POSSIBLE_MISTAKES",
    "possible_mistakes" => all_the_same ? diff_variants.last : diff_variants
  }
end

# True for vector entries that are bookkeeping rather than field triples.
def vector_meta_key?(key)
  key == "names_parts" || key == "resolutions"
end

# Cuts each response document to the input/credentials fields and builds the
# matching vector (hashed field => [value, field, weight]); returns
# [vectors, cut_docs].
def build_decision_vectors(sent_document, res_docs, credentials_fields, validated_data_points)
  vectors = []
  cut_docs = []

  res_docs.each do |res_doc|
    # names_parts is detached before hashing, so it is not part of the doc hash.
    names_parts = res_doc.delete("names_parts") || {}

    full_doc_hash = VChainClient::Client.get_doc_hash(res_doc)

    cut_doc = {}
    vector = {}

    res_doc.each do |field, value|
      next unless sent_document.key?(field) || credentials_fields.include?(field)

      cut_doc[field] = value
      vector[Digest::SHA512.hexdigest(field)] = [value, field, 0]
    end

    cut_doc["doc_hash"] = VChainClient::Client.get_doc_hash(cut_doc)
    cut_doc["full_doc_hash"] = full_doc_hash

    hashed_full_doc_hash = Digest::SHA512.hexdigest(full_doc_hash)

    if validated_data_points.key?(hashed_full_doc_hash)
      validated_data_points[hashed_full_doc_hash].each do |hashed_field, weight|
        vector[hashed_field][2] += weight if vector.key?(hashed_field)
      end
    end

    unless names_parts.empty?
      cut_doc["names_parts"] = names_parts
      vector["names_parts"] = names_parts
    end

    vectors.push(vector)
    cut_docs.push(cut_doc)
  end

  [vectors, cut_docs]
end

# Marks every pair of compatible vectors: the vector with fewer fields gets
# ["combine_to", other] and the other gets ["absorb", smaller]. Equal vectors
# combine the later one into the earlier one.
def mark_vector_resolutions(vectors)
  vectors.each_index do |i|
    ((i + 1)...vectors.length).each do |j|
      fields_i = vectors[i].keys.reject { |key| vector_meta_key?(key) }
      fields_j = vectors[j].keys.reject { |key| vector_meta_key?(key) }

      # A shared field with different values means the vectors describe
      # different documents.
      conflicting = (fields_i & fields_j).any? do |field|
        vectors[i][field][0] != vectors[j][field][0]
      end
      next if conflicting

      i_is_less_j = !(fields_j - fields_i).empty?
      j_is_less_i = !(fields_i - fields_j).empty?

      # Each side has fields the other lacks: they differ, nothing to combine.
      next if i_is_less_j && j_is_less_i

      smaller, larger = i_is_less_j ? [i, j] : [j, i]

      (vectors[smaller]["resolutions"] ||= []).push(["combine_to", larger])
      (vectors[larger]["resolutions"] ||= []).push(["absorb", smaller])
    end
  end
end

# Executes the marked resolutions: forwards "absorb" marks to the combine
# targets, adds the absorbed weights to the surviving vectors and deletes the
# absorbed vectors from +vectors+ (in place).
def execute_vector_resolutions(vectors)
  vectors_to_remove = []

  vectors.each_with_index do |vector, i|
    resolutions = vector["resolutions"]
    next unless resolutions

    targets = resolutions.select { |resolution| resolution[0] == "combine_to" }
                         .map { |resolution| resolution[1] }
    next if targets.empty?

    absorbs = resolutions.select { |resolution| resolution[0] == "absorb" }

    targets.each do |target|
      target_resolutions = (vectors[target]["resolutions"] ||= [])

      absorbs.each do |absorb|
        # skip duplicates
        target_resolutions.push(absorb) unless target_resolutions.include?(absorb)
      end
    end

    vectors_to_remove.push(i)
  end

  vectors.each_with_index do |vector, i|
    next unless vector.key?("resolutions")
    next if vectors_to_remove.include?(i)

    vector["resolutions"].each do |resolution|
      next unless resolution[0] == "absorb"

      vectors[resolution[1]].each do |hashed_field, values|
        next if vector_meta_key?(hashed_field)
        next unless vector.key?(hashed_field)

        vector[hashed_field][2] += values[2]
      end
    end

    vector.delete("resolutions")
  end

  # Indices are ascending; deleting from the end keeps the rest valid.
  vectors_to_remove.reverse_each { |index| vectors.delete_at(index) }

  vectors
end

# Compares one vector with the sent document; returns [diff_fields, validated]
# where diff_fields lists the differing field names and validated maps field
# names to their validation info.
def compare_vector_with_document(vector, sent_document)
  diff_fields = []
  validated = {}

  # field name => weight, used for the weights of the names parts
  weights_by_field = {}
  vector.each do |hashed_field, values|
    next if hashed_field == "names_parts"

    weights_by_field[values[1]] = values[2]
  end

  # all fields except names
  sent_document.each_key do |field|
    next if field == "names"

    hashed_field = Digest::SHA512.hexdigest(field)

    if vector.key?(hashed_field)
      values = vector[hashed_field]

      if sent_document[field] != values[0]
        diff_fields.push(field)
      else
        validated[field] = { "validations_weight" => values[2] }
      end
    else
      validated[field] = { "validations_weight" => 0 }
    end
  end

  # names
  names_parts = vector["names_parts"]
  matched_parts = ["surname", "given_names"].select do |part|
    names_parts && names_parts.key?(part)
  end

  ["surname", "given_names"].each do |part|
    if matched_parts.include?(part)
      validated[part] = {
        "vector_values" => names_parts[part],
        "validations_weight" => weights_by_field[part]
      }
    else
      diff_fields.push(part)
    end
  end

  if matched_parts.length == 2
    # A sent document without "names" is treated as having no names to check.
    sent_names = Array(sent_document["names"])
    unmatched = sent_names - names_parts["surname"] - names_parts["given_names"]

    unless unmatched.empty?
      diff_fields.push("surname")
      diff_fields.push("given_names")
    end
  end

  [diff_fields, validated]
end

private :vector_meta_key?, :build_decision_vectors, :mark_vector_resolutions,
        :execute_vector_resolutions, :compare_vector_with_document