class VChainClient::VectorBasedDecisionAlgorithm

Constants

CREDENTIALS_FIELD_WEIGHT
NON_CREDENTIALS_FIELD_WEIGHT
PROBABILITY_THRESHOLD

Public Class Methods

new(app_config) click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 13
# Sets up the decision algorithm.
#
# app_config - configuration object; kept untouched in @config.
#
# The logger is looked up from Log4r under the name "vchain_client".
def initialize(app_config)
  @config = app_config
  @log    = Log4r::Logger["vchain_client"]
end

Public Instance Methods

get_vector_weight(vector, credentials_fields) click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 19
# Adds up the weighted validation weights of every field in a vector.
#
# vector             - Hash of vector entries. Every entry except the one
#                      keyed "names_parts" is read as an Array whose element 1
#                      is the field name and element 2 the field's weight.
# credentials_fields - collection queried with include?; fields found in it
#                      are multiplied by CREDENTIALS_FIELD_WEIGHT, all others
#                      by NON_CREDENTIALS_FIELD_WEIGHT.
#
# Returns the accumulated weight (0 when the vector has no weighted entries).
def get_vector_weight(vector, credentials_fields)
  vector.inject(0) do |total, (field_hash, values)|
    # names_parts carries name fragments, not a [value, field, weight] triple
    next total if field_hash == "names_parts"

    field_name   = values[1]
    field_weight = values[2]
    multiplier   = credentials_fields.include?(field_name) ? CREDENTIALS_FIELD_WEIGHT : NON_CREDENTIALS_FIELD_WEIGHT

    total + field_weight * multiplier
  end
end
make_decision(sent_document, res, validated_data_points) click to toggle source
# File lib/vchain_client/decision_algos/vector_based_decision_algorithm.rb, line 39
    # Compares the sent document with the documents of a validation response
    # and returns a decision hash.
    #
    # sent_document         - Hash of the sent fields. "type" selects the
    #                         credentials fields, "names" is compared with each
    #                         vector's "names_parts"; every other field is
    #                         compared with the vector entry of the same name.
    # res                   - response Hash. res["docs"] is iterated and then
    #                         REPLACED by the cut-down documents; the passed
    #                         documents also lose their "names_parts" key.
    # validated_data_points - Hash keyed by SHA512(full_doc_hash); each value
    #                         maps SHA512(field name) to a weight that is added
    #                         to the matching vector entry.
    #
    # A vector is a Hash of SHA512(field name) => [value, field name, weight],
    # optionally with a "names_parts" entry. While vectors are being combined
    # they temporarily carry a "resolutions" entry as well.
    #
    # Returns a Hash with "decision_made" => true and "decision" set to either
    # "MATCH" (with "validations") or "POSSIBLE_MISTAKES" (with
    # "possible_mistakes" and, when a single vector passed
    # PROBABILITY_THRESHOLD, "validations"). When res["docs"] is empty,
    # "possible_mistakes" is nil.
    def make_decision(sent_document, res, validated_data_points)
      # bookkeeping keys that are not [value, field, weight] entries
      meta_keys = ["names_parts", "resolutions"]

      sent_type_credentials_fields = VChainClient::Client.get_credentials_fields(sent_document["type"])

      # 1. cut non-input fields, rebuild doc_hashes
      # 2. build vectors out of cut documents
      cut_res_docs = []
      vectors = []

      res["docs"].each do |res_doc|
        cut_doc = {}
        vector = {}

        # names_parts is taken out before hashing and before the field loop
        names_parts = res_doc.key?("names_parts") ? res_doc.delete("names_parts") : {}

        full_doc_hash = VChainClient::Client.get_doc_hash(res_doc)

        res_doc.each do |res_doc_field, res_doc_value|
          next unless sent_document.key?(res_doc_field) || sent_type_credentials_fields.include?(res_doc_field)

          cut_doc[res_doc_field] = res_doc_value
          vector[Digest::SHA512.hexdigest(res_doc_field)] = [res_doc_value, res_doc_field, 0]
        end

        cut_doc["doc_hash"] = VChainClient::Client.get_doc_hash(cut_doc)
        cut_doc["full_doc_hash"] = full_doc_hash

        hashed_full_doc_hash = Digest::SHA512.hexdigest(full_doc_hash)

        if validated_data_points.key?(hashed_full_doc_hash)
          validated_data_points[hashed_full_doc_hash].each do |data_point_field, data_point_value|
            vector[data_point_field][2] += data_point_value if vector.key?(data_point_field)
          end
        end

        unless names_parts.empty?
          cut_doc["names_parts"] = names_parts
          vector["names_parts"] = names_parts
        end

        vectors.push(vector)
        cut_res_docs.push(cut_doc)
      end

      res["docs"] = cut_res_docs

      # 3. combine vectors part 1 - marking => absorb smaller, merge equal
      vectors.each_with_index do |vector_i, i|
        ((i + 1)...vectors.length).each do |j|
          vector_j = vectors[j]

          i_is_less_j = false
          j_is_less_i = false
          need_to_combine = true

          vector_i.each do |hashed_field, values_i|
            next if meta_keys.include?(hashed_field)

            if !vector_j.key?(hashed_field)
              j_is_less_i = true
            elsif values_i[0] != vector_j[hashed_field][0]
              need_to_combine = false
              break
            end
          end

          vector_j.each do |hashed_field, values_j|
            next if meta_keys.include?(hashed_field)

            if !vector_i.key?(hashed_field)
              i_is_less_j = true
            elsif values_j[0] != vector_i[hashed_field][0]
              need_to_combine = false
              break
            end
          end

          # a shared field with different values: the vectors are unrelated
          next unless need_to_combine
          # each vector has fields the other lacks: nothing to combine
          next if i_is_less_j && j_is_less_i

          # the vector with fewer fields is combined into the other one;
          # equal vectors are treated as "j combines into i"
          smaller, larger = i_is_less_j ? [i, j] : [j, i]

          (vectors[smaller]["resolutions"] ||= []).push(["combine_to", larger])
          (vectors[larger]["resolutions"] ||= []).push(["absorb", smaller])
        end
      end

      # 4. combine vectors part 2 - resolutions execution => absorb smaller, merge equal
      vectors_to_remove = []

      vectors.each_with_index do |vector_i, i|
        next unless vector_i.key?("resolutions")

        resolutions = vector_i["resolutions"]

        resolutions.each do |resolution|
          next unless resolution[0] == "combine_to"

          # hand every "absorb" of this vector over to the vector it is
          # combined into, skipping duplicates
          resolutions.each do |resolution_b|
            next unless resolution_b[0] == "absorb"

            target_resolutions = (vectors[resolution[1]]["resolutions"] ||= [])
            target_resolutions.push(resolution_b) unless target_resolutions.include?(resolution_b)
          end
        end

        # a vector that is combined into another one is dropped afterwards
        vectors_to_remove.push(i) if resolutions.any? { |resolution| resolution[0] == "combine_to" }
      end

      vectors.each_with_index do |vector_i, i|
        next unless vector_i.key?("resolutions")
        next if vectors_to_remove.include?(i)

        vector_i["resolutions"].each do |resolution|
          next unless resolution[0] == "absorb"

          vectors[resolution[1]].each do |hashed_field, absorbed_values|
            next if meta_keys.include?(hashed_field)

            vector_i[hashed_field][2] += absorbed_values[2]
          end
        end

        vector_i.delete("resolutions")
      end

      # remove marked vectors
      vectors = vectors.reject.with_index { |_vector, index| vectors_to_remove.include?(index) }

      # weigh every remaining vector once; the total is the sum of all weights
      vector_weights = vectors.map { |vector| get_vector_weight(vector, sent_type_credentials_fields) }
      total_response_weight = vector_weights.reduce(0, :+)

      diff_variants = []

      vectors.zip(vector_weights).each do |vector, vector_weight|
        # Float division: Integer weights would otherwise truncate the share
        # to 0 (or raise ZeroDivisionError when the total weight is 0).
        vector_probability =
          if total_response_weight.zero?
            0.0
          else
            (vector_weight.to_f / total_response_weight) * 100
          end

        sent_names = sent_document["names"]
        compare_document = sent_document.reject { |field, _value| field == "names" }

        # let's compare found vector with sent_document
        diff_fields = []
        validated = {}

        # flip vector: field name => accumulated weight
        vector_fliped = {}
        vector.each do |hashed_vector_field, vector_values|
          next if hashed_vector_field == "names_parts"

          vector_fliped[vector_values[1]] = vector_values[2]
        end

        # all fields except of names
        compare_document.each do |compare_field, _compare_value|
          compare_field_hashed = Digest::SHA512.hexdigest(compare_field)

          if !vector.key?(compare_field_hashed)
            # the vector has no such field: reported as validated with weight 0
            validated[compare_field] = { "validations_weight" => 0 }
          else
            vector_values = vector[compare_field_hashed]

            if sent_document[vector_values[1]] != vector_values[0]
              diff_fields.push(compare_field)
            else
              validated[compare_field] = { "validations_weight" => vector_values[2] }
            end
          end
        end

        # names: each part must be present in the vector's names_parts
        names_matched = ["surname", "given_names"].map do |name_part|
          if vector.key?("names_parts") && vector["names_parts"].key?(name_part)
            validated[name_part] = {
              "vector_values"      => vector["names_parts"][name_part],
              "validations_weight" => vector_fliped[name_part]
            }
            true
          else
            diff_fields.push(name_part)
            false
          end
        end

        if names_matched.all?
          # every sent name has to appear among the vector's name parts
          unmatched_names = sent_names - vector["names_parts"]["surname"] - vector["names_parts"]["given_names"]

          unless unmatched_names.empty?
            diff_fields.push("surname")
            diff_fields.push("given_names")
          end
        end

        if vector_probability > PROBABILITY_THRESHOLD
          ## COULD MAKE A DECISION
          if diff_fields.empty?
            return {
              "decision_made" => true,
              "decision"      => "MATCH",
              "validations"   => validated
            }
          end

          return {
            "decision_made"     => true,
            "decision"          => "POSSIBLE_MISTAKES",
            "possible_mistakes" => diff_fields,
            "validations"       => validated
          }
        end

        diff_variants.push(diff_fields)
      end

      ## ANALYSE DIFF_VARIANTS
      all_the_same = diff_variants.each_cons(2).all? do |previous_fields, current_fields|
        previous_fields.uniq.sort == current_fields.uniq.sort
      end

      if all_the_same
        # every vector disagrees on the same fields (nil when there are none)
        return {
          "decision_made"     => true,
          "decision"          => "POSSIBLE_MISTAKES",
          "possible_mistakes" => diff_variants.last
        }
      end

      {
        "decision_made"     => true,
        "decision"          => "POSSIBLE_MISTAKES",
        "possible_mistakes" => diff_variants
      }
    end