module Elasticsearch::Git::Repository
Public Class Methods
Intended to be overridden by the including class.
# File lib/elasticsearch/git/repository.rb, line 326
# Hook reporting how many repositories are indexed.
# This default stub returns 10; host applications are expected to
# override it with a real count.
def self.repositories_count
  10
end
# File lib/elasticsearch/git/repository.rb, line 352
# Searches the index for blobs and/or commits matching +query+.
#
# type    - :all, :blob or :commit (anything else yields empty results)
# page    - result page number
# per     - results per page
# options - extra options forwarded to search_blob/search_commit
#
# Returns a hash with :blobs and :commits arrays; the array for an
# unrequested type stays empty.
def self.search(query, type: :all, page: 1, per: 20, options: {})
  results = { blobs: [], commits: [] }

  wanted = type.to_sym
  if wanted == :all || wanted == :blob
    results[:blobs] = search_blob(query, page: page, per: per, options: options)
  end
  if wanted == :all || wanted == :commit
    results[:commits] = search_commit(query, page: page, per: per, options: options)
  end

  results
end
Public Instance Methods
Representation of the repository as indexed JSON. Attention: the resulting hash can be extremely large.
# File lib/elasticsearch/git/repository.rb, line 227
# Full JSON representation of the repository for indexing.
# Beware: for large repositories this hash can be enormous.
#
# options - unused here, kept for the Elasticsearch::Model callback API
#
# Returns a hash with :blobs and :commits document arrays.
def as_indexed_json(options = {})
  {
    blobs: index_blobs_array,
    commits: index_commits_array
  }
end
Indexes text-like files whose size is less than 1 MB.
# File lib/elasticsearch/git/repository.rb, line 118
# True when the blob is worth indexing: text-like and smaller than
# 1 MB (1_048_576 bytes). Binary blobs and oversized blobs are skipped.
def can_index_blob?(blob)
  return false unless blob.text?

  blob.size && blob.size.to_i < 1_048_576
end
# File lib/elasticsearch/git/repository.rb, line 348
# Memoized Elasticsearch client used for every index/delete request.
def client_for_indexing
  @client_for_indexing = Elasticsearch::Client.new(log: true) if @client_for_indexing.nil?
  @client_for_indexing
end
# File lib/elasticsearch/git/repository.rb, line 122
# Removes a blob document ("<repository_id>_<path>") from the index.
# Non-text blobs were never indexed, so they are skipped.
#
# Returns true when the document was already gone (NotFound); logs and
# swallows any other indexing error.
def delete_from_index_blob(blob)
  return unless blob.text?

  begin
    client_for_indexing.delete \
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{blob.path}"
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    true
  rescue StandardError => ex
    # Rescue StandardError, not Exception: rescuing Exception would also
    # swallow SignalException/SystemExit.
    logger.warn "Error with remove file from index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
  end
end
# File lib/elasticsearch/git/repository.rb, line 85
# Indexes a single blob document into the "repository" type of the
# class-level index, keyed by "<repository_id>_<path>".
#
# blob       - LiteBlob-like object (#id, #path, #data, #language, #text?, #size)
# target_sha - sha of the commit the blob content belongs to
#
# Skips blobs that are not indexable (binary or >= 1 MB). Transient
# indexing errors are retried up to 10 times with randomized backoff;
# after that the failure is logged and swallowed.
def index_blob(blob, target_sha)
  return unless can_index_blob?(blob)

  tries = 0
  begin
    client_for_indexing.index \
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{blob.path}",
      body: {
        blob: {
          type: "blob",
          oid: blob.id,
          rid: repository_id,
          content: blob.data,
          commit_sha: target_sha,
          path: blob.path,
          language: blob.language ? blob.language.name : "Text"
        }
      }
  rescue StandardError => ex
    # Retry up to 10 times. Rescue StandardError, not Exception, so
    # signals and SystemExit are not swallowed.
    if tries < 10
      tries += 1
      # rand(1..10) guarantees a non-zero delay; rand(10) could return 0
      # and produce a hot retry loop.
      sleep tries * 10 * rand(1..10)
      retry
    else
      logger.warn "Can't index #{repository_id}_#{blob.path}. Reason: #{ex.message}"
    end
  end
end
Indexing all text-like blobs in repository
All data stored in global index Repository
Documents can be selected by the 'rid' field; if desired, this field can be used to store a 'project' id.
blob {
id - unique id of the blob across all repositories; oid - blob id within the repository; content - blob content; commit_sha - sha of the last commit that touched the blob
}
For search from blobs use type ‘blob’
# File lib/elasticsearch/git/repository.rb, line 64 def index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid) from, to = parse_revs(from_rev, to_rev) diff = repository_for_indexing.diff(from, to) diff.deltas.reverse.each_with_index do |delta, step| if delta.status == :deleted next if delta.old_file[:mode].to_s(8) == "160000" b = LiteBlob.new(repository_for_indexing, delta.old_file) delete_from_index_blob(b) else next if delta.new_file[:mode].to_s(8) == "160000" b = LiteBlob.new(repository_for_indexing, delta.new_file) index_blob(b, to) end # Run GC every 100 blobs ObjectSpace.garbage_collect if step % 100 == 0 end end
Indexing blob from current index
# File lib/elasticsearch/git/repository.rb, line 235 def index_blobs_array result = [] target_sha = repository_for_indexing.head.target.oid if repository_for_indexing.bare? tree = repository_for_indexing.lookup(target_sha).tree result.push(recurse_blobs_index_hash(tree)) else repository_for_indexing.index.each do |blob| b = LiteBlob.new(repository_for_indexing, blob) result.push( { type: 'blob', id: "#{target_sha}_#{b.path}", rid: repository_id, oid: b.id, content: b.data, commit_sha: target_sha } ) if b.text? end end result end
# File lib/elasticsearch/git/repository.rb, line 178
# Indexes a single commit document into the "repository" type of the
# class-level index, keyed by "<repository_id>_<commit sha>".
#
# commit - Rugged::Commit-like object (#oid, #author, #committer, #message)
#
# Transient indexing errors are retried up to 10 times with randomized
# backoff; after that the failure is logged and swallowed.
def index_commit(commit)
  tries = 0
  begin
    client_for_indexing.index \
      index: "#{self.class.index_name}",
      type: "repository",
      id: "#{repository_id}_#{commit.oid}",
      body: {
        commit: {
          type: "commit",
          rid: repository_id,
          sha: commit.oid,
          author: commit.author,
          committer: commit.committer,
          message: encode!(commit.message)
        }
      }
  rescue StandardError => ex
    # Retry up to 10 times. Rescue StandardError, not Exception, so
    # signals and SystemExit are not swallowed.
    if tries < 10
      tries += 1
      # rand(1..10) guarantees a non-zero delay; rand(10) could return 0
      # and produce a hot retry loop.
      sleep tries * 10 * rand(1..10)
      retry
    else
      logger.warn "Can't index #{repository_id}_#{commit.oid}. Reason: #{ex.message}"
    end
  end
end
Indexing all commits in repository
All data stored in global index Repository
Documents can be filtered by the 'rid' field; if desired, this field can be used to store a 'project' id.
commit {
sha - commit sha; author { name - commit author name; email - commit author email; time - commit time }; committer { name - committer name; email - committer email; time - commit time }; message - commit message
}
For search from commits use type ‘commit’
# File lib/elasticsearch/git/repository.rb, line 159
# Indexes every commit reachable in the range from_rev..to_rev
# (defaults to everything reachable from the last commit when from_rev
# is nil/new-branch).
#
# Returns the number of commits indexed, or 0 if the git invocation failed.
def index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
  from, to = parse_revs(from_rev, to_rev)
  range = [from, to].compact.join('..')

  # Spawn git without a shell so the rev range can never be
  # shell-interpreted (the previous string form was injectable).
  args = ['git', 'log']
  args << range unless range.empty?
  args << '--format=%H'
  out, err, status = Open3.capture3(*args, chdir: repository_for_indexing.path)

  if status.success? && err.blank?
    # TODO: use rugged walker!!!
    commit_oids = out.split("\n")
    commit_oids.each_with_index do |commit, step|
      index_commit(repository_for_indexing.lookup(commit))
      # GC periodically; looked-up commit objects can be large.
      ObjectSpace.garbage_collect if step % 100 == 0
    end
    return commit_oids.count
  end

  0
end
Lookup all object ids for commit objects
# File lib/elasticsearch/git/repository.rb, line 288
# Walks every object id in the repository and collects a document hash
# for each commit object (other object types are ignored).
#
# Returns an array of commit hashes for as_indexed_json.
def index_commits_array
  commits = []

  repository_for_indexing.each_id do |oid|
    object = repository_for_indexing.lookup(oid)
    next unless object.type == :commit

    commits << {
      type: 'commit',
      sha: object.oid,
      author: object.author,
      committer: object.committer,
      message: encode!(object.message)
    }
  end

  commits
end
# File lib/elasticsearch/git/repository.rb, line 221
# A push that creates a new branch reports the all-zero sha as its
# "from" revision; detect that case.
def index_new_branch?(from)
  from == ('0' * 40)
end
# File lib/elasticsearch/git/repository.rb, line 367
# Lazily-built logger writing to STDOUT.
def logger
  @logger = Logger.new(STDOUT) if @logger.nil?
  @logger
end
# File lib/elasticsearch/git/repository.rb, line 207 def parse_revs(from_rev, to_rev) from = if index_new_branch?(from_rev) if to_rev == repository_for_indexing.last_commit.oid nil else merge_base(to_rev) end else from_rev end return from, to_rev end
# File lib/elasticsearch/git/repository.rb, line 331
# Path of the repository on disk, as set via repository_for_indexing.
# Raises NotImplementedError when it has not been configured yet.
def path_to_repo
  return @path_to_repo unless @path_to_repo.blank?

  raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
end
# File lib/elasticsearch/git/repository.rb, line 262
# Recursively collects indexable blob documents for +tree+.
#
# tree - a Rugged tree object
# path - accumulated directory prefix ("" at the root)
#
# Returns a flat array of blob hashes (only text blobs are included).
def recurse_blobs_index_hash(tree, path = "")
  result = []

  tree.each_blob do |blob|
    blob[:path] = path + blob[:name]
    b = LiteBlob.new(repository_for_indexing, blob)
    result.push(
      {
        type: 'blob',
        id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
        rid: repository_id,
        oid: b.id,
        content: b.data,
        commit_sha: repository_for_indexing.head.target.oid
      }
    ) if b.text?
  end

  tree.each_tree do |nested_tree|
    # Carry the accumulated prefix into the recursion. The previous code
    # passed only "#{nested_tree[:name]}/", so blobs nested more than one
    # level deep lost their parent directories in ids and paths.
    result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{path}#{nested_tree[:name]}/"))
  end

  result.flatten
end
# File lib/elasticsearch/git/repository.rb, line 340 def repository_for_indexing(repo_path = "") return @rugged_repo_indexer if defined? @rugged_repo_indexer @path_to_repo ||= repo_path set_repository_id @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo) end
Intended to be overridden by the including class.
# File lib/elasticsearch/git/repository.rb, line 321 def repository_id @repository_id end
# File lib/elasticsearch/git/repository.rb, line 309
# Instance-level search scoped to this repository. Defaults
# options[:repository_id] to this repository's id unless the caller
# already provided one, then delegates to the class-level search.
def search(query, type: :all, page: 1, per: 20, options: {})
  scoped = options
  scoped[:repository_id] = repository_id if scoped[:repository_id].nil?

  self.class.search(query, type: type, page: page, per: per, options: scoped)
end
Repository
The id is used to distinguish data from different repositories. Update this value if needed.
# File lib/elasticsearch/git/repository.rb, line 316
# Stores the identifier used to tag this repository's documents ("rid").
# Falls back to the repository path when no explicit id is given.
def set_repository_id(id = nil)
  @repository_id = id
  @repository_id ||= path_to_repo
end
Private Instance Methods
# File lib/elasticsearch/git/repository.rb, line 373
# Merge base between to_rev and the repository's current head commit.
def merge_base(to_rev)
  head_oid = repository_for_indexing.last_commit.oid

  repository_for_indexing.merge_base(to_rev, head_oid)
end