class Dependabot::FileFetchers::Base

Constants

CLIENT_NOT_FOUND_ERRORS

Attributes

credentials[R]
repo_contents_path[R]
source[R]

Public Class Methods

new(source:, credentials:, repo_contents_path: nil) click to toggle source

Creates a new FileFetcher for retrieving `DependencyFile`s.

Files are typically grabbed individually via the source's API. repo_contents_path is an optional empty directory that will be used to clone the entire source repository on first read.

If provided, file data and directory listings will be loaded from the clone. Submodules are not currently supported by repo_contents_path: the clone is made with `--no-recurse-submodules`, so submodule contents are not present in it.

# File lib/dependabot/file_fetchers/base.rb, line 45
# Stores the collaborators this fetcher works with.
#
# source::             object describing the repository to read from
# credentials::        credentials handed to the provider clients and to git
# repo_contents_path:: optional directory used for a local clone; nil means
#                      files are fetched through the provider API instead
def initialize(source:, credentials:, repo_contents_path: nil)
  @credentials = credentials
  @repo_contents_path = repo_contents_path
  @source = source
  # Redirections discovered while fetching (symlinks and submodules), keyed
  # by the repo-relative path that points somewhere else.
  @linked_paths = {}
end
required_files_in?(_filename_array) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 28
# Abstract hook: this base implementation always raises NotImplementedError
# and ignores its argument.
# NOTE(review): presumably concrete fetchers override this to report whether
# the given list of filenames contains what they need - confirm in subclasses.
def self.required_files_in?(_filename_array)
  raise NotImplementedError
end
required_files_message() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 32
# Abstract hook: this base implementation always raises NotImplementedError.
# NOTE(review): presumably concrete fetchers override this to return a
# message describing the files they require - confirm in subclasses.
def self.required_files_message
  raise NotImplementedError
end

Public Instance Methods

clone_repo_contents() click to toggle source

Returns the path to the cloned repo

# File lib/dependabot/file_fetchers/base.rb, line 81
# Returns the path of the local clone, delegating to _clone_repo_contents
# the first time and reusing the memoized path afterwards.
#
# Raises Dependabot::RepoNotFound (carrying +source+) when the clone helper
# fails with Dependabot::SharedHelpers::HelperSubprocessFailed.
def clone_repo_contents
  return @clone_repo_contents if @clone_repo_contents

  @clone_repo_contents =
    _clone_repo_contents(target_directory: repo_contents_path)
rescue Dependabot::SharedHelpers::HelperSubprocessFailed
  raise Dependabot::RepoNotFound, source
end
commit() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 68
# Returns the commit this fetcher reads from.
#
# A commit set on +source+ wins. Otherwise the head commit of the target
# branch (or, when none is set, the repo's default branch) is looked up
# through the provider client and memoized.
#
# Raises Dependabot::BranchNotFound when the client reports the branch as
# missing. Returns nil when the client raises Octokit::Conflict with a
# "Repository is empty" message; any other conflict is re-raised.
def commit
  pinned_commit = source.commit
  return pinned_commit if pinned_commit

  branch = target_branch
  branch ||= default_branch_for_repo

  @commit = client_for_provider.fetch_commit(repo, branch) unless @commit
  @commit
rescue *CLIENT_NOT_FOUND_ERRORS
  raise Dependabot::BranchNotFound, branch
rescue Octokit::Conflict => e
  return nil if e.message.include?("Repository is empty")

  raise
end
directory() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 56
# Returns the source's directory as a cleaned path string, falling back to
# "/" when the source does not specify one.
def directory
  configured_directory = source.directory || "/"
  cleaned = Pathname.new(configured_directory).cleanpath
  cleaned.to_path
end
files() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 64
# Returns the result of +fetch_files+, computing it on first use and reusing
# the memoized value on later calls.
def files
  return @files if @files

  @files = fetch_files
end
repo() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 52
# Convenience accessor: returns +source.repo+ unchanged.
def repo
  source.repo
end
target_branch() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 60
# Convenience accessor: returns +source.branch+ unchanged (it may be nil;
# +commit+ falls back to the default branch in that case).
def target_branch
  source.branch
end

Private Instance Methods

_azure_repo_contents(path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 308
# Lists the entries at +path+ for +commit+ via the Azure client and converts
# each one to an OpenStruct with name, path, type and size.
#
# "blob" is reported as "file" and "tree" as "dir"; any other
# gitObjectType value is passed through unchanged.
def _azure_repo_contents(path, commit)
  type_names = { "blob" => "file", "tree" => "dir" }

  azure_client.fetch_repo_contents(commit, path).map do |entry|
    git_type = entry.fetch("gitObjectType")
    relative_path = entry.fetch("relativePath")

    OpenStruct.new(
      name: File.basename(relative_path),
      path: relative_path,
      type: type_names.fetch(git_type, git_type),
      size: entry.fetch("size")
    )
  end
end
_bitbucket_repo_contents(repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 327
# Lists the entries at +path+ for +commit+ via the Bitbucket client and
# converts each one to an OpenStruct with name, path, type and size.
#
# "commit_file" is reported as "file" and "commit_directory" as "dir"; any
# other type is passed through unchanged. A missing "size" becomes 0.
def _bitbucket_repo_contents(repo, path, commit)
  type_names = { "commit_file" => "file", "commit_directory" => "dir" }
  listing = bitbucket_client.fetch_repo_contents(repo, commit, path)

  listing.map do |file|
    raw_type = file.fetch("type")
    file_path = file.fetch("path")

    OpenStruct.new(
      name: File.basename(file_path),
      path: file_path,
      type: type_names.fetch(raw_type, raw_type),
      size: file.fetch("size", 0)
    )
  end
end
_build_github_file_struct(file) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 277
# Copies the name, path, type, sha and size readers of +file+ into a plain
# OpenStruct, dropping everything else the object carries.
def _build_github_file_struct(file)
  copied = %i(name path type sha size).each_with_object({}) do |attribute, memo|
    memo[attribute] = file.public_send(attribute)
  end

  OpenStruct.new(copied)
end
_clone_repo_contents(target_directory:) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 498
      # Shallow-clones the source repository and returns the clone's path.
      #
      # target_directory:: where to clone; when nil, "tmp/<source.repo>" is
      #                    used instead.
      #
      # If the chosen path already contains a ".git" directory it is returned
      # as-is without cloning again. The branch name, URL and path are
      # shell-escaped before being placed in the command line so that unusual
      # characters (spaces, ";", "$", ...) cannot split or extend the command.
      def _clone_repo_contents(target_directory:)
        # Required here (idempotent) so this method does not depend on a
        # file-level require being present.
        require "shellwords"

        SharedHelpers.with_git_configured(credentials: credentials) do
          path = target_directory || File.join("tmp", source.repo)
          # Assume we're retrying the same branch, or that a `target_directory`
          # is specified when retrying a different branch.
          return path if Dir.exist?(File.join(path, ".git"))

          FileUtils.mkdir_p(path)
          br_opt = " --branch #{Shellwords.escape(source.branch)} --single-branch" if source.branch
          SharedHelpers.run_shell_command(
            <<~CMD
              git clone --no-tags --no-recurse-submodules --depth 1#{br_opt} #{Shellwords.escape(source.url)} #{Shellwords.escape(path)}
            CMD
          )
          path
        end
      end
_cloned_repo_contents(relative_path) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 229
# Lists the entries of +relative_path+ inside the local clone as OpenStructs
# with name, path, type ("symlink", "dir" or "file") and size.
#
# Returns an empty array when the directory does not exist in the clone.
# The "." and ".." entries are skipped.
def _cloned_repo_contents(relative_path)
  repo_path = File.join(clone_repo_contents, relative_path)
  return [] unless Dir.exist?(repo_path)

  entry_names = Dir.entries(repo_path).reject do |entry|
    entry == "." || entry == ".."
  end

  entry_names.map do |entry|
    absolute_path = File.join(repo_path, entry)
    # Symlinks are checked first so a link to a directory is not reported
    # as "dir".
    entry_type =
      if File.symlink?(absolute_path) then "symlink"
      elsif Dir.exist?(absolute_path) then "dir"
      else "file"
      end
    entry_path = Pathname.new(File.join(relative_path, entry)).cleanpath.to_path

    OpenStruct.new(
      name: entry,
      path: entry_path,
      type: entry_type,
      size: 0 # NOTE: added for parity with github contents API
    )
  end
end
_codecommit_repo_contents(repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 350
# Lists the files at +path+ for +commit+ via the CodeCommit client and
# converts each one to an OpenStruct. Only the response's +files+ are used,
# so every entry is reported with type "file".
def _codecommit_repo_contents(repo, path, commit)
  listing = codecommit_client.fetch_repo_contents(repo, commit, path)

  listing.files.map do |file|
    relative_path = file.relative_path

    OpenStruct.new(
      name: File.basename(relative_path),
      path: relative_path,
      type: "file",
      size: 0 # file size would require new api call per file..
    )
  end
end
_fetch_file_content(path, fetch_submodules: false) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 392
# Fetches the content of the file at +path+.
#
# The path is resolved to a provider/repo/path/commit specification with
# _full_specification_for and fetched with
# _fetch_file_content_fully_specified.
#
# When the fetch fails with one of CLIENT_NOT_FOUND_ERRORS and
# +fetch_submodules+ is true, the method tries once to discover a linked
# directory (submodule or symlink) covering the path and retries; otherwise
# the original error is re-raised.
def _fetch_file_content(path, fetch_submodules: false)
  # Work with a repo-relative path: drop any leading slashes.
  path = path.gsub(%r{^/*}, "")

  # NOTE: `path` is reassigned here to the path from the resolved
  # specification, so the rescue clause below sees that value whenever this
  # assignment completed before the error was raised.
  provider, repo, path, commit =
    _full_specification_for(path, fetch_submodules: fetch_submodules).
    values_at(:provider, :repo, :path, :commit)

  _fetch_file_content_fully_specified(provider, repo, path, commit)
rescue *CLIENT_NOT_FOUND_ERRORS
  # Method-local flag: it keeps its value when `retry` re-runs the body,
  # which limits the discovery below to a single extra attempt.
  retrying ||= false

  # Re-raise unless submodule fetching was requested, this is the first
  # failure, and no linked directory is recorded for the path yet.
  raise unless fetch_submodules && !retrying && !_linked_dir_for(path)

  # Let _find_linked_dirs record any linked directory above the path in
  # @linked_paths; re-raise if nothing covering the path was recorded.
  _find_linked_dirs(path)
  raise unless _linked_dir_for(path)

  retrying = true
  retry
end
_fetch_file_content_from_github(path, repo, commit) click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/dependabot/file_fetchers/base.rb, line 430
# Fetches the content of +path+ in +repo+ at +commit+ through the GitHub
# contents API and returns it as a UTF-8 string.
#
# * An Array response is treated as "not a file" and raises
#   Octokit::NotFound.
# * A symlink response is recorded in @linked_paths and its (cleaned)
#   target is fetched instead.
# * When GitHub answers Octokit::Forbidden with a "too_large" message, the
#   file's sha is looked up via +repo_contents+ and the blob endpoint is
#   used instead. Any other Forbidden error, or a file missing from that
#   listing, re-raises the original error.
def _fetch_file_content_from_github(path, repo, commit)
  response = github_client.contents(repo, path: path, ref: commit)

  raise Octokit::NotFound if response.is_a?(Array)

  if response.type == "symlink"
    target_path = Pathname.new(response.target).cleanpath.to_path
    @linked_paths[path] = {
      repo: repo,
      provider: "github",
      commit: commit,
      path: target_path
    }
    response = github_client.contents(repo, path: target_path, ref: commit)
  end

  Base64.decode64(response.content).force_encoding("UTF-8").encode
rescue Octokit::Forbidden => e
  raise unless e.message.include?("too_large")

  # Fall back to Git Data API to fetch the file. The listing helper expects
  # a directory relative to +directory+, so that prefix is stripped first.
  prefix_dir = directory.gsub(%r{(^/|/$)}, "")
  parent_dir = File.dirname(path).gsub(%r{^/?#{Regexp.escape(prefix_dir)}/?}, "")
  wanted_name = File.basename(path)
  file_details = repo_contents(dir: parent_dir).find do |entry|
    entry.name == wanted_name
  end
  raise unless file_details

  blob = github_client.blob(repo, file_details.sha)
  if blob.encoding == "utf-8"
    blob.content
  else
    Base64.decode64(blob.content).force_encoding("UTF-8").encode
  end
end
_fetch_file_content_fully_specified(provider, repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 412
# Fetches the content of +path+ at +commit+ from +repo+ using the client
# for +provider+.
#
# +provider+ may differ from +source.provider+ (the specification can point
# into a linked repository), so the error for an unknown provider reports
# the +provider+ argument that was actually dispatched on.
#
# Raises RuntimeError for a provider that is not handled here.
def _fetch_file_content_fully_specified(provider, repo, path, commit)
  case provider
  when "github"
    _fetch_file_content_from_github(path, repo, commit)
  when "gitlab"
    # GitLab returns the content Base64-encoded.
    tmp = gitlab_client.get_file(repo, path, commit).content
    Base64.decode64(tmp).force_encoding("UTF-8").encode
  when "azure"
    azure_client.fetch_file_contents(commit, path)
  when "bitbucket"
    bitbucket_client.fetch_file_contents(repo, commit, path)
  when "codecommit"
    codecommit_client.fetch_file_contents(repo, commit, path)
  else raise "Unsupported provider '#{provider}'."
  end
end
_fetch_repo_contents(path, fetch_submodules: false, raise_errors: true) click to toggle source

INTERNAL METHODS (not for use by sub-classes)

# File lib/dependabot/file_fetchers/base.rb, line 172
# Lists the entries at +path+.
#
# The path is resolved to a provider/repo/path/commit specification with
# _full_specification_for and listed with
# _fetch_repo_contents_fully_specified.
#
# On one of CLIENT_NOT_FOUND_ERRORS the method retries when the resolved
# path has changed, or (once, and only with +fetch_submodules+) after
# looking for a linked directory covering the path. If neither applies, the
# error is re-raised when +raise_errors+ is true and [] is returned
# otherwise.
def _fetch_repo_contents(path, fetch_submodules: false,
                         raise_errors: true)
  # Percent-encode spaces before the path is resolved and passed on.
  path = path.gsub(" ", "%20")
  provider, repo, tmp_path, commit =
    _full_specification_for(path, fetch_submodules: fetch_submodules).
    values_at(:provider, :repo, :path, :commit)

  _fetch_repo_contents_fully_specified(provider, repo, tmp_path, commit)
rescue *CLIENT_NOT_FOUND_ERRORS
  # Failure outcome selected by the caller: re-raise the error being
  # handled, or report an empty listing.
  result = raise_errors ? -> { raise } : -> { [] }
  # Method-local flag: it keeps its value when `retry` re-runs the body.
  retrying ||= false

  # If the path changes after calling _fetch_repo_contents_fully_specified
  # it's because we've found a sub-module (and are fetching them). Trigger
  # a retry to get its contents.
  updated_path =
    _full_specification_for(path, fetch_submodules: fetch_submodules).
    fetch(:path)
  retry if updated_path != tmp_path

  # Linked-directory discovery is attempted only once, and only when
  # submodule fetching was requested.
  return result.call unless fetch_submodules && !retrying

  _find_linked_dirs(path)
  return result.call unless _linked_dir_for(path)

  retrying = true
  retry
end
_fetch_repo_contents_fully_specified(provider, repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 201
# Lists the entries at +path+ for +commit+ using the listing helper that
# matches +provider+.
#
# Raises RuntimeError for a provider that is not handled here.
def _fetch_repo_contents_fully_specified(provider, repo, path, commit)
  # The Azure helper is the only one that does not take a repo argument.
  return _azure_repo_contents(path, commit) if provider == "azure"

  listing_helper = {
    "github" => :_github_repo_contents,
    "gitlab" => :_gitlab_repo_contents,
    "bitbucket" => :_bitbucket_repo_contents,
    "codecommit" => :_codecommit_repo_contents
  }[provider]
  raise "Unsupported provider '#{provider}'." unless listing_helper

  send(listing_helper, repo, path, commit)
end
_find_linked_dirs(path) click to toggle source

Update the @linked_paths hash by exploiting a side-effect of recursively calling `repo_contents` for each directory up the tree until a submodule or symlink is found.

# File lib/dependabot/file_fetchers/base.rb, line 477
# Lists the parent directory of +path+ with submodule fetching enabled and
# errors suppressed. The listing is requested for its side effect: fetching
# it can record linked directories in @linked_paths.
#
# Does nothing when the parent directory is +directory+ or ".".
def _find_linked_dirs(path)
  relative_path = Pathname.new(path).cleanpath.to_path.gsub(%r{^/*}, "")
  parent_dir = File.dirname(relative_path)

  return if parent_dir == directory || parent_dir == "."

  repo_contents(
    dir: parent_dir,
    ignore_base_directory: true,
    fetch_submodules: true,
    raise_errors: false
  )
end
_full_specification_for(path, fetch_submodules:) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 367
# Resolves +path+ to a hash with :repo, :path, :commit and :provider.
#
# When +fetch_submodules+ is true and the path lies under a directory
# recorded in @linked_paths, the result points at the linked location: the
# linked prefix is replaced by the recorded target path and the repo,
# commit and provider come from the recorded details. Otherwise the path is
# returned unchanged together with the source's own repo, commit and
# provider.
def _full_specification_for(path, fetch_submodules:)
  linked_dir = fetch_submodules ? _linked_dir_for(path) : nil

  unless linked_dir
    return {
      repo: source.repo,
      path: path,
      commit: commit,
      provider: source.provider
    }
  end

  details = @linked_paths[linked_dir]
  # Part of the path below the linked directory.
  sub_path = path.gsub(%r{^#{Regexp.quote(linked_dir)}(/|$)}, "")
  joined_path = File.join(details.fetch(:path), sub_path)
  new_path = Pathname.new(joined_path).cleanpath.to_path.gsub(%r{^/}, "")

  {
    repo: details.fetch(:repo),
    commit: details.fetch(:commit),
    provider: details.fetch(:provider),
    path: new_path
  }
end
_github_repo_contents(repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 217
# Lists the entries at +path+ through the GitHub contents API, converting
# each one with _build_github_file_struct.
#
# A response that itself responds to +type+ is a single object rather than
# a listing; it is handed to update_linked_paths and Octokit::NotFound is
# raised so the caller can retry against whatever was recorded.
def _github_repo_contents(repo, path, commit)
  encoded_path = path.gsub(" ", "%20")
  response = github_client.contents(repo, path: encoded_path, ref: commit)

  unless response.respond_to?(:type)
    return response.map { |entry| _build_github_file_struct(entry) }
  end

  update_linked_paths(repo, encoded_path, commit, response)
  raise Octokit::NotFound
end
_gitlab_repo_contents(repo, path, commit) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 287
# Lists the entries at +path+ for +commit+ via the GitLab client and
# converts each one to an OpenStruct with name, path, type and size.
#
# "blob" is reported as "file", "tree" as "dir" and "commit" as
# "submodule"; any other type is passed through unchanged. The fallback
# reads the type through the same +type+ accessor used for matching, so it
# works for any entry object exposing name/path/type readers.
#
# NOTE(review): only one page of up to 100 entries is requested here.
def _gitlab_repo_contents(repo, path, commit)
  gitlab_client.
    repo_tree(repo, path: path, ref_name: commit, per_page: 100).
    map do |file|
      # GitLab API essentially returns the output from `git ls-tree`
      type = case file.type
             when "blob" then "file"
             when "tree" then "dir"
             when "commit" then "submodule"
             else file.type
             end

      OpenStruct.new(
        name: file.name,
        path: file.path,
        type: type,
        size: 0 # GitLab doesn't return file size
      )
    end
end
_linked_dir_for(path) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 491
# Returns the longest key of @linked_paths that is either equal to +path+
# or a directory prefix of it, or nil when no key matches.
def _linked_dir_for(path)
  matching_dirs = @linked_paths.keys.select do |linked_dir|
    path.match?(%r{^#{Regexp.quote(linked_dir)}(/|$)})
  end

  matching_dirs.max_by { |linked_dir| linked_dir.length }
end
azure_client() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 543
# Returns the Azure client for this source and credentials, building it on
# first use and reusing the memoized instance afterwards.
def azure_client
  return @azure_client if @azure_client

  @azure_client = Dependabot::Clients::Azure.for_source(
    source: source,
    credentials: credentials
  )
end
bitbucket_client() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 549
# Returns the Bitbucket client for these credentials, building it on first
# use and reusing the memoized instance afterwards. The client is always
# built for bitbucket.org; +source+ is not consulted.
def bitbucket_client
  return @bitbucket_client if @bitbucket_client

  # TODO: When self-hosted Bitbucket is supported this should use
  # `Bitbucket.for_source`
  @bitbucket_client = Dependabot::Clients::BitbucketWithRetries.for_bitbucket_dot_org(
    credentials: credentials
  )
end
client_for_provider() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 516
# Returns the API client matching +source.provider+.
#
# Raises RuntimeError for a provider that is not handled here.
def client_for_provider
  provider = source.provider
  client_method = {
    "github" => :github_client,
    "gitlab" => :gitlab_client,
    "azure" => :azure_client,
    "bitbucket" => :bitbucket_client,
    "codecommit" => :codecommit_client
  }[provider]
  raise "Unsupported provider '#{provider}'." unless client_method

  send(client_method)
end
codecommit_client() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 557
# Returns the CodeCommit client for this source and credentials, building
# it on first use and reusing the memoized instance afterwards.
def codecommit_client
  return @codecommit_client if @codecommit_client

  @codecommit_client = Dependabot::Clients::CodeCommit.for_source(
    source: source,
    credentials: credentials
  )
end
default_branch_for_repo() click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/dependabot/file_fetchers/base.rb, line 467
# Returns the repo's default branch as reported by the provider client,
# looking it up on first use and reusing the memoized value afterwards.
#
# Raises Dependabot::RepoNotFound (carrying +source+) when the client fails
# with one of CLIENT_NOT_FOUND_ERRORS.
def default_branch_for_repo
  return @default_branch_for_repo if @default_branch_for_repo

  @default_branch_for_repo = client_for_provider.fetch_default_branch(repo)
rescue *CLIENT_NOT_FOUND_ERRORS
  raise Dependabot::RepoNotFound, source
end
fetch_file_from_host(filename, type: "file", fetch_submodules: false) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 136
# Fetches +filename+ (relative to +directory+) and wraps it in a
# DependencyFile.
#
# When a repo_contents_path is configured the file is read from the local
# clone instead. Otherwise the content comes from _fetch_file_content; if
# that recorded the path in @linked_paths the file is reported with type
# "symlink" and the recorded target, else with the given +type+.
#
# Raises Dependabot::DependencyFileNotFound when the fetch fails with one
# of CLIENT_NOT_FOUND_ERRORS.
def fetch_file_from_host(filename, type: "file", fetch_submodules: false)
  return load_cloned_file_if_present(filename) unless repo_contents_path.nil?

  path = Pathname.new(File.join(directory, filename)).cleanpath.to_path
  content = _fetch_file_content(path, fetch_submodules: fetch_submodules)

  # @linked_paths is keyed by paths without a leading slash.
  link_key = path.gsub(%r{^/}, "")
  file_type = @linked_paths.key?(link_key) ? "symlink" : type

  DependencyFile.new(
    name: Pathname.new(filename).cleanpath.to_path,
    directory: directory,
    type: file_type,
    content: content,
    symlink_target: @linked_paths.dig(link_key, :path)
  )
rescue *CLIENT_NOT_FOUND_ERRORS
  raise Dependabot::DependencyFileNotFound, path
end
fetch_file_if_present(filename, fetch_submodules: false) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 90
# Fetches +filename+ if it exists, returning nil when it does not.
#
# With a repo_contents_path configured, the file is read from the local
# clone and a missing file yields nil. Otherwise the containing directory
# is listed first and the file is only fetched when a non-directory entry
# with the same basename is present.
#
# Raises Dependabot::DependencyFileNotFound when the listing or fetch fails
# with one of CLIENT_NOT_FOUND_ERRORS.
def fetch_file_if_present(filename, fetch_submodules: false)
  unless repo_contents_path.nil?
    return begin
      load_cloned_file_if_present(filename)
    rescue Dependabot::DependencyFileNotFound
      nil
    end
  end

  parent_dir = File.dirname(filename)
  wanted_name = File.basename(filename)

  listing = repo_contents(dir: parent_dir, fetch_submodules: fetch_submodules)
  file_listed = listing.any? do |entry|
    entry.type != "dir" && entry.name == wanted_name
  end
  return unless file_listed

  fetch_file_from_host(filename, fetch_submodules: fetch_submodules)
rescue *CLIENT_NOT_FOUND_ERRORS
  path = Pathname.new(File.join(directory, filename)).cleanpath.to_path
  raise Dependabot::DependencyFileNotFound, path
end
github_client() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 527
# Returns the GitHub client for this source and credentials, building it on
# first use and reusing the memoized instance afterwards.
def github_client
  return @github_client if @github_client

  client_class = Dependabot::Clients::GithubWithRetries
  @github_client = client_class.for_source(source: source, credentials: credentials)
end
gitlab_client() click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 535
# Returns the GitLab client for this source and credentials, building it on
# first use and reusing the memoized instance afterwards.
def gitlab_client
  return @gitlab_client if @gitlab_client

  client_class = Dependabot::Clients::GitlabWithRetries
  @gitlab_client = client_class.for_source(source: source, credentials: credentials)
end
load_cloned_file_if_present(filename) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 114
# Reads +filename+ (relative to +directory+) from the local clone and wraps
# it in a DependencyFile.
#
# A symlinked file is reported with type "symlink" and the link's target;
# its content is whatever reading through the link returns.
#
# Raises Dependabot::DependencyFileNotFound when the file does not exist in
# the clone.
def load_cloned_file_if_present(filename)
  path = Pathname.new(File.join(directory, filename)).cleanpath.to_path
  repo_path = File.join(clone_repo_contents, path)
  raise Dependabot::DependencyFileNotFound, path unless File.exist?(repo_path)

  content = File.read(repo_path)
  is_symlink = File.symlink?(repo_path)
  symlink_target = File.readlink(repo_path) if is_symlink

  DependencyFile.new(
    name: Pathname.new(filename).cleanpath.to_path,
    directory: directory,
    type: is_symlink ? "symlink" : "file",
    content: content,
    symlink_target: symlink_target
  )
end
repo_contents(dir: ".", ignore_base_directory: false, raise_errors: true, fetch_submodules: false) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 154
# Lists the entries of +dir+, caching the result per directory.
#
# dir::                   directory to list; joined onto +directory+ unless
#                         +ignore_base_directory+ is true
# ignore_base_directory:: treat +dir+ as already relative to the repo root
# raise_errors::          forwarded to _fetch_repo_contents
# fetch_submodules::      forwarded to _fetch_repo_contents
#
# The listing comes from the local clone when a repo_contents_path is
# configured and from the provider API otherwise. The cache is keyed by the
# (joined) directory only, so later calls for the same directory return the
# first listing regardless of the other options.
def repo_contents(dir: ".", ignore_base_directory: false,
                  raise_errors: true, fetch_submodules: false)
  cache_key = ignore_base_directory ? dir : File.join(directory, dir)
  path = Pathname.new(File.join(cache_key)).cleanpath.to_path.gsub(%r{^/*}, "")

  @repo_contents ||= {}
  cached_listing = @repo_contents[cache_key]
  return cached_listing if cached_listing

  @repo_contents[cache_key] =
    if repo_contents_path
      _cloned_repo_contents(path)
    else
      _fetch_repo_contents(
        path,
        raise_errors: raise_errors,
        fetch_submodules: fetch_submodules
      )
    end
end
update_linked_paths(repo, path, commit, github_response) click to toggle source
# File lib/dependabot/file_fetchers/base.rb, line 254
# Records in @linked_paths where +path+ really points, based on a GitHub
# contents response of type "submodule" or "symlink".
#
# * submodule: the entry points at the root ("/") of the repo parsed from
#   the submodule URL, at the sha GitHub reports. Nothing is recorded when
#   the URL cannot be turned into a Source.
# * symlink: the entry points at the link target, resolved relative to the
#   directory containing +path+, in the same repo and commit.
#
# Responses of any other type are ignored.
def update_linked_paths(repo, path, commit, github_response)
  response_type = github_response.type

  if response_type == "submodule"
    sub_source = Source.from_url(github_response.submodule_git_url)
    return unless sub_source

    @linked_paths[path] = {
      repo: sub_source.repo,
      provider: sub_source.provider,
      commit: github_response.sha,
      path: "/"
    }
  elsif response_type == "symlink"
    target = File.join(File.dirname(path), github_response.target)
    @linked_paths[path] = {
      repo: repo,
      provider: "github",
      commit: commit,
      path: Pathname.new(target).cleanpath.to_path
    }
  end
end