class Cluster

Attributes

cluster_name[R]
config_file[R]
configuration[R]
hetzner_client[R]
hetzner_token[R]
k3s_version[R]
kubeconfig_path[R]
kubernetes_client[R]
location[R]
masters_config[R]
networks[R]
new_k3s_version[R]
servers[RW]
ssh_key_path[R]
tls_sans[R]
verify_host_key[R]
worker_node_pools[R]

Public Class Methods

new(hetzner_client:, hetzner_token:) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 19
def initialize(hetzner_client:, hetzner_token:)
  @hetzner_client = hetzner_client
  @hetzner_token = hetzner_token
end

Public Instance Methods

create(configuration:) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 24
def create(configuration:)
  @cluster_name = configuration.dig("cluster_name")
  @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
  @ssh_key_path = File.expand_path(configuration.dig("ssh_key_path"))
  @k3s_version = configuration.dig("k3s_version")
  @masters_config = configuration.dig("masters")
  @worker_node_pools = configuration.dig("worker_node_pools")
  @location = configuration.dig("location")
  @verify_host_key = configuration.fetch("verify_host_key", false)
  @servers = []
  @networks = configuration.dig("ssh_allowed_networks")

  create_resources

  deploy_kubernetes

  sleep 10

  deploy_cloud_controller_manager
  deploy_csi_driver
  deploy_system_upgrade_controller
end
delete(configuration:) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 47
def delete(configuration:)
  @cluster_name = configuration.dig("cluster_name")
  @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
  @ssh_key_path = File.expand_path(configuration.dig("ssh_key_path"))

  delete_resources
end
upgrade(configuration:, new_k3s_version:, config_file:) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 55
def upgrade(configuration:, new_k3s_version:, config_file:)
  @configuration = configuration
  @cluster_name = configuration.dig("cluster_name")
  @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
  @new_k3s_version = new_k3s_version
  @config_file = config_file

  upgrade_cluster
end

Private Instance Methods

all_servers() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 480
def all_servers
  @all_servers ||= hetzner_client.get("/servers")["servers"].select{ |server| belongs_to_cluster?(server) == true }
end
api_server_ip() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 512
def api_server_ip
  return @api_server_ip if @api_server_ip

  @api_server_ip = if masters.size > 1
    load_balancer_name = "#{cluster_name}-api"
    load_balancer = hetzner_client.get("/load_balancers")["load_balancers"].detect{ |load_balancer| load_balancer["name"] == load_balancer_name }
    load_balancer["public_net"]["ipv4"]["ip"]
  else
    first_master_public_ip
  end
end
belongs_to_cluster?(server) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 604
def belongs_to_cluster?(server)
  server.dig("labels", "cluster") == cluster_name
end
create_resources() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 81
def create_resources
  master_instance_type = masters_config["instance_type"]
  masters_count = masters_config["instance_count"]

  firewall_id = Hetzner::Firewall.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).create(ha: (masters_count > 1), networks: networks)

  network_id = Hetzner::Network.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).create

  ssh_key_id = Hetzner::SSHKey.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).create(ssh_key_path: ssh_key_path)

  server_configs = []

  masters_count.times do |i|
    server_configs << {
      location: location,
      instance_type: master_instance_type,
      instance_id: "master#{i+1}",
      firewall_id: firewall_id,
      network_id: network_id,
      ssh_key_id: ssh_key_id
    }
  end

  if masters_count > 1
    Hetzner::LoadBalancer.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).create(location: location, network_id: network_id)
  end

  worker_node_pools.each do |worker_node_pool|
    worker_node_pool_name = worker_node_pool["name"]
    worker_instance_type = worker_node_pool["instance_type"]
    worker_count = worker_node_pool["instance_count"]

    worker_count.times do |i|
      server_configs << {
        location: location,
        instance_type: worker_instance_type,
        instance_id: "pool-#{worker_node_pool_name}-worker#{i+1}",
        firewall_id: firewall_id,
        network_id: network_id,
        ssh_key_id: ssh_key_id
      }
    end
  end

  threads = server_configs.map do |server_config|
    Thread.new do
      servers << Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).create(server_config)
    end
  end

  threads.each(&:join) unless threads.empty?

  puts
  threads = servers.map do |server|
    Thread.new { wait_for_ssh server }
  end

  threads.each(&:join) unless threads.empty?
end
delete_resources() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 153
def delete_resources
  Hetzner::LoadBalancer.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).delete(ha: (masters.size > 1))

  Hetzner::Firewall.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).delete(all_servers)

  Hetzner::Network.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).delete

  Hetzner::SSHKey.new(
    hetzner_client: hetzner_client,
    cluster_name: cluster_name
  ).delete(ssh_key_path: ssh_key_path)

  threads = all_servers.map do |server|
    Thread.new do
      Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).delete(server_name: server["name"])
    end
  end

  threads.each(&:join) unless threads.empty?
end
deploy_cloud_controller_manager() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 291
def deploy_cloud_controller_manager
  puts
  puts "Deploying Hetzner Cloud Controller Manager..."

  begin
    kubernetes_client.api("v1").resource("secrets").get("hcloud", namespace: "kube-system")

  rescue K8s::Error::NotFound
    secret = K8s::Resource.new(
      apiVersion: "v1",
      kind: "Secret",
      metadata: {
        namespace: 'kube-system',
        name: 'hcloud',
      },
      data: {
        network: Base64.encode64(cluster_name),
        token: Base64.encode64(hetzner_token)
      }
    )

    kubernetes_client.api('v1').resource('secrets').create_resource(secret)
  end


  manifest = HTTP.follow.get("https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/latest/download/ccm-networks.yaml").body

  File.write("/tmp/cloud-controller-manager.yaml", manifest)

  resources = K8s::Resource.from_files("/tmp/cloud-controller-manager.yaml")

  begin
    kubernetes_client.api("apps/v1").resource("deployments").get("hcloud-cloud-controller-manager", namespace: "kube-system")

    resources.each do |resource|
      kubernetes_client.update_resource(resource)
    end

  rescue K8s::Error::NotFound
    resources.each do |resource|
      kubernetes_client.create_resource(resource)
    end

  end

  puts "...Cloud Controller Manager deployed"
rescue Excon::Error::Socket
  retry
end
deploy_csi_driver() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 370
def deploy_csi_driver
  puts
  puts "Deploying Hetzner CSI Driver..."

  begin
    kubernetes_client.api("v1").resource("secrets").get("hcloud-csi", namespace: "kube-system")

  rescue K8s::Error::NotFound
    secret = K8s::Resource.new(
      apiVersion: "v1",
      kind: "Secret",
      metadata: {
        namespace: 'kube-system',
        name: 'hcloud-csi',
      },
      data: {
        token: Base64.encode64(hetzner_token)
      }
    )

    kubernetes_client.api('v1').resource('secrets').create_resource(secret)
  end


  manifest = HTTP.follow.get("https://raw.githubusercontent.com/hetznercloud/csi-driver/v1.5.3/deploy/kubernetes/hcloud-csi.yml").body

  File.write("/tmp/csi-driver.yaml", manifest)

  resources = K8s::Resource.from_files("/tmp/csi-driver.yaml")

  begin
    kubernetes_client.api("apps/v1").resource("daemonsets").get("hcloud-csi-node", namespace: "kube-system")


    resources.each do |resource|
      begin
        kubernetes_client.update_resource(resource)
      rescue K8s::Error::Invalid => e
        raise e unless e.message =~ /must be specified/i
      end
    end

  rescue K8s::Error::NotFound
    resources.each do |resource|
      kubernetes_client.create_resource(resource)
    end

  end

  puts "...CSI Driver deployed"
rescue Excon::Error::Socket
  retry
end
deploy_kubernetes() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 249
def deploy_kubernetes
  puts
  puts "Deploying k3s to first master (#{first_master["name"]})..."

  ssh first_master, master_script(first_master), print_output: true

  puts
  puts "...k3s has been deployed to first master."

  save_kubeconfig

  if masters.size > 1
    threads = masters[1..-1].map do |master|
      Thread.new do
        puts
        puts "Deploying k3s to master #{master["name"]}..."

        ssh master, master_script(master), print_output: true

        puts
        puts "...k3s has been deployed to master #{master["name"]}."
      end
    end

    threads.each(&:join) unless threads.empty?
  end

  threads = workers.map do |worker|
    Thread.new do
      puts
      puts "Deploying k3s to worker (#{worker["name"]})..."

      ssh worker, worker_script(worker), print_output: true

      puts
      puts "...k3s has been deployed to worker (#{worker["name"]})."
    end
  end

  threads.each(&:join) unless threads.empty?
end
deploy_system_upgrade_controller() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 341
def deploy_system_upgrade_controller
  puts
  puts "Deploying k3s System Upgrade Controller..."

  manifest = HTTP.follow.get("https://github.com/rancher/system-upgrade-controller/releases/download/v0.7.3/system-upgrade-controller.yaml").body

  File.write("/tmp/system-upgrade-controller.yaml", manifest)

  resources = K8s::Resource.from_files("/tmp/system-upgrade-controller.yaml")

  begin
    kubernetes_client.api("apps/v1").resource("deployments").get("system-upgrade-controller", namespace: "system-upgrade")

    resources.each do |resource|
      kubernetes_client.update_resource(resource)
    end

  rescue K8s::Error::NotFound
    resources.each do |resource|
      kubernetes_client.create_resource(resource)
    end

  end

  puts "...k3s System Upgrade Controller deployed"
rescue Excon::Error::Socket
  retry
end
find_flannel_interface(server) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 472
def find_flannel_interface(server)
  if ssh(server, "lscpu | grep Vendor") =~ /Intel/
    "ens10"
  else
    "enp7s0"
  end
end
first_master() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 508
def first_master
  masters.first
end
first_master_private_ip() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 504
def first_master_private_ip
  @first_master_private_ip ||= first_master["private_net"][0]["ip"]
end
first_master_public_ip() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 535
def first_master_public_ip
  @first_master_public_ip ||= first_master.dig("public_net", "ipv4", "ip")
end
k3s_token() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 492
def k3s_token
  @k3s_token ||= begin
    token = ssh(first_master, "{ TOKEN=$(< /var/lib/rancher/k3s/server/node-token); } 2> /dev/null; echo $TOKEN")

    if token.empty?
      SecureRandom.hex
    else
      token.split(":").last
    end
  end
end
latest_k3s_version() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 76
def latest_k3s_version
  response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
  JSON.parse(response).first["name"]
end
master_script(master) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 206
    def master_script(master)
      server = master == first_master ? " --cluster-init " : " --server https://#{first_master_private_ip}:6443 "
      flannel_interface = find_flannel_interface(master)

      <<~EOF
      curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="#{k3s_version}" K3S_TOKEN="#{k3s_token}" INSTALL_K3S_EXEC="server \
        --disable-cloud-controller \
        --disable servicelb \
        --disable traefik \
        --disable local-storage \
        --disable metrics-server \
        --write-kubeconfig-mode=644 \
        --node-name="$(hostname -f)" \
        --cluster-cidr=10.244.0.0/16 \
        --etcd-expose-metrics=true \
        --kube-controller-manager-arg="address=0.0.0.0" \
        --kube-controller-manager-arg="bind-address=0.0.0.0" \
        --kube-proxy-arg="metrics-bind-address=0.0.0.0" \
        --kube-scheduler-arg="address=0.0.0.0" \
        --kube-scheduler-arg="bind-address=0.0.0.0" \
        --node-taint CriticalAddonsOnly=true:NoExecute \
        --kubelet-arg="cloud-provider=external" \
        --advertise-address=$(hostname -I | awk '{print $2}') \
        --node-ip=$(hostname -I | awk '{print $2}') \
        --node-external-ip=$(hostname -I | awk '{print $1}') \
        --flannel-iface=#{flannel_interface} \
        #{server} #{tls_sans}" sh -
      EOF
    end
masters() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 484
def masters
  @masters ||= all_servers.select{ |server| server["name"] =~ /master\d+\Z/ }.sort{ |a, b| a["name"] <=> b["name"] }
end
save_kubeconfig() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 539
def save_kubeconfig
  kubeconfig = ssh(first_master, "cat /etc/rancher/k3s/k3s.yaml").
    gsub("127.0.0.1", api_server_ip).
    gsub("default", cluster_name)

  File.write(kubeconfig_path, kubeconfig)
end
ssh(server, command, print_output: false) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 441
def ssh(server, command, print_output: false)
  public_ip = server.dig("public_net", "ipv4", "ip")
  output = ""

  Net::SSH.start(public_ip, "root", verify_host_key: (verify_host_key ? :always : :never)) do |session|
    session.exec!(command) do |channel, stream, data|
      output << data
      puts data if print_output
    end
  end
  output.chop
rescue Net::SSH::Disconnect => e
  retry unless e.message =~ /Too many authentication failures/
rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::EHOSTUNREACH
  retry
rescue Net::SSH::HostKeyMismatch
  puts
  puts "Cannot continue: Unable to SSH into server with IP #{public_ip} because the existing fingerprint in the known_hosts file does not match that of the actual host key."
  puts "This is due to a security check but can also happen when creating a new server that gets assigned the same IP address as another server you've owned in the past."
  puts "If are sure no security is being violated here and you're just creating new servers, you can eiher remove the relevant lines from your known_hosts (see IPs from the cloud console) or disable host key verification by setting the option 'verify_host_key' to false in the configuration file for the cluster."
  exit 1
end
ugrade_plan_manifest_path() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 547
    def ugrade_plan_manifest_path
      worker_upgrade_concurrency = workers.size - 1
      worker_upgrade_concurrency = 1 if worker_upgrade_concurrency == 0

      manifest = <<~EOF
        apiVersion: upgrade.cattle.io/v1
        kind: Plan
        metadata:
          name: k3s-server
          namespace: system-upgrade
          labels:
            k3s-upgrade: server
        spec:
          concurrency: 1
          version: #{new_k3s_version}
          nodeSelector:
            matchExpressions:
              - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]}
          serviceAccountName: system-upgrade
          tolerations:
          - key: "CriticalAddonsOnly"
            operator: "Equal"
            value: "true"
            effect: "NoExecute"
          cordon: true
          upgrade:
            image: rancher/k3s-upgrade
        ---
        apiVersion: upgrade.cattle.io/v1
        kind: Plan
        metadata:
          name: k3s-agent
          namespace: system-upgrade
          labels:
            k3s-upgrade: agent
        spec:
          concurrency: #{worker_upgrade_concurrency}
          version: #{new_k3s_version}
          nodeSelector:
            matchExpressions:
              - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]}
          serviceAccountName: system-upgrade
          prepare:
            image: rancher/k3s-upgrade
            args: ["prepare", "k3s-server"]
          cordon: true
          upgrade:
            image: rancher/k3s-upgrade
      EOF

      temp_file_path = "/tmp/k3s-upgrade-plan.yaml"

      File.write(temp_file_path, manifest)

      temp_file_path
    end
upgrade_cluster() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 183
def upgrade_cluster
  resources = K8s::Resource.from_files(ugrade_plan_manifest_path)

  begin
    kubernetes_client.api("upgrade.cattle.io/v1").resource("plans").get("k3s-server", namespace: "system-upgrade")

    puts "Aborting - an upgrade is already in progress."

  rescue K8s::Error::NotFound
    resources.each do |resource|
      kubernetes_client.create_resource(resource)
    end

    puts "Upgrade will now start. Run `watch kubectl get nodes` to see the nodes being upgraded. This should take a few minutes for a small cluster."
    puts "The API server may be briefly unavailable during the upgrade of the controlplane."

    configuration["k3s_version"] = new_k3s_version

    File.write(config_file, configuration.to_yaml)
  end
end
wait_for_ssh(server) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 424
def wait_for_ssh(server)
  Timeout::timeout(5) do
    server_name = server["name"]

    puts "Waiting for server #{server_name} to be up..."

    loop do
      result = ssh(server, "echo UP")
      break if result == "UP"
    end

    puts "...server #{server_name} is now up."
  end
rescue Errno::ENETUNREACH, Errno::EHOSTUNREACH, Timeout::Error, IOError
  retry
end
worker_script(worker) click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 236
    def worker_script(worker)
      flannel_interface = find_flannel_interface(worker)

      <<~EOF
      curl -sfL https://get.k3s.io | K3S_TOKEN="#{k3s_token}" INSTALL_K3S_VERSION="#{k3s_version}" K3S_URL=https://#{first_master_private_ip}:6443 INSTALL_K3S_EXEC="agent \
        --node-name="$(hostname -f)" \
        --kubelet-arg="cloud-provider=external" \
        --node-ip=$(hostname -I | awk '{print $2}') \
        --node-external-ip=$(hostname -I | awk '{print $1}') \
        --flannel-iface=#{flannel_interface}" sh -
      EOF
    end
workers() click to toggle source
# File lib/hetzner/k3s/cluster.rb, line 488
def workers
  @workers = all_servers.select{ |server| server["name"] =~ /worker\d+\Z/ }.sort{ |a, b| a["name"] <=> b["name"] }
end