From bbab1013c5afd295f2c011fce982f742c2f7c3b7 Mon Sep 17 00:00:00 2001
From: Fredrik Liv
Date: Thu, 24 Dec 2020 18:16:26 +0100
Subject: [PATCH] Added gcp terraform support (#6974)

* Added gcp terraform support

* Added http/https firewall rule

* Ignoring lifecycle changes for attached disks on the google_compute_instance
---
 contrib/terraform/gcp/README.md               |  90 +++++
 contrib/terraform/gcp/generate-inventory.sh   |  76 ++++
 contrib/terraform/gcp/main.tf                 |  24 ++
 .../gcp/modules/kubernetes-cluster/main.tf    | 360 ++++++++++++++++++
 .../gcp/modules/kubernetes-cluster/output.tf  |  27 ++
 .../modules/kubernetes-cluster/variables.tf   |  54 +++
 contrib/terraform/gcp/output.tf               |  15 +
 contrib/terraform/gcp/tfvars.json             |  60 +++
 contrib/terraform/gcp/variables.tf            |  72 ++++
 9 files changed, 778 insertions(+)
 create mode 100644 contrib/terraform/gcp/README.md
 create mode 100755 contrib/terraform/gcp/generate-inventory.sh
 create mode 100644 contrib/terraform/gcp/main.tf
 create mode 100644 contrib/terraform/gcp/modules/kubernetes-cluster/main.tf
 create mode 100644 contrib/terraform/gcp/modules/kubernetes-cluster/output.tf
 create mode 100644 contrib/terraform/gcp/modules/kubernetes-cluster/variables.tf
 create mode 100644 contrib/terraform/gcp/output.tf
 create mode 100644 contrib/terraform/gcp/tfvars.json
 create mode 100644 contrib/terraform/gcp/variables.tf

diff --git a/contrib/terraform/gcp/README.md b/contrib/terraform/gcp/README.md
new file mode 100644
index 000000000..b2d74d940
--- /dev/null
+++ b/contrib/terraform/gcp/README.md
@@ -0,0 +1,90 @@
+# Kubernetes on GCP with Terraform
+
+Provision a Kubernetes cluster on GCP using Terraform and Kubespray.
+
+## Overview
+
+The setup looks like the following:
+
+```
+                           Kubernetes cluster
+                        +-----------------------+
++---------------+       |   +--------------+    |
+|               |       |   | +--------------+  |
+| API server LB +---------> | |              |  |
+|               |       |   | | Master/etcd  |  |
++---------------+       |   | | node(s)      |  |
+                        |   +-+              |  |
+                        |     +--------------+  |
+                        |           ^           |
+                        |           |           |
+                        |           v           |
++---------------+       |   +--------------+    |
+|               |       |   | +--------------+  |
+|  Ingress LB   +---------> | |              |  |
+|               |       |   | |    Worker    |  |
++---------------+       |   | |   node(s)    |  |
+                        |   +-+              |  |
+                        |     +--------------+  |
+                        +-----------------------+
+```
+
+## Requirements
+
+* Terraform 0.12.0 or newer
+
+## Quickstart
+
+To get a cluster up and running you'll need a service account JSON keyfile.
+Set the path to the file in the `tfvars.json` file and run the following:
+
+```bash
+terraform apply -var-file tfvars.json -state dev-cluster.tfstate -var gcp_project_id=<gcp-project-id> -var keyfile_location=<path-to-keyfile>
+```
+
+To generate a Kubespray inventory based on the Terraform state file, run the following:
+
+```bash
+./generate-inventory.sh dev-cluster.tfstate > inventory.ini
+```
+
+You should now have an inventory file named `inventory.ini` that you can use with Kubespray, e.g.
+
+```bash
+ansible-playbook -i contrib/terraform/gcp/inventory.ini cluster.yml -b -v
+```
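+
+The generated inventory looks something like the following (machine names and
+IP addresses are illustrative and depend on your `machines` configuration):
+
+```ini
+master-0 ansible_user=ubuntu ansible_host=203.0.113.10 ip=10.0.10.2 etcd_member_name=etcd1
+worker-0 ansible_user=ubuntu ansible_host=203.0.113.11 ip=10.0.10.3
+
+[kube-master]
+master-0
+
+[kube-master:vars]
+supplementary_addresses_in_ssl_keys = [ '203.0.113.5' ]
+
+[etcd]
+master-0
+
+[kube-node]
+worker-0
+
+[k8s-cluster:children]
+kube-master
+kube-node
+```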
+
+## Variables
+
+### Required
+
+* `keyfile_location`: Path to the JSON keyfile to use as credentials for the Google Terraform provider
+* `gcp_project_id`: ID of the GCP project to deploy the cluster in
+* `ssh_pub_key`: Path to the public SSH key to use for all machines
+* `region`: The region to run the cluster in
+* `machines`: Machines to provision. The key of each entry is used as the name of the machine
+  * `node_type`: The role of this node *(master|worker)*
+  * `size`: The machine type to use
+  * `zone`: The zone the machine should run in
+  * `additional_disks`: Extra disks to add to the machine. The key of each entry is used as the disk name
+    * `size`: Size of the disk (in GB)
+  * `boot_disk`: The boot disk to use
+    * `image_name`: Name of the image
+    * `size`: Size of the boot disk (in GB)
+* `ssh_whitelist`: List of IP ranges (CIDR) that will be allowed to SSH to the nodes
+* `api_server_whitelist`: List of IP ranges (CIDR) that will be allowed to connect to the API server
+* `nodeport_whitelist`: List of IP ranges (CIDR) that will be allowed to connect to the Kubernetes nodes on ports 30000-32767 (Kubernetes NodePorts)
+
+### Optional
+
+* `prefix`: Prefix to use for all resources; must be unique for all clusters in the same project *(defaults to `default`)*
+* `master_sa_email`: Service account email to use for the master nodes *(defaults to `""`, in which case one is auto-generated)*
+* `master_sa_scopes`: Service account scopes to use for the master nodes *(defaults to `["https://www.googleapis.com/auth/cloud-platform"]`)*
+* `worker_sa_email`: Service account email to use for the worker nodes *(defaults to `""`, in which case one is auto-generated)*
+* `worker_sa_scopes`: Service account scopes to use for the worker nodes *(defaults to `["https://www.googleapis.com/auth/cloud-platform"]`)*
+
+An example variables file can be found in `tfvars.json`.
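+
+For reference, a single entry in the `machines` map looks like the following
+(the values shown are illustrative):
+
+```json
+"master-0": {
+  "node_type": "master",
+  "size": "n1-standard-2",
+  "zone": "us-central1-a",
+  "additional_disks": {},
+  "boot_disk": {
+    "image_name": "ubuntu-os-cloud/ubuntu-1804-bionic-v20201116",
+    "size": 50
+  }
+}
+```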
+
+## Known limitations
+
+This solution does not include a bastion host, so all nodes must expose a public IP for Kubespray to work.
diff --git a/contrib/terraform/gcp/generate-inventory.sh b/contrib/terraform/gcp/generate-inventory.sh
new file mode 100755
index 000000000..36cbcd776
--- /dev/null
+++ b/contrib/terraform/gcp/generate-inventory.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+#
+# Generates an inventory file based on the terraform output.
+# After provisioning a cluster, run this script with the terraform state file
+# (typically terraform.tfstate) as its only argument.
+#
+
+set -e
+
+usage () {
+    echo "Usage: $0 <state file>" >&2
+    exit 1
+}
+
+if [[ $# -ne 1 ]]; then
+    usage
+fi
+
+TF_STATE_FILE=${1}
+
+if [[ ! -f "${TF_STATE_FILE}" ]]; then
+    echo "ERROR: state file ${TF_STATE_FILE} doesn't exist" >&2
+    usage
+fi
+
+TF_OUT=$(terraform output -state "${TF_STATE_FILE}" -json)
+
+MASTERS=$(jq -r '.master_ips.value | to_entries[]' <(echo "${TF_OUT}"))
+WORKERS=$(jq -r '.worker_ips.value | to_entries[]' <(echo "${TF_OUT}"))
+mapfile -t MASTER_NAMES < <(jq -r '.key' <(echo "${MASTERS}"))
+mapfile -t WORKER_NAMES < <(jq -r '.key' <(echo "${WORKERS}"))
+
+API_LB=$(jq -r '.control_plane_lb_ip_address.value' <(echo "${TF_OUT}"))
+
+# Generate master hosts
+i=1
+for name in "${MASTER_NAMES[@]}"; do
+    private_ip=$(jq -r '. | select( .key=='"\"${name}\""' ) | .value.private_ip' <(echo "${MASTERS}"))
+    public_ip=$(jq -r '. | select( .key=='"\"${name}\""' ) | .value.public_ip' <(echo "${MASTERS}"))
+    echo "${name} ansible_user=ubuntu ansible_host=${public_ip} ip=${private_ip} etcd_member_name=etcd${i}"
+    i=$(( i + 1 ))
+done
+
+# Generate worker hosts
+for name in "${WORKER_NAMES[@]}"; do
+    private_ip=$(jq -r '. | select( .key=='"\"${name}\""' ) | .value.private_ip' <(echo "${WORKERS}"))
+    public_ip=$(jq -r '. | select( .key=='"\"${name}\""' ) | .value.public_ip' <(echo "${WORKERS}"))
+    echo "${name} ansible_user=ubuntu ansible_host=${public_ip} ip=${private_ip}"
+done
+
+echo ""
+echo "[kube-master]"
+for name in "${MASTER_NAMES[@]}"; do
+    echo "${name}"
+done
+
+echo ""
+echo "[kube-master:vars]"
+echo "supplementary_addresses_in_ssl_keys = [ '${API_LB}' ]" # Add LB address to API server certificate
+echo ""
+echo "[etcd]"
+for name in "${MASTER_NAMES[@]}"; do
+    echo "${name}"
+done
+
+echo ""
+echo "[kube-node]"
+for name in "${WORKER_NAMES[@]}"; do
+    echo "${name}"
+done
+
+echo ""
+echo "[k8s-cluster:children]"
+echo "kube-master"
+echo "kube-node"
diff --git a/contrib/terraform/gcp/main.tf b/contrib/terraform/gcp/main.tf
new file mode 100644
index 000000000..3cff429bd
--- /dev/null
+++ b/contrib/terraform/gcp/main.tf
@@ -0,0 +1,24 @@
+provider "google" {
+  credentials = file(var.keyfile_location)
+  region      = var.region
+  project     = var.gcp_project_id
+  version     = "~> 3.48"
+}
+
+module "kubernetes" {
+  source = "./modules/kubernetes-cluster"
+  region = var.region
+  prefix = var.prefix
+
+  machines    = var.machines
+  ssh_pub_key = var.ssh_pub_key
+
+  master_sa_email  = var.master_sa_email
+  master_sa_scopes = var.master_sa_scopes
+  worker_sa_email  = var.worker_sa_email
+  worker_sa_scopes = var.worker_sa_scopes
+
+  ssh_whitelist        = var.ssh_whitelist
+  api_server_whitelist = var.api_server_whitelist
+  nodeport_whitelist   = var.nodeport_whitelist
+}
diff --git a/contrib/terraform/gcp/modules/kubernetes-cluster/main.tf b/contrib/terraform/gcp/modules/kubernetes-cluster/main.tf
new file mode 100644
index 000000000..41e60fe8e
--- /dev/null
+++ b/contrib/terraform/gcp/modules/kubernetes-cluster/main.tf
@@ -0,0 +1,360 @@
+#################################################
+##
+## General
+##
+
+resource "google_compute_network" "main" {
+  name = "${var.prefix}-network"
+}
+
+resource "google_compute_subnetwork" "main" {
+  name          = "${var.prefix}-subnet"
+  network       = google_compute_network.main.name
+  ip_cidr_range = var.private_network_cidr
+  region        = var.region
+}
+
+# Deny all ingress by default; the allow rules below use lower priority
+# numbers, which take precedence over this rule in GCP.
+resource "google_compute_firewall" "deny_all" {
+  name    = "${var.prefix}-default-firewall"
+  network = google_compute_network.main.name
+
+  priority = 1000
+
+  deny {
+    protocol = "all"
+  }
+}
+
+resource "google_compute_firewall" "allow_internal" {
+  name    = "${var.prefix}-internal-firewall"
+  network = google_compute_network.main.name
+
+  priority = 500
+
+  source_ranges = [var.private_network_cidr]
+
+  allow {
+    protocol = "all"
+  }
+}
+
+resource "google_compute_firewall" "ssh" {
+  name    = "${var.prefix}-ssh-firewall"
+  network = google_compute_network.main.name
+
+  priority = 100
+
+  source_ranges = var.ssh_whitelist
+
+  allow {
+    protocol = "tcp"
+    ports    = ["22"]
+  }
+}
+
+resource "google_compute_firewall" "api_server" {
+  name    = "${var.prefix}-api-server-firewall"
+  network = google_compute_network.main.name
+
+  priority = 100
+
+  source_ranges = var.api_server_whitelist
+
+  allow {
+    protocol = "tcp"
+    ports    = ["6443"]
+  }
+}
+
+resource "google_compute_firewall" "nodeport" {
+  name    = "${var.prefix}-nodeport-firewall"
+  network = google_compute_network.main.name
+
+  priority = 100
+
+  source_ranges = var.nodeport_whitelist
+
+  allow {
+    protocol = "tcp"
+    ports    = ["30000-32767"]
+  }
+}
+
+# HTTP/HTTPS ingress is left open to any source (no source_ranges means
+# 0.0.0.0/0) so the ingress load balancer can serve public traffic.
+resource "google_compute_firewall" "ingress_http" {
+  name    = "${var.prefix}-http-ingress-firewall"
+  network = google_compute_network.main.name
+
+  priority = 100
+
+  allow {
+    protocol = "tcp"
+    ports    = ["80"]
+  }
+}
+
"ingress_https" { + name = "${var.prefix}-https-ingress-firewall" + network = google_compute_network.main.name + + priority = 100 + + allow { + protocol = "tcp" + ports = ["443"] + } +} + +################################################# +## +## Local variables +## + +locals { + master_target_list = [ + for name, machine in google_compute_instance.master : + "${machine.zone}/${machine.name}" + ] + + worker_target_list = [ + for name, machine in google_compute_instance.worker : + "${machine.zone}/${machine.name}" + ] + + master_disks = flatten([ + for machine_name, machine in var.machines : [ + for disk_name, disk in machine.additional_disks : { + "${machine_name}-${disk_name}" = { + "machine_name": machine_name, + "machine": machine, + "disk_size": disk.size, + "disk_name": disk_name + } + } + ] + if machine.node_type == "master" + ]) + + worker_disks = flatten([ + for machine_name, machine in var.machines : [ + for disk_name, disk in machine.additional_disks : { + "${machine_name}-${disk_name}" = { + "machine_name": machine_name, + "machine": machine, + "disk_size": disk.size, + "disk_name": disk_name + } + } + ] + if machine.node_type == "worker" + ]) +} + +################################################# +## +## Master +## + +resource "google_compute_address" "master" { + for_each = { + for name, machine in var.machines : + name => machine + if machine.node_type == "master" + } + + name = "${var.prefix}-${each.key}-pip" + address_type = "EXTERNAL" + region = var.region +} + +resource "google_compute_disk" "master" { + for_each = { + for item in local.master_disks : + keys(item)[0] => values(item)[0] + } + + name = "${var.prefix}-${each.key}" + type = "pd-ssd" + zone = each.value.machine.zone + size = each.value.disk_size + + physical_block_size_bytes = 4096 +} + +resource "google_compute_attached_disk" "master" { + for_each = { + for item in local.master_disks : + keys(item)[0] => values(item)[0] + } + + disk = google_compute_disk.master[each.key].id + instance = google_compute_instance.master[each.value.machine_name].id +} + +resource "google_compute_instance" "master" { + for_each = { + for name, machine in var.machines : + name => machine + if machine.node_type == "master" + } + + name = "${var.prefix}-${each.key}" + machine_type = each.value.size + zone = each.value.zone + + tags = ["master"] + + boot_disk { + initialize_params { + image = each.value.boot_disk.image_name + size = each.value.boot_disk.size + } + } + + network_interface { + subnetwork = google_compute_subnetwork.main.name + + access_config { + nat_ip = google_compute_address.master[each.key].address + } + } + + metadata = { + ssh-keys = "ubuntu:${trimspace(file(pathexpand(var.ssh_pub_key)))}" + } + + service_account { + email = var.master_sa_email + scopes = var.master_sa_scopes + } + + # Since we use google_compute_attached_disk we need to ignore this + lifecycle { + ignore_changes = ["attached_disk"] + } +} + +resource "google_compute_forwarding_rule" "master_lb" { + name = "${var.prefix}-master-lb-forward-rule" + + port_range = "6443" + + target = google_compute_target_pool.master_lb.id +} + +resource "google_compute_target_pool" "master_lb" { + name = "${var.prefix}-master-lb-pool" + instances = local.master_target_list +} + +################################################# +## +## Worker +## + +resource "google_compute_disk" "worker" { + for_each = { + for item in local.worker_disks : + keys(item)[0] => values(item)[0] + } + + name = "${var.prefix}-${each.key}" + type = "pd-ssd" + zone = 
+#################################################
+##
+## Worker
+##
+
+resource "google_compute_disk" "worker" {
+  for_each = {
+    for item in local.worker_disks :
+    keys(item)[0] => values(item)[0]
+  }
+
+  name = "${var.prefix}-${each.key}"
+  type = "pd-ssd"
+  zone = each.value.machine.zone
+  size = each.value.disk_size
+
+  physical_block_size_bytes = 4096
+}
+
+resource "google_compute_attached_disk" "worker" {
+  for_each = {
+    for item in local.worker_disks :
+    keys(item)[0] => values(item)[0]
+  }
+
+  disk     = google_compute_disk.worker[each.key].id
+  instance = google_compute_instance.worker[each.value.machine_name].id
+}
+
+resource "google_compute_address" "worker" {
+  for_each = {
+    for name, machine in var.machines :
+    name => machine
+    if machine.node_type == "worker"
+  }
+
+  name         = "${var.prefix}-${each.key}-pip"
+  address_type = "EXTERNAL"
+  region       = var.region
+}
+
+resource "google_compute_instance" "worker" {
+  for_each = {
+    for name, machine in var.machines :
+    name => machine
+    if machine.node_type == "worker"
+  }
+
+  name         = "${var.prefix}-${each.key}"
+  machine_type = each.value.size
+  zone         = each.value.zone
+
+  tags = ["worker"]
+
+  boot_disk {
+    initialize_params {
+      image = each.value.boot_disk.image_name
+      size  = each.value.boot_disk.size
+    }
+  }
+
+  network_interface {
+    subnetwork = google_compute_subnetwork.main.name
+
+    access_config {
+      nat_ip = google_compute_address.worker[each.key].address
+    }
+  }
+
+  metadata = {
+    ssh-keys = "ubuntu:${trimspace(file(pathexpand(var.ssh_pub_key)))}"
+  }
+
+  service_account {
+    email  = var.worker_sa_email
+    scopes = var.worker_sa_scopes
+  }
+
+  # Since we use google_compute_attached_disk we need to ignore this
+  lifecycle {
+    ignore_changes = ["attached_disk"]
+  }
+}
+
+resource "google_compute_address" "worker_lb" {
+  name         = "${var.prefix}-worker-lb-address"
+  address_type = "EXTERNAL"
+  region       = var.region
+}
+
+resource "google_compute_forwarding_rule" "worker_http_lb" {
+  name = "${var.prefix}-worker-http-lb-forward-rule"
+
+  ip_address = google_compute_address.worker_lb.address
+  port_range = "80"
+
+  target = google_compute_target_pool.worker_lb.id
+}
+
+resource "google_compute_forwarding_rule" "worker_https_lb" {
+  name = "${var.prefix}-worker-https-lb-forward-rule"
+
+  ip_address = google_compute_address.worker_lb.address
+  port_range = "443"
+
+  target = google_compute_target_pool.worker_lb.id
+}
+
+resource "google_compute_target_pool" "worker_lb" {
+  name      = "${var.prefix}-worker-lb-pool"
+  instances = local.worker_target_list
+}
diff --git a/contrib/terraform/gcp/modules/kubernetes-cluster/output.tf b/contrib/terraform/gcp/modules/kubernetes-cluster/output.tf
new file mode 100644
index 000000000..8e5b08016
--- /dev/null
+++ b/contrib/terraform/gcp/modules/kubernetes-cluster/output.tf
@@ -0,0 +1,27 @@
+output "master_ip_addresses" {
+  value = {
+    for key, instance in google_compute_instance.master :
+    instance.name => {
+      "private_ip" = instance.network_interface.0.network_ip
+      "public_ip"  = instance.network_interface.0.access_config.0.nat_ip
+    }
+  }
+}
+
+output "worker_ip_addresses" {
+  value = {
+    for key, instance in google_compute_instance.worker :
+    instance.name => {
+      "private_ip" = instance.network_interface.0.network_ip
+      "public_ip"  = instance.network_interface.0.access_config.0.nat_ip
+    }
+  }
+}
+
+output "ingress_controller_lb_ip_address" {
+  value = google_compute_address.worker_lb.address
+}
+
+output "control_plane_lb_ip_address" {
+  value = google_compute_forwarding_rule.master_lb.ip_address
+}
diff --git a/contrib/terraform/gcp/modules/kubernetes-cluster/variables.tf b/contrib/terraform/gcp/modules/kubernetes-cluster/variables.tf
new file mode 100644
index 000000000..d6632ac4b
--- /dev/null
+++ b/contrib/terraform/gcp/modules/kubernetes-cluster/variables.tf
@@ -0,0 +1,54 @@
+variable "region" {
+  type = string
+}
+
+variable "prefix" {}
+
+variable "machines" {
+  type = map(object({
+    node_type = string
+    size      = string
+    zone      = string
+    additional_disks = map(object({
+      size = number
+    }))
+    boot_disk = object({
+      image_name = string
+      size       = number
+    })
+  }))
+}
+
+variable "master_sa_email" {
+  type = string
+}
+
+variable "master_sa_scopes" {
+  type = list(string)
+}
+
+variable "worker_sa_email" {
+  type = string
+}
+
+variable "worker_sa_scopes" {
+  type = list(string)
+}
+
+variable "ssh_pub_key" {}
+
+variable "ssh_whitelist" {
+  type = list(string)
+}
+
+variable "api_server_whitelist" {
+  type = list(string)
+}
+
+variable "nodeport_whitelist" {
+  type = list(string)
+}
+
+variable "private_network_cidr" {
+  default = "10.0.10.0/24"
+}
diff --git a/contrib/terraform/gcp/output.tf b/contrib/terraform/gcp/output.tf
new file mode 100644
index 000000000..09bf7fa4a
--- /dev/null
+++ b/contrib/terraform/gcp/output.tf
@@ -0,0 +1,15 @@
+output "master_ips" {
+  value = module.kubernetes.master_ip_addresses
+}
+
+output "worker_ips" {
+  value = module.kubernetes.worker_ip_addresses
+}
+
+output "ingress_controller_lb_ip_address" {
+  value = module.kubernetes.ingress_controller_lb_ip_address
+}
+
+output "control_plane_lb_ip_address" {
+  value = module.kubernetes.control_plane_lb_ip_address
+}
diff --git a/contrib/terraform/gcp/tfvars.json b/contrib/terraform/gcp/tfvars.json
new file mode 100644
index 000000000..f154d8aa1
--- /dev/null
+++ b/contrib/terraform/gcp/tfvars.json
@@ -0,0 +1,60 @@
+{
+  "gcp_project_id": "GCP_PROJECT_ID",
+  "region": "us-central1",
+  "ssh_pub_key": "~/.ssh/id_rsa.pub",
+
+  "keyfile_location": "service-account.json",
+
+  "prefix": "development",
+
+  "ssh_whitelist": [
+    "1.2.3.4/32"
+  ],
+  "api_server_whitelist": [
+    "1.2.3.4/32"
+  ],
+  "nodeport_whitelist": [
+    "1.2.3.4/32"
+  ],
+
+  "machines": {
+    "master-0": {
+      "node_type": "master",
+      "size": "n1-standard-2",
+      "zone": "us-central1-a",
+      "additional_disks": {},
+      "boot_disk": {
+        "image_name": "ubuntu-os-cloud/ubuntu-1804-bionic-v20201116",
+        "size": 50
+      }
+    },
+    "worker-0": {
+      "node_type": "worker",
+      "size": "n1-standard-8",
+      "zone": "us-central1-a",
+      "additional_disks": {
+        "extra-disk-1": {
+          "size": 100
+        }
+      },
+      "boot_disk": {
+        "image_name": "ubuntu-os-cloud/ubuntu-1804-bionic-v20201116",
+        "size": 50
+      }
+    },
+    "worker-1": {
+      "node_type": "worker",
+      "size": "n1-standard-8",
+      "zone": "us-central1-a",
+      "additional_disks": {
+        "extra-disk-1": {
+          "size": 100
+        }
+      },
+      "boot_disk": {
+        "image_name": "ubuntu-os-cloud/ubuntu-1804-bionic-v20201116",
+        "size": 50
+      }
+    }
+  }
+}
diff --git a/contrib/terraform/gcp/variables.tf b/contrib/terraform/gcp/variables.tf
new file mode 100644
index 000000000..9850e61c5
--- /dev/null
+++ b/contrib/terraform/gcp/variables.tf
@@ -0,0 +1,72 @@
+variable "keyfile_location" {
+  description = "Location of the JSON keyfile to use with the google provider"
+  type        = string
+}
+
+variable "region" {
+  description = "Region of all resources"
+  type        = string
+}
+
+variable "gcp_project_id" {
+  description = "ID of the project"
+  type        = string
+}
+
+variable "prefix" {
+  description = "Prefix for resource names"
+  default     = "default"
+}
+
+variable "machines" {
+  description = "Cluster machines"
+  type = map(object({
+    node_type = string
+    size      = string
+    zone      = string
+    additional_disks = map(object({
+      size = number
+    }))
+    boot_disk = object({
+      image_name = string
+      size       = number
+    })
+  }))
+}
+
default = "" +} + +variable "master_sa_scopes" { + type = list(string) + default = ["https://www.googleapis.com/auth/cloud-platform"] +} + +variable "worker_sa_email" { + type = string + default = "" +} + +variable "worker_sa_scopes" { + type = list(string) + default = ["https://www.googleapis.com/auth/cloud-platform"] +} + +variable ssh_pub_key { + description = "Path to public SSH key file which is injected into the VMs." + type = string +} + +variable ssh_whitelist { + type = list(string) +} + +variable api_server_whitelist { + type = list(string) +} + +variable nodeport_whitelist { + type = list(string) +}