
Add graceful upgrade process

Based on #718 introduced by rsmitty.

Includes all roles and all options to support deployment of
new hosts in case they were added to the inventory.

The main difference here is that the master role is evaluated first
so that master components get upgraded first.

Fixes #694
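
For orientation, a condensed sketch of the play ordering this introduces, distilled from the upgrade-cluster.yml diff below (the bootstrap, etcd, vault, calico-rr and final apps plays are omitted here for brevity):

```
# Condensed from upgrade-cluster.yml below: masters first, one at a time,
# then workers in batches; each node is cordoned and drained before the
# upgrade and uncordoned afterwards.
- hosts: kube-master
  serial: 1
  roles:
    - { role: upgrade/pre-upgrade, tags: pre-upgrade }    # cordon + drain
    - { role: kubernetes/node, tags: node }
    - { role: kubernetes/master, tags: master }
    - { role: network_plugin, tags: network }
    - { role: upgrade/post-upgrade, tags: post-upgrade }  # uncordon

- hosts: kube-node:!kube-master
  serial: "{{ serial | default('20%') }}"  # worker batch size, 20% by default
  roles:
    - { role: upgrade/pre-upgrade, tags: pre-upgrade }
    - { role: kubernetes/node, tags: node }
    - { role: network_plugin, tags: network }
    - { role: upgrade/post-upgrade, tags: post-upgrade }
```

The full playbook in the diff below also re-runs the etcd, vault, calico-rr and kubernetes-apps plays around these two.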
Branch: pull/1029/head
Matthew Mosesohn · 7 years ago · commit 97ebbb9672
5 changed files with 96 additions and 22 deletions
  1. .gitlab-ci.yml (15)
  2. docs/upgrades.md (16)
  3. roles/upgrade/post-upgrade/tasks/main.yml (2)
  4. roles/upgrade/pre-upgrade/tasks/main.yml (4)
  5. upgrade-cluster.yml (81)

.gitlab-ci.yml (15)

@@ -101,8 +101,8 @@ before_script:
# Check out latest tag if testing upgrade
# Uncomment when gitlab kargo repo has tags
-#- test "${UPGRADE_TEST}" = "true" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
-- test "${UPGRADE_TEST}" = "true" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
+#- test "${UPGRADE_TEST}" != "false" && git fetch --all && git checkout $(git describe --tags $(git rev-list --tags --max-count=1))
+- test "${UPGRADE_TEST}" != "false" && git checkout 031cf565ec3ccd3ebbe80eeef3454c3780e5c598 && pip install ansible==2.2.0
# Create cluster
@@ -127,9 +127,10 @@ before_script:
cluster.yml
# Repeat deployment if testing upgrade
#FIXME(mattymo): repeat "Create cluster" above without duplicating code
- >
-if [ "${UPGRADE_TEST}" = "true" ]; then
+if [ "${UPGRADE_TEST}" != "false" ]; then
+test "${UPGRADE_TEST}" == "basic" && PLAYBOOK="cluster.yml";
+test "${UPGRADE_TEST}" == "graceful" && PLAYBOOK="upgrade-cluster.yml";
pip install ansible==2.2.1.0;
git checkout "${CI_BUILD_REF}";
ansible-playbook -i inventory/inventory.ini -b --become-user=root --private-key=${HOME}/.ssh/id_rsa -u $SSH_USER
@@ -149,7 +150,7 @@ before_script:
-e resolvconf_mode=${RESOLVCONF_MODE}
-e weave_cpu_requests=${WEAVE_CPU_LIMIT}
-e weave_cpu_limit=${WEAVE_CPU_LIMIT}
-cluster.yml;
+$PLAYBOOK;
fi
# Tests Cases
@@ -253,7 +254,7 @@ before_script:
KUBE_NETWORK_PLUGIN: canal
CLOUD_IMAGE: debian-8-kubespray
CLOUD_REGION: us-east1-b
-UPGRADE_TEST: "true"
+UPGRADE_TEST: "basic"
CLUSTER_MODE: ha
.rhel7_weave_variables: &rhel7_weave_variables
@@ -261,7 +262,7 @@ before_script:
KUBE_NETWORK_PLUGIN: weave
CLOUD_IMAGE: rhel-7
CLOUD_REGION: europe-west1-b
-UPGRADE_TEST: "true"
+UPGRADE_TEST: "graceful"
CLUSTER_MODE: default
.centos7_flannel_variables: &centos7_flannel_variables

docs/upgrades.md (16)

@@ -18,7 +18,7 @@ versions. Here are all version vars for each component:
* flannel_version
* kubedns_version
-#### Example
+#### Unsafe upgrade example
If you wanted to upgrade just kube_version from v1.4.3 to v1.4.6, you could
deploy the following way:
@@ -33,6 +33,20 @@ And then repeat with v1.4.6 as kube_version:
ansible-playbook cluster.yml -i inventory/inventory.cfg -e kube_version=v1.4.6
```
+#### Graceful upgrade
+Kargo also supports cordoning, draining and uncordoning of nodes when performing
+a cluster upgrade. There is a separate playbook used for this purpose. It is
+important to note that upgrade-cluster.yml can only be used for upgrading an
+existing cluster. That means there must be at least 1 kube-master already
+deployed.
+```
+git fetch origin
+git checkout origin/master
+ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg
+```
#### Upgrade order
As mentioned above, components are upgraded in the order in which they were
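
One detail worth noting alongside the graceful upgrade section above: the worker play in upgrade-cluster.yml templates its serial value, so the batch size should be adjustable per run. The -e serial=1 override shown in the comment is an assumption based on normal Ansible extra-var precedence, not something stated in this commit.

```
# Excerpt of the worker play from the upgrade-cluster.yml diff further below.
# 20% of the kube-node group is drained and upgraded at a time unless a
# "serial" variable is supplied, e.g.:
#   ansible-playbook upgrade-cluster.yml -i inventory/inventory.cfg -e serial=1
- hosts: kube-node:!kube-master
  any_errors_fatal: true
  serial: "{{ serial | default('20%') }}"
```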

roles/upgrade/post-upgrade/tasks/main.yml (2)

@@ -1,5 +1,5 @@
---
- name: Uncordon node
-command: kubectl uncordon {{ ansible_hostname }}
+command: "{{ bin_dir }}/kubectl uncordon {{ ansible_hostname }}"
delegate_to: "{{ groups['kube-master'][0] }}"

roles/upgrade/pre-upgrade/tasks/main.yml (4)

@@ -1,11 +1,11 @@
---
- name: Cordon node
-command: kubectl cordon {{ ansible_hostname }}
+command: "{{ bin_dir }}/kubectl cordon {{ ansible_hostname }}"
delegate_to: "{{ groups['kube-master'][0] }}"
- name: Drain node
-command: kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}
+command: "{{ bin_dir }}/kubectl drain --force --ignore-daemonsets --grace-period 30 --delete-local-data {{ ansible_hostname }}"
delegate_to: "{{ groups['kube-master'][0] }}"
- name: Sleep for grace period for draining

upgrade-cluster.yml (81)

@@ -1,33 +1,92 @@
---
- hosts: all
- hosts: localhost
gather_facts: False
roles:
- bastion-ssh-config
tags: [localhost, bastion]
- hosts: k8s-cluster:etcd:calico-rr
any_errors_fatal: true
gather_facts: false
vars:
# Need to disable pipelining for bootstrap-os as some systems have requiretty in sudoers set, which makes pipelining
# fail. bootstrap-os fixes this on these systems, so in later plays it can be enabled.
ansible_ssh_pipelining: false
roles:
- bootstrap-os
tags:
- bootstrap-os
- hosts: k8s-cluster:etcd:calico-rr
any_errors_fatal: true
vars:
ansible_ssh_pipelining: true
gather_facts: true
- hosts: all:!network-storage
- hosts: k8s-cluster:etcd:calico-rr
any_errors_fatal: true
roles:
- { role: kernel-upgrade, tags: kernel-upgrade, when: kernel_upgrade is defined and kernel_upgrade }
- { role: kubernetes/preinstall, tags: preinstall }
- { role: docker, tags: docker }
- role: rkt
tags: rkt
when: "'rkt' in [etcd_deployment_type, kubelet_deployment_type, vault_deployment_type]"
- hosts: etcd:k8s-cluster:vault
any_errors_fatal: true
roles:
- { role: vault, tags: vault, vault_bootstrap: true, when: "cert_management == 'vault'" }
- hosts: etcd:!k8s-cluster
any_errors_fatal: true
serial: 1
roles:
- { role: etcd, tags: etcd }
- hosts: kube-node
- hosts: k8s-cluster
any_errors_fatal: true
serial: 1
roles:
- { role: etcd, tags: etcd }
- { role: upgrade/pre-upgrade, tags: upgrade/pre-upgrade }
- { role: kubernetes/node, tags: node }
- { role: network_plugin, tags: network }
- { role: upgrade/post-upgrade, tags: upgrade/post-upgrade }
- hosts: etcd:k8s-cluster:vault
any_errors_fatal: true
roles:
- { role: vault, tags: vault, when: "cert_management == 'vault'"}
#Handle upgrades to master components first to maintain backwards compat.
- hosts: kube-master
any_errors_fatal: true
serial: 1
roles:
- { role: etcd, tags: etcd }
- { role: upgrade/pre-upgrade, tags: pre-upgrade }
- { role: kubernetes/node, tags: node }
- { role: kubernetes/master, tags: master }
- { role: kubernetes/master, tags: master }
- { role: network_plugin, tags: network }
- { role: upgrade/post-upgrade, tags: post-upgrade }
#Finally handle worker upgrades, based on given batch size
- hosts: kube-node:!kube-master
any_errors_fatal: true
serial: "{{ serial | default('20%') }}"
roles:
- { role: upgrade/pre-upgrade, tags: pre-upgrade }
- { role: kubernetes/node, tags: node }
- { role: network_plugin, tags: network }
- { role: upgrade/post-upgrade, tags: post-upgrade }
- { role: kubernetes-apps/network_plugin, tags: network }
- hosts: calico-rr
any_errors_fatal: true
roles:
- { role: network_plugin/calico/rr, tags: network }
- hosts: k8s-cluster
any_errors_fatal: true
roles:
- { role: dnsmasq, when: "dns_mode == 'dnsmasq_kubedns'", tags: dnsmasq }
- { role: kubernetes/preinstall, when: "dns_mode != 'none' and resolvconf_mode == 'host_resolvconf'", tags: resolvconf }
- hosts: kube-master[0]
any_errors_fatal: true
roles:
- { role: kubernetes-apps, tags: apps }