From 45262da7267aea1162c5196ab826f2f7226b98bc Mon Sep 17 00:00:00 2001 From: Cristian Calin <6627509+cristicalin@users.noreply.github.com> Date: Thu, 14 Apr 2022 11:08:46 +0300 Subject: [PATCH] [calico] call calico checks early on to prevent altering the cluster with bad configuration (#8707) --- docs/calico.md | 37 ++++--- .../preinstall/tasks/0020-verify-settings.yml | 85 --------------- roles/kubernetes/preinstall/tasks/main.yml | 8 ++ roles/network_plugin/calico/tasks/check.yml | 102 ++++++++++++++++++ roles/network_plugin/calico/tasks/main.yml | 2 - 5 files changed, 135 insertions(+), 99 deletions(-) diff --git a/docs/calico.md b/docs/calico.md index 51c2ec655..1cec1392c 100644 --- a/docs/calico.md +++ b/docs/calico.md @@ -214,6 +214,13 @@ Calico supports two types of encapsulation: [VXLAN and IP in IP](https://docs.pr *IP in IP* and *VXLAN* is mutualy exclusive modes. +Kubespray defaults have changed after version 2.18 from auto-enabling `ipip` mode to auto-enabling `vxlan`. This was done to facilitate wider deployment scenarios including those where vxlan acceleration is provided by the underlying network devices. + +If you are running your cluster with the default calico settings and are upgrading to a release post 2.18.x (i.e. 2.19 and later or `master` branch) then you have two options: + +* perform a manual migration to vxlan before upgrading kubespray (see migrating from IP in IP to VXLAN below) +* pin the pre-2.19 settings in your ansible inventory (see IP in IP mode settings below) + ### IP in IP mode To configure Ip in Ip mode you need to use the bird network backend. @@ -224,27 +231,33 @@ calico_vxlan_mode: 'Never' calico_network_backend: 'bird' ``` -### VXLAN mode (default) +### BGP mode -To configure VXLAN mode you can use the default settings, the example below is provided for your reference. +To enable BGP no-encapsulation mode: ```yml calico_ipip_mode: 'Never' -calico_vxlan_mode: 'Always' # Possible values is `Always`, `CrossSubnet`, `Never`. 
-calico_network_backend: 'vxlan' +calico_vxlan_mode: 'Never' +calico_network_backend: 'bird' ``` -In VXLAN mode BGP networking is not required. -We disable BGP to reduce the moving parts in your cluster by `calico_network_backend: vxlan` +### Migrating from IP in IP to VXLAN -### BGP mode +If you would like to migrate from the old default of IP in IP with the `bird` network backend to the new VXLAN based encapsulation, you need to perform this change before running an upgrade of your cluster; the `cluster.yml` and `upgrade-cluster.yml` playbooks will refuse to continue if they detect incompatible settings. -To enable BGP no-encapsulation mode: +Execute the following steps on one of the control plane nodes; ensure the cluster is healthy before proceeding. -```yml -calico_ipip_mode: 'Never' -calico_vxlan_mode: 'Never' -calico_network_backend: 'bird' +```shell +calicoctl.sh patch felixconfig default -p '{"spec":{"vxlanEnabled":true}}' +calicoctl.sh patch ippool default-pool -p '{"spec":{"ipipMode":"Never", "vxlanMode":"Always"}}' +``` + +**Note:** if you created multiple ippools you will need to patch all of them individually to change their encapsulation. The kubespray playbooks only handle the default ippool created by kubespray. + +Wait for the `vxlan.calico` interfaces to be created on all cluster nodes and traffic to be routed through them, then you can disable `ipip`. 
+ +```shell +calicoctl.sh patch felixconfig default -p '{"spec":{"ipipEnabled":false}}' ``` ## Configuring interface MTU diff --git a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml index 08f4eaeb1..eae32a4c6 100644 --- a/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0020-verify-settings.yml @@ -36,44 +36,6 @@ - kube_network_plugin is defined - not ignore_assert_errors -- name: Stop if legacy encapsulation variables are detected (ipip) - assert: - that: - - ipip is not defined - msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if legacy encapsulation variables are detected (ipip_mode) - assert: - that: - - ipip_mode is not defined - msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if incompatible network plugin and cloudprovider - assert: - that: - - calico_ipip_mode == 'Never' - - calico_vxlan_mode in ['Always', 'CrossSubnet'] - msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'" - when: - - cloud_provider is defined and cloud_provider == 'azure' - - kube_network_plugin == 'calico' - - not ignore_assert_errors - -- name: Stop if supported Calico versions - assert: - that: - - "calico_version in calico_crds_archive_checksums.keys()" - msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() }}" - when: - - kube_network_plugin == 'calico' - - not ignore_assert_errors - - name: Stop 
if unsupported version of Kubernetes assert: that: kube_version is version(kube_version_min_required, '>=') @@ -200,53 +162,6 @@ - cloud-provider - facts -- name: Get current calico cluster version - shell: "set -o pipefail && {{ bin_dir }}/calicoctl.sh version | grep 'Cluster Version:' | awk '{ print $3}'" - args: - executable: /bin/bash - register: calico_version_on_server - async: 10 - poll: 3 - run_once: yes - changed_when: false - failed_when: false - when: - - kube_network_plugin == 'calico' - -- name: Check that current calico version is enough for upgrade - assert: - that: - - calico_version_on_server.stdout is version(calico_min_version_required, '>=') - msg: > - Your version of calico is not fresh enough for upgrade. - Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release. - when: - - kube_network_plugin == 'calico' - - 'calico_version_on_server.stdout is defined' - - calico_version_on_server.stdout - - inventory_hostname == groups['kube_control_plane'][0] - run_once: yes - -- name: "Check that cluster_id is set if calico_rr enabled" - assert: - that: - - cluster_id is defined - msg: "A unique cluster_id is required if using calico_rr" - when: - - kube_network_plugin == 'calico' - - peer_with_calico_rr - - inventory_hostname == groups['kube_control_plane'][0] - run_once: yes - -- name: "Check that calico_rr nodes are in k8s_cluster group" - assert: - that: - - '"k8s_cluster" in group_names' - msg: "calico_rr must be a child group of k8s_cluster group" - when: - - kube_network_plugin == 'calico' - - '"calico_rr" in group_names' - - name: "Check that kube_service_addresses is a network range" assert: that: diff --git a/roles/kubernetes/preinstall/tasks/main.yml b/roles/kubernetes/preinstall/tasks/main.yml index 718f36092..495be4abb 100644 --- a/roles/kubernetes/preinstall/tasks/main.yml +++ b/roles/kubernetes/preinstall/tasks/main.yml @@ -117,3 +117,11 @@ - ansible_os_family == "RedHat" tags: - bootstrap-os + +- 
name: Run calico checks + include_role: + name: network_plugin/calico + tasks_from: check + when: + - kube_network_plugin == 'calico' + - not ignore_assert_errors diff --git a/roles/network_plugin/calico/tasks/check.yml b/roles/network_plugin/calico/tasks/check.yml index 41bf77cd3..0b164534a 100644 --- a/roles/network_plugin/calico/tasks/check.yml +++ b/roles/network_plugin/calico/tasks/check.yml @@ -1,16 +1,102 @@ --- +- name: Stop if legacy encapsulation variables are detected (ipip) + assert: + that: + - ipip is not defined + msg: "'ipip' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if legacy encapsulation variables are detected (ipip_mode) + assert: + that: + - ipip_mode is not defined + msg: "'ipip_mode' configuration variable is deprecated, please configure your inventory with 'calico_ipip_mode' set to 'Always' or 'CrossSubnet' according to your specific needs" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if incompatible network plugin and cloudprovider + assert: + that: + - calico_ipip_mode == 'Never' + - calico_vxlan_mode in ['Always', 'CrossSubnet'] + msg: "When using cloud_provider azure and network_plugin calico calico_ipip_mode must be 'Never' and calico_vxlan_mode 'Always' or 'CrossSubnet'" + when: + - cloud_provider is defined and cloud_provider == 'azure' + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Stop if supported Calico versions + assert: + that: + - "calico_version in calico_crds_archive_checksums.keys()" + msg: "Calico version not supported {{ calico_version }} not in {{ calico_crds_archive_checksums.keys() }}" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: Get current calico cluster version + shell: "set -o pipefail && {{ 
bin_dir }}/calicoctl.sh version | grep 'Cluster Version:' | awk '{ print $3}'" + args: + executable: /bin/bash + register: calico_version_on_server + async: 10 + poll: 3 + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + changed_when: false + failed_when: false + +- name: Check that current calico version is enough for upgrade + assert: + that: + - calico_version_on_server.stdout is version(calico_min_version_required, '>=') + msg: > + Your version of calico is not fresh enough for upgrade. + Minimum version is {{ calico_min_version_required }} supported by the previous kubespray release. + when: + - 'calico_version_on_server.stdout is defined' + - calico_version_on_server.stdout + - inventory_hostname == groups['kube_control_plane'][0] + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: "Check that cluster_id is set if calico_rr enabled" + assert: + that: + - cluster_id is defined + msg: "A unique cluster_id is required if using calico_rr" + when: + - peer_with_calico_rr + - inventory_hostname == groups['kube_control_plane'][0] + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + +- name: "Check that calico_rr nodes are in k8s_cluster group" + assert: + that: + - '"k8s_cluster" in group_names' + msg: "calico_rr must be a child group of k8s_cluster group" + when: + - '"calico_rr" in group_names' + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" + - name: "Check vars defined correctly" assert: that: - "calico_pool_name is defined" - "calico_pool_name is match('^[a-zA-Z0-9-_\\\\.]{2,63}$')" msg: "calico_pool_name contains invalid characters" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check calico network backend defined correctly" assert: that: - "calico_network_backend in ['bird', 'vxlan', 'none']" msg: "calico network backend is not 'bird', 'vxlan' or 'none'" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - 
name: "Check ipip and vxlan mode defined correctly" assert: @@ -18,6 +104,8 @@ - "calico_ipip_mode in ['Always', 'CrossSubnet', 'Never']" - "calico_vxlan_mode in ['Always', 'CrossSubnet', 'Never']" msg: "calico inter host encapsulation mode is not 'Always', 'CrossSubnet' or 'Never'" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check ipip and vxlan mode if simultaneously enabled" assert: @@ -26,6 +114,8 @@ msg: "IP in IP and VXLAN mode is mutualy exclusive modes" when: - "calico_ipip_mode in ['Always', 'CrossSubnet']" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check ipip and vxlan mode if simultaneously enabled" assert: @@ -34,6 +124,8 @@ msg: "IP in IP and VXLAN mode is mutualy exclusive modes" when: - "calico_vxlan_mode in ['Always', 'CrossSubnet']" + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Get Calico {{ calico_pool_name }} configuration" command: calicoctl.sh get ipPool {{ calico_pool_name }} -o json @@ -48,6 +140,8 @@ set_fact: calico_pool_conf: '{{ calico.stdout | from_json }}' when: calico.rc == 0 and calico.stdout + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check if inventory match current cluster configuration" assert: @@ -59,15 +153,23 @@ msg: "Your inventory doesn't match the current cluster configuration" when: - calico_pool_conf is defined + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check kdd calico_datastore if calico_apiserver_enabled" assert: that: calico_datastore == "kdd" + msg: "When using calico apiserver you need to use the kubernetes datastore" when: - calico_apiserver_enabled + run_once: True + delegate_to: "{{ groups['kube_control_plane'][0] }}" - name: "Check kdd calico_datastore if typha_enabled" assert: that: calico_datastore == "kdd" + msg: "When using typha you need to use the kubernetes datastore" when: - typha_enabled + run_once: True + delegate_to: "{{ 
groups['kube_control_plane'][0] }}" diff --git a/roles/network_plugin/calico/tasks/main.yml b/roles/network_plugin/calico/tasks/main.yml index df5812036..81844fa4f 100644 --- a/roles/network_plugin/calico/tasks/main.yml +++ b/roles/network_plugin/calico/tasks/main.yml @@ -1,6 +1,4 @@ --- -- import_tasks: check.yml - - import_tasks: pre.yml - import_tasks: repos.yml