From 2826b357d43a602957f813756e5c612172baedb7 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 30 Sep 2024 14:24:51 +0200 Subject: [PATCH 1/3] Remove serialized collect of ansible_default_ipv4 The fallback_ips tasks are essentially serializing the gathering of one fact on all the hosts, which can have dramatic performance implications on large clusters (several minutes). This is essentially a reversal of 35f248dff0ddb430e2293af98ba73aa5062c89c1 Being able to run without refreshing the cache facts is not worth it. We keep fallback_ip for now, simply changing the access to a normal hostvars variable instead of a custom dictionary. --- roles/etcd/tasks/join_etcd-events_member.yml | 2 +- roles/etcd/tasks/join_etcd_member.yml | 2 +- roles/etcd/templates/openssl.conf.j2 | 2 +- .../control-plane/tasks/kubeadm-upgrade.yml | 2 +- .../templates/kubeadm-config.v1beta3.yaml.j2 | 2 +- roles/kubernetes/node/defaults/main.yml | 4 +-- .../templates/loadbalancer/haproxy.cfg.j2 | 2 +- .../node/templates/loadbalancer/nginx.conf.j2 | 2 +- .../kubespray-defaults/defaults/main/main.yml | 16 ++++----- .../kubespray-defaults/tasks/fallback_ips.yml | 35 ------------------- roles/kubespray-defaults/tasks/main.yaml | 17 +++++++-- roles/kubespray-defaults/tasks/no_proxy.yml | 2 +- .../network_plugin/flannel/defaults/main.yml | 2 +- .../network_plugin/kube-ovn/defaults/main.yml | 2 +- 14 files changed, 34 insertions(+), 58 deletions(-) delete mode 100644 roles/kubespray-defaults/tasks/fallback_ips.yml diff --git a/roles/etcd/tasks/join_etcd-events_member.yml b/roles/etcd/tasks/join_etcd-events_member.yml index 106f06e03..10dd1c635 100644 --- a/roles/etcd/tasks/join_etcd-events_member.yml +++ b/roles/etcd/tasks/join_etcd-events_member.yml @@ -19,7 +19,7 @@ etcd_events_peer_addresses: >- {% for host in groups['etcd'] -%} {%- if hostvars[host]['etcd_events_member_in_cluster'].rc == 0 -%} - {{ "etcd" + loop.index | string }}=https://{{ hostvars[host].etcd_events_access_address |
default(hostvars[host].ip | default(fallback_ips[host])) }}:2382, + {{ "etcd" + loop.index | string }}=https://{{ hostvars[host].etcd_events_access_address | default(hostvars[host].ip | default(hostvars[host]['fallback_ip'])) }}:2382, {%- endif -%} {%- if loop.last -%} {{ etcd_member_name }}={{ etcd_events_peer_url }} diff --git a/roles/etcd/tasks/join_etcd_member.yml b/roles/etcd/tasks/join_etcd_member.yml index a2e37714d..7599d7d26 100644 --- a/roles/etcd/tasks/join_etcd_member.yml +++ b/roles/etcd/tasks/join_etcd_member.yml @@ -20,7 +20,7 @@ etcd_peer_addresses: >- {% for host in groups['etcd'] -%} {%- if hostvars[host]['etcd_member_in_cluster'].rc == 0 -%} - {{ "etcd" + loop.index | string }}=https://{{ hostvars[host].etcd_access_address | default(hostvars[host].ip | default(fallback_ips[host])) }}:2380, + {{ "etcd" + loop.index | string }}=https://{{ hostvars[host].etcd_access_address | default(hostvars[host].ip | default(hostvars[host]['fallback_ip'])) }}:2380, {%- endif -%} {%- if loop.last -%} {{ etcd_member_name }}={{ etcd_peer_url }} diff --git a/roles/etcd/templates/openssl.conf.j2 b/roles/etcd/templates/openssl.conf.j2 index b4a2d43d6..6ac5dd410 100644 --- a/roles/etcd/templates/openssl.conf.j2 +++ b/roles/etcd/templates/openssl.conf.j2 @@ -42,7 +42,7 @@ DNS.{{ counter["dns"] }} = {{ etcd_alt_name }}{{ increment(counter, 'dns') }} {% if hostvars[host]['access_ip'] is defined %} IP.{{ counter["ip"] }} = {{ hostvars[host]['access_ip'] }}{{ increment(counter, 'ip') }} {% endif %} -IP.{{ counter["ip"] }} = {{ hostvars[host]['ip'] | default(fallback_ips[host]) }}{{ increment(counter, 'ip') }} +IP.{{ counter["ip"] }} = {{ hostvars[host]['ip'] | default(hostvars[host]['fallback_ip']) }}{{ increment(counter, 'ip') }} {% endfor %} {% for cert_alt_ip in etcd_cert_alt_ips %} IP.{{ counter["ip"] }} = {{ cert_alt_ip }}{{ increment(counter, 'ip') }} diff --git a/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml 
b/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml index 99d351e17..c9dbabd44 100644 --- a/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml +++ b/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml @@ -1,7 +1,7 @@ --- - name: Kubeadm | Check api is up uri: - url: "https://{{ ip | default(fallback_ips[inventory_hostname]) }}:{{ kube_apiserver_port }}/healthz" + url: "https://{{ ip | default(fallback_ip) }}:{{ kube_apiserver_port }}/healthz" validate_certs: false when: ('kube_control_plane' in group_names) register: _result diff --git a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 index 1a9190829..f8c32c448 100644 --- a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 +++ b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta3.yaml.j2 @@ -108,7 +108,7 @@ kubernetesVersion: {{ kube_version }} {% if kubeadm_config_api_fqdn is defined %} controlPlaneEndpoint: {{ kubeadm_config_api_fqdn }}:{{ loadbalancer_apiserver.port | default(kube_apiserver_port) }} {% else %} -controlPlaneEndpoint: {{ ip | default(fallback_ips[inventory_hostname]) }}:{{ kube_apiserver_port }} +controlPlaneEndpoint: {{ ip | default(fallback_ip) }}:{{ kube_apiserver_port }} {% endif %} certificatesDir: {{ kube_cert_dir }} imageRepository: {{ kube_image_repo }} diff --git a/roles/kubernetes/node/defaults/main.yml b/roles/kubernetes/node/defaults/main.yml index a0ab20854..d37878840 100644 --- a/roles/kubernetes/node/defaults/main.yml +++ b/roles/kubernetes/node/defaults/main.yml @@ -1,6 +1,6 @@ --- # advertised host IP for kubelet. This affects network plugin config. 
Take caution -kubelet_address: "{{ ip | default(fallback_ips[inventory_hostname]) }}{{ (',' + ip6) if enable_dual_stack_networks and ip6 is defined else '' }}" +kubelet_address: "{{ ip | default(fallback_ip) }}{{ (',' + ip6) if enable_dual_stack_networks and ip6 is defined else '' }}" # bind address for kubelet. Set to 0.0.0.0 to listen on all interfaces kubelet_bind_address: "{{ ip | default('0.0.0.0') }}" @@ -29,7 +29,7 @@ kubelet_systemd_wants_dependencies: [] # List of secure IPs for kubelet kube_node_addresses: >- {%- for host in (groups['k8s_cluster'] | union(groups['etcd'])) -%} - {{ hostvars[host]['ip'] | default(fallback_ips[host]) }}{{ ' ' if not loop.last else '' }} + {{ hostvars[host]['ip'] | default(hostvars[host]['fallback_ip']) }}{{ ' ' if not loop.last else '' }} {%- endfor -%} kubelet_secure_addresses: "localhost link-local {{ kube_pods_subnet }} {{ kube_node_addresses }}" diff --git a/roles/kubernetes/node/templates/loadbalancer/haproxy.cfg.j2 b/roles/kubernetes/node/templates/loadbalancer/haproxy.cfg.j2 index c62932506..1cd8b4114 100644 --- a/roles/kubernetes/node/templates/loadbalancer/haproxy.cfg.j2 +++ b/roles/kubernetes/node/templates/loadbalancer/haproxy.cfg.j2 @@ -45,5 +45,5 @@ backend kube_api_backend option httpchk GET /healthz http-check expect status 200 {% for host in groups['kube_control_plane'] -%} - server {{ host }} {{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(fallback_ips[host])) }}:{{ kube_apiserver_port }} check check-ssl verify none + server {{ host }} {{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['fallback_ip'])) }}:{{ kube_apiserver_port }} check check-ssl verify none {% endfor -%} diff --git a/roles/kubernetes/node/templates/loadbalancer/nginx.conf.j2 b/roles/kubernetes/node/templates/loadbalancer/nginx.conf.j2 index 07b937042..d6b5cce4e 100644 --- a/roles/kubernetes/node/templates/loadbalancer/nginx.conf.j2 +++ 
b/roles/kubernetes/node/templates/loadbalancer/nginx.conf.j2 @@ -14,7 +14,7 @@ stream { upstream kube_apiserver { least_conn; {% for host in groups['kube_control_plane'] -%} - server {{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(fallback_ips[host])) }}:{{ kube_apiserver_port }}; + server {{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['fallback_ip'])) }}:{{ kube_apiserver_port }}; {% endfor -%} } diff --git a/roles/kubespray-defaults/defaults/main/main.yml b/roles/kubespray-defaults/defaults/main/main.yml index 55dce775d..8da49ae34 100644 --- a/roles/kubespray-defaults/defaults/main/main.yml +++ b/roles/kubespray-defaults/defaults/main/main.yml @@ -535,9 +535,9 @@ ssl_ca_dirs: |- # Vars for pointing to kubernetes api endpoints kube_apiserver_count: "{{ groups['kube_control_plane'] | length }}" -kube_apiserver_address: "{{ ip | default(fallback_ips[inventory_hostname]) }}" +kube_apiserver_address: "{{ ip | default(hostvars[inventory_hostname]['fallback_ip']) }}" kube_apiserver_access_address: "{{ access_ip | default(kube_apiserver_address) }}" -first_kube_control_plane_address: "{{ hostvars[groups['kube_control_plane'][0]]['access_ip'] | default(hostvars[groups['kube_control_plane'][0]]['ip'] | default(fallback_ips[groups['kube_control_plane'][0]])) }}" +first_kube_control_plane_address: "{{ hostvars[groups['kube_control_plane'][0]]['access_ip'] | default(hostvars[groups['kube_control_plane'][0]]['ip'] | default(hostvars[groups['kube_control_plane'][0]]['fallback_ip'])) }}" loadbalancer_apiserver_localhost: "{{ loadbalancer_apiserver is not defined }}" loadbalancer_apiserver_type: "nginx" # applied if only external loadbalancer_apiserver is defined, otherwise ignored @@ -570,7 +570,7 @@ etcd_events_cluster_enabled: false etcd_hosts: "{{ groups['etcd'] | default(groups['kube_control_plane']) }}" # Vars for pointing to etcd endpoints -etcd_address: "{{ ip | default(fallback_ips[inventory_hostname]) }}" 
+etcd_address: "{{ ip | default(fallback_ip) }}" etcd_access_address: "{{ access_ip | default(etcd_address) }}" etcd_events_access_address: "{{ access_ip | default(etcd_address) }}" etcd_peer_url: "https://{{ etcd_access_address }}:2380" @@ -579,17 +579,17 @@ etcd_events_peer_url: "https://{{ etcd_events_access_address }}:2382" etcd_events_client_url: "https://{{ etcd_events_access_address }}:2383" etcd_access_addresses: |- {% for item in etcd_hosts -%} - https://{{ hostvars[item]['etcd_access_address'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}:2379{% if not loop.last %},{% endif %} + https://{{ hostvars[item]['etcd_access_address'] | default(hostvars[item]['ip'] | default(hostvars[item]['fallback_ip'])) }}:2379{% if not loop.last %},{% endif %} {%- endfor %} etcd_events_access_addresses_list: |- [ {% for item in etcd_hosts -%} - 'https://{{ hostvars[item]['etcd_events_access_address'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}:2383'{% if not loop.last %},{% endif %} + 'https://{{ hostvars[item]['etcd_events_access_address'] | default(hostvars[item]['ip'] | default(hostvars[item]['fallback_ip'])) }}:2383'{% if not loop.last %},{% endif %} {%- endfor %} ] etcd_metrics_addresses: |- {% for item in etcd_hosts -%} - https://{{ hostvars[item]['etcd_access_address'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}:{{ etcd_metrics_port | default(2381) }}{% if not loop.last %},{% endif %} + https://{{ hostvars[item]['etcd_access_address'] | default(hostvars[item]['ip'] | default(hostvars[item]['fallback_ip'])) }}:{{ etcd_metrics_port | default(2381) }}{% if not loop.last %},{% endif %} {%- endfor %} etcd_events_access_addresses: "{{ etcd_events_access_addresses_list | join(',') }}" etcd_events_access_addresses_semicolon: "{{ etcd_events_access_addresses_list | join(';') }}" @@ -600,11 +600,11 @@ etcd_member_name: |- {% endfor %} etcd_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ 
hostvars[item].etcd_member_name | default("etcd" + loop.index | string) }}=https://{{ hostvars[item].etcd_access_address | default(hostvars[item].ip | default(fallback_ips[item])) }}:2380{% if not loop.last %},{% endif %} + {{ hostvars[item].etcd_member_name | default("etcd" + loop.index | string) }}=https://{{ hostvars[item].etcd_access_address | default(hostvars[item].ip | default(hostvars[item]['fallback_ip'])) }}:2380{% if not loop.last %},{% endif %} {%- endfor %} etcd_events_peer_addresses: |- {% for item in groups['etcd'] -%} - {{ hostvars[item].etcd_member_name | default("etcd" + loop.index | string) }}-events=https://{{ hostvars[item].etcd_events_access_address | default(hostvars[item].ip | default(fallback_ips[item])) }}:2382{% if not loop.last %},{% endif %} + {{ hostvars[item].etcd_member_name | default("etcd" + loop.index | string) }}-events=https://{{ hostvars[item].etcd_events_access_address | default(hostvars[item].ip | default(hostvars[item]['fallback_ip'])) }}:2382{% if not loop.last %},{% endif %} {%- endfor %} etcd_heartbeat_interval: "250" diff --git a/roles/kubespray-defaults/tasks/fallback_ips.yml b/roles/kubespray-defaults/tasks/fallback_ips.yml deleted file mode 100644 index ae3b15150..000000000 --- a/roles/kubespray-defaults/tasks/fallback_ips.yml +++ /dev/null @@ -1,35 +0,0 @@ ---- -# Set 127.0.0.1 as fallback IP if we do not have host facts for host -# ansible_default_ipv4 isn't what you think. 
-# Thanks https://medium.com/opsops/ansible-default-ipv4-is-not-what-you-think-edb8ab154b10 - -- name: Gather ansible_default_ipv4 from all hosts or specific hosts - setup: - gather_subset: '!all,network' - filter: "ansible_default_ipv4" - delegate_to: "{{ item }}" - delegate_facts: true - when: hostvars[item].ansible_default_ipv4 is not defined - loop: "{{ (ansible_play_hosts_all + [groups['kube_control_plane'][0]]) | unique if ansible_limit is defined else (groups['k8s_cluster'] | default([]) + groups['etcd'] | default([]) + groups['calico_rr'] | default([])) | unique }}" - run_once: true - ignore_unreachable: true - tags: always - -- name: Create fallback_ips_base - set_fact: - fallback_ips_base: | - --- - {% set search_hosts = (ansible_play_hosts_all + [groups['kube_control_plane'][0]]) | unique if ansible_limit is defined else (groups['k8s_cluster'] | default([]) + groups['etcd'] | default([]) + groups['calico_rr'] | default([])) | unique %} - {% for item in search_hosts %} - {% set found = hostvars[item].get('ansible_default_ipv4') %} - {{ item }}: "{{ found.get('address', '127.0.0.1') }}" - {% endfor %} - delegate_to: localhost - connection: local - delegate_facts: true - become: false - run_once: true - -- name: Set fallback_ips - set_fact: - fallback_ips: "{{ hostvars.localhost.fallback_ips_base | from_yaml }}" diff --git a/roles/kubespray-defaults/tasks/main.yaml b/roles/kubespray-defaults/tasks/main.yaml index 282a4cd60..a26ce63a2 100644 --- a/roles/kubespray-defaults/tasks/main.yaml +++ b/roles/kubespray-defaults/tasks/main.yaml @@ -3,9 +3,20 @@ tags: - always block: - - name: Set fallback_ips - import_tasks: fallback_ips.yml - when: fallback_ips is not defined + - name: Gather ansible_default_ipv4 + setup: + gather_subset: '!all,network' + filter: "ansible_default_ipv4" + when: ansible_default_ipv4 is not defined + ignore_unreachable: true + # Set 127.0.0.1 as fallback IP if we do not have host facts for host + # ansible_default_ipv4 isn't what you 
think. + # https://medium.com/opsops/ansible-default-ipv4-is-not-what-you-think-edb8ab154b10 + # TODO: discard this and update all the locations relying on it in "looping on hostvars" templates + - name: Set fallback_ip + set_fact: + fallback_ip: "{{ ansible_default_ipv4.address | d('127.0.0.1') }}" + when: fallback_ip is not defined - name: Set no_proxy import_tasks: no_proxy.yml diff --git a/roles/kubespray-defaults/tasks/no_proxy.yml b/roles/kubespray-defaults/tasks/no_proxy.yml index c686e655d..4aa85f7f2 100644 --- a/roles/kubespray-defaults/tasks/no_proxy.yml +++ b/roles/kubespray-defaults/tasks/no_proxy.yml @@ -13,7 +13,7 @@ {% set cluster_or_control_plane = 'k8s_cluster' %} {%- endif -%} {%- for item in (groups[cluster_or_control_plane] + groups['etcd'] | default([]) + groups['calico_rr'] | default([])) | unique -%} - {{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(fallback_ips[item])) }}, + {{ hostvars[item]['access_ip'] | default(hostvars[item]['ip'] | default(hostvars[item]['fallback_ip'])) }}, {%- if item != hostvars[item].get('ansible_hostname', '') -%} {{ hostvars[item]['ansible_hostname'] }}, {{ hostvars[item]['ansible_hostname'] }}.{{ dns_domain }}, diff --git a/roles/network_plugin/flannel/defaults/main.yml b/roles/network_plugin/flannel/defaults/main.yml index 8d7713bb9..6bdbaf039 100644 --- a/roles/network_plugin/flannel/defaults/main.yml +++ b/roles/network_plugin/flannel/defaults/main.yml @@ -2,7 +2,7 @@ # Flannel public IP # The address that flannel should advertise as how to access the system # Disabled until https://github.com/coreos/flannel/issues/712 is fixed -# flannel_public_ip: "{{ access_ip | default(ip | default(fallback_ips[inventory_hostname])) }}" +# flannel_public_ip: "{{ access_ip | default(ip | default(fallback_ip)) }}" ## interface that should be used for flannel operations ## This is actually an inventory cluster-level item diff --git a/roles/network_plugin/kube-ovn/defaults/main.yml
b/roles/network_plugin/kube-ovn/defaults/main.yml index 8b962072b..a06cba0b0 100644 --- a/roles/network_plugin/kube-ovn/defaults/main.yml +++ b/roles/network_plugin/kube-ovn/defaults/main.yml @@ -33,7 +33,7 @@ kube_ovn_central_replics: "{{ kube_ovn_central_hosts | length }}" kube_ovn_controller_replics: "{{ kube_ovn_central_hosts | length }}" kube_ovn_central_ips: |- {% for item in kube_ovn_central_hosts -%} - {{ hostvars[item]['ip'] | default(fallback_ips[item]) }}{% if not loop.last %},{% endif %} + {{ hostvars[item]['ip'] | default(hostvars[item]['fallback_ip']) }}{% if not loop.last %},{% endif %} {%- endfor %} kube_ovn_ic_enable: false From 1d032d06d1fef7360dcb8c3dd8849731af8bc507 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Mon, 30 Sep 2024 14:32:15 +0200 Subject: [PATCH 2/3] Docs update on access ip --- docs/ansible/vars.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ansible/vars.md b/docs/ansible/vars.md index 4768c536a..9d91894be 100644 --- a/docs/ansible/vars.md +++ b/docs/ansible/vars.md @@ -35,9 +35,9 @@ Some variables of note include: ## Addressing variables * *ip* - IP to use for binding services (host var). This would **usually** be the public ip. -* *access_ip* - IP for other hosts to use to connect to. Often required when - deploying from a cloud, such as OpenStack or GCE and you have separate - public/floating and private IPs. This would **usually** be the private ip. +* *access_ip* - IP to use from other hosts to connect to this host. Often required when deploying + from a cloud, such as OpenStack or GCE and you have separate public/floating and private IPs. + This would **usually** be the private ip. * *ansible_default_ipv4.address* - Not Kubespray-specific, but it is used if ip and access_ip are undefined * *ip6* - IPv6 address to use for binding services. 
(host var)

From b0fb06054e6a6b406e2836a919ce3d7b2ff1f5d2 Mon Sep 17 00:00:00 2001
From: Max Gautier
Date: Fri, 4 Oct 2024 09:46:30 +0200
Subject: [PATCH 3/3] Assert correct limit usage

This should prevent confusing errors with undefined variables
---
 .../preinstall/tasks/0040-verify-settings.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml
index 867cfb2ed..e06b22417 100644
--- a/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml
+++ b/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml
@@ -1,4 +1,24 @@
 ---
+# Roles template hostvars[<host>]['fallback_ip'] (set from ansible_default_ipv4)
+# for hosts other than the current one, so a host left out by '--limit' must
+# already have cached facts; fail here with a clear message otherwise.
+- name: Stop if any host not in '--limit' does not have a fact cache
+  vars:
+    # hostvars iterates as hostnames, so go through dict2items to test each host's facts
+    uncached_hosts: "{{ hostvars | dict2items | selectattr('value.ansible_default_ipv4', 'undefined') | map(attribute='key') | list }}"
+    # query() returns a list; lookup() would return a comma-joined string
+    excluded_hosts: "{{ hostvars.keys() | list | difference(query('inventory_hostnames', ansible_limit)) }}"
+  assert:
+    that: uncached_hosts | intersect(excluded_hosts) | length == 0
+    fail_msg: |
+      Kubespray does not support '--limit' without a populated facts cache for the excluded hosts.
+      Please run the facts.yml playbook first without '--limit'.
+      The following excluded hosts are not cached: {{ uncached_hosts | intersect(excluded_hosts) }}
+  run_once: true
+  when:
+    - ansible_limit is defined
+    - not ignore_assert_errors
+
 - name: Stop if kube_control_plane group is empty
   assert:
     that: groups.get( 'kube_control_plane' )