Browse Source

Systemd units, limits, and bin path fixes

* Add restart for weave service unit
* Reuse docker_bin_dir everywhere
* Limit systemd-managed docker containers by CPU/RAM. Do not configure native
  systemd limits: due to the lack of consensus in the kernel community, they
  require out-of-tree kernel patches.

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
pull/816/head
Bogdan Dobrelya 8 years ago
committed by Bogdan Dobrelya
parent
commit
a56d9de502
46 changed files with 237 additions and 50 deletions
  1. 7
      docs/large-deployments.md
  2. 1
      inventory/group_vars/all.yml
  3. 2
      roles/docker/handlers/main.yml
  4. 2
      roles/docker/templates/docker.service.j2
  5. 6
      roles/download/tasks/main.yml
  6. 2
      roles/download/tasks/set_docker_image_facts.yml
  7. 4
      roles/etcd/defaults/main.yml
  8. 8
      roles/etcd/tasks/install.yml
  9. 4
      roles/etcd/tasks/pre_upgrade.yml
  10. 4
      roles/etcd/templates/deb-etcd-docker.initd.j2
  11. 9
      roles/etcd/templates/etcd-docker.service.j2
  12. 20
      roles/kubernetes-apps/ansible/defaults/main.yml
  13. 7
      roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2
  14. 7
      roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
  15. 7
      roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
  16. 14
      roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
  17. 14
      roles/kubernetes/master/defaults/main.yml
  18. 2
      roles/kubernetes/master/tasks/main.yml
  19. 7
      roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2
  20. 7
      roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2
  21. 7
      roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2
  22. 12
      roles/kubernetes/node/defaults/main.yml
  23. 2
      roles/kubernetes/node/templates/deb-kubelet.initd.j2
  24. 3
      roles/kubernetes/node/templates/kubelet-container.j2
  25. 4
      roles/kubernetes/node/templates/kubelet.service.j2
  26. 7
      roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2
  27. 7
      roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2
  28. 14
      roles/network_plugin/calico/defaults/main.yml
  29. 7
      roles/network_plugin/calico/rr/templates/calico-rr.service.j2
  30. 4
      roles/network_plugin/calico/tasks/main.yml
  31. 7
      roles/network_plugin/calico/templates/calico-node.service.j2
  32. 5
      roles/network_plugin/calico/templates/calicoctl-container.j2
  33. 10
      roles/network_plugin/canal/defaults/main.yml
  34. 4
      roles/network_plugin/canal/tasks/main.yml
  35. 14
      roles/network_plugin/canal/templates/canal-node.yml.j2
  36. 2
      roles/network_plugin/cloud/tasks/main.yml
  37. 6
      roles/network_plugin/flannel/defaults/main.yml
  38. 2
      roles/network_plugin/flannel/handlers/main.yml
  39. 10
      roles/network_plugin/flannel/templates/flannel-pod.yml
  40. 4
      roles/network_plugin/weave/defaults/main.yml
  41. 2
      roles/network_plugin/weave/tasks/main.yml
  42. 1
      roles/network_plugin/weave/templates/weave.j2
  43. 5
      roles/network_plugin/weave/templates/weave.service.j2
  44. 6
      roles/network_plugin/weave/templates/weaveproxy.service.j2
  45. 2
      roles/reset/tasks/main.yml
  46. 6
      scripts/collect-info.yaml

7
docs/large-deployments.md

@ -20,5 +20,12 @@ For a large scaled deployments, consider the following configuration changes:
``dns_cpu_requests``, ``dns_memory_limit``, ``dns_memory_requests``.
Please note that limits must always be greater than or equal to requests.
* Tune CPU/memory limits and requests. Those are located in roles' defaults
and named like ``foo_memory_limit``, ``foo_memory_requests`` and
``foo_cpu_limit``, ``foo_cpu_requests``. Note that the K8s 'Mi' memory unit
is converted to 'M' when passed to ``docker run``, and the 'm' suffix of K8s
CPU units is likewise dropped for docker. This is required because docker does
not understand K8s units.
For example, when deploying 200 nodes, you may want to run ansible with
``--forks=50``, ``--timeout=600`` and define the ``retry_stagger: 60``.

1
inventory/group_vars/all.yml

@ -188,6 +188,7 @@ docker_daemon_graph: "/var/lib/docker"
## An obvious use case is allowing insecure-registry access
## to self hosted registries like so:
docker_options: "--insecure-registry={{ kube_service_addresses }} --graph={{ docker_daemon_graph }}"
docker_bin_dir: "/usr/bin"
## Uncomment this if you want to force overlay/overlay2 as docker storage driver
## Please note that overlay2 is only supported on newer kernels

2
roles/docker/handlers/main.yml

@ -27,7 +27,7 @@
pause: seconds=10 prompt="Waiting for docker restart"
- name: Docker | wait for docker
command: /usr/bin/docker images
command: "{{ docker_bin_dir }}/docker images"
register: docker_ready
retries: 10
delay: 5

2
roles/docker/templates/docker.service.j2

@ -18,7 +18,7 @@ Environment=GOTRACEBACK=crash
ExecReload=/bin/kill -s HUP $MAINPID
Delegate=yes
KillMode=process
ExecStart=/usr/bin/docker daemon \
ExecStart={{ docker_bin_dir }}/docker daemon \
$DOCKER_OPTS \
$DOCKER_STORAGE_OPTIONS \
$DOCKER_NETWORK_OPTIONS \

6
roles/download/tasks/main.yml

@ -79,7 +79,7 @@
#NOTE(bogdando) this brings no docker-py deps for nodes
- name: Download containers if pull is required or told to always pull
command: "/usr/bin/docker pull {{ pull_args }}"
command: "{{ docker_bin_dir }}/docker pull {{ pull_args }}"
register: pull_task_result
until: pull_task_result|success
retries: 4
@ -115,7 +115,7 @@
tags: facts
- name: Download | save container images
shell: docker save "{{ pull_args }}" | gzip -{{ download_compress }} > "{{ fname }}"
shell: "{{ docker_bin_dir }}/docker save {{ pull_args }} | gzip -{{ download_compress }} > {{ fname }}"
delegate_to: "{{ download_delegate }}"
register: saved
run_once: true
@ -145,6 +145,6 @@
tags: [upload, upgrade]
- name: Download | load container images
shell: docker load < "{{ fname }}"
shell: "{{ docker_bin_dir }}/docker load < {{ fname }}"
when: (ansible_os_family != "CoreOS" and inventory_hostname != groups['kube-master'][0] or download_delegate == "localhost") and download_run_once|bool and download.enabled|bool and download.container|bool
tags: [upload, upgrade]

2
roles/download/tasks/set_docker_image_facts.yml

@ -8,7 +8,7 @@
{%- if pull_by_digest|bool %}{{download.repo}}@sha256:{{download.sha256}}{%- else -%}{{download.repo}}:{{download.tag}}{%- endif -%}
- name: Register docker images info
shell: "{% raw %}/usr/bin/docker images -q | xargs /usr/bin/docker inspect -f '{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
shell: "{{ docker_bin_dir }}/docker images -q | xargs {{ docker_bin_dir }}/docker inspect -f {% raw %}'{{.RepoTags}},{{.RepoDigests}}'{% endraw %}"
register: docker_images_raw
failed_when: false
when: not download_always_pull|bool

4
roles/etcd/defaults/main.yml

@ -6,3 +6,7 @@ etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
etcd_cert_group: root
etcd_script_dir: "{{ bin_dir }}/etcd-scripts"
# Limits
etcd_memory_limit: 512M
etcd_cpu_limit: 300m

8
roles/etcd/tasks/install.yml

@ -12,10 +12,10 @@
#Plan A: no docker-py deps
- name: Install | Copy etcdctl binary from container
command: sh -c "/usr/bin/docker rm -f etcdctl-binarycopy;
/usr/bin/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
/usr/bin/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
/usr/bin/docker rm -f etcdctl-binarycopy"
command: sh -c "{{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy;
{{ docker_bin_dir }}/docker create --name etcdctl-binarycopy {{ etcd_image_repo }}:{{ etcd_image_tag }} &&
{{ docker_bin_dir }}/docker cp etcdctl-binarycopy:{{ etcd_container_bin_dir }}etcdctl {{ bin_dir }}/etcdctl &&
{{ docker_bin_dir }}/docker rm -f etcdctl-binarycopy"
when: etcd_deployment_type == "docker"
register: etcd_task_result
until: etcd_task_result.rc == 0

4
roles/etcd/tasks/pre_upgrade.yml

@ -26,12 +26,12 @@
- /etc/init.d/etcd-proxy
- name: "Pre-upgrade | find etcd-proxy container"
command: docker ps -aq --filter "name=etcd-proxy*"
command: "{{ docker_bin_dir }}/docker ps -aq --filter 'name=etcd-proxy*'"
register: etcd_proxy_container
failed_when: false
- name: "Pre-upgrade | remove etcd-proxy if it exists"
command: "docker rm -f {{item}}"
command: "{{ docker_bin_dir }}/docker rm -f {{item}}"
with_items: "{{etcd_proxy_container.stdout_lines}}"
- name: "Pre-upgrade | check if member list is non-SSL"

4
roles/etcd/templates/deb-etcd-docker.initd.j2

@ -15,7 +15,7 @@ set -a
PATH=/sbin:/usr/sbin:/bin/:/usr/bin
DESC="etcd k/v store"
NAME=etcd
DAEMON={{ docker_bin_dir | default("/usr/bin") }}/docker
DAEMON={{ docker_bin_dir }}/docker
DAEMON_EXEC=`basename $DAEMON`
DAEMON_ARGS="run --restart=on-failure:5 --env-file=/etc/etcd.env \
--net=host \
@ -50,7 +50,7 @@ do_status()
#
do_start()
{
{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }} &>/dev/null || true
{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }} &>/dev/null || true
sleep 1
start-stop-daemon --background --start --quiet --make-pidfile --pidfile $PID --user $DAEMON_USER --exec $DAEMON -- \
$DAEMON_ARGS \

9
roles/etcd/templates/etcd-docker.service.j2

@ -6,7 +6,7 @@ After=docker.service
[Service]
User=root
PermissionsStartOnly=true
ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-failure:5 \
ExecStart={{ docker_bin_dir }}/docker run --restart=on-failure:5 \
--env-file=/etc/etcd.env \
{# TODO(mattymo): Allow docker IP binding and disable in envfile
-p 2380:2380 -p 2379:2379 #}
@ -14,14 +14,15 @@ ExecStart={{ docker_bin_dir | default("/usr/bin") }}/docker run --restart=on-fai
-v /etc/ssl/certs:/etc/ssl/certs:ro \
-v {{ etcd_cert_dir }}:{{ etcd_cert_dir }}:ro \
-v /var/lib/etcd:/var/lib/etcd:rw \
--memory={{ etcd_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ etcd_cpu_limit|regex_replace('m', '') }} \
--name={{ etcd_member_name | default("etcd") }} \
{{ etcd_image_repo }}:{{ etcd_image_tag }} \
{% if etcd_after_v3 %}
{{ etcd_container_bin_dir }}etcd
{% endif %}
ExecStartPre=-{{ docker_bin_dir | default("/usr/bin") }}/docker rm -f {{ etcd_member_name | default("etcd-proxy") }}
ExecReload={{ docker_bin_dir | default("/usr/bin") }}/docker restart {{ etcd_member_name | default("etcd-proxy") }}
ExecStop={{ docker_bin_dir | default("/usr/bin") }}/docker stop {{ etcd_member_name | default("etcd-proxy") }}
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f {{ etcd_member_name | default("etcd") }}
ExecReload={{ docker_bin_dir }}/docker restart {{ etcd_member_name | default("etcd") }}
ExecStop={{ docker_bin_dir }}/docker stop {{ etcd_member_name | default("etcd") }}
Restart=always
RestartSec=15s

20
roles/kubernetes-apps/ansible/defaults/main.yml

@ -20,6 +20,12 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
calico_policy_image_repo: "calico/kube-policy-controller"
calico_policy_image_tag: latest
# Limits for calico apps
calico_policy_controller_cpu_limit: 100m
calico_policy_controller_memory_limit: 256M
calico_policy_controller_cpu_requests: 30m
calico_policy_controller_memory_requests: 128M
# Netchecker
deploy_netchecker: false
netchecker_port: 31081
@ -29,5 +35,19 @@ agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
# Limits for netchecker apps
netchecker_agent_cpu_limit: 30m
netchecker_agent_memory_limit: 100M
netchecker_agent_cpu_requests: 15m
netchecker_agent_memory_requests: 64M
netchecker_server_cpu_limit: 100m
netchecker_server_memory_limit: 256M
netchecker_server_cpu_requests: 50m
netchecker_server_memory_requests: 128M
netchecker_kubectl_cpu_limit: 30m
netchecker_kubectl_memory_limit: 128M
netchecker_kubectl_cpu_requests: 15m
netchecker_kubectl_memory_requests: 64M
# SSL
etcd_cert_dir: "/etc/ssl/etcd/ssl"

7
roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2

@ -25,6 +25,13 @@ spec:
- name: calico-policy-controller
image: {{ calico_policy_image_repo }}:{{ calico_policy_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ calico_policy_controller_cpu_limit }}
memory: {{ calico_policy_controller_memory_limit }}
requests:
cpu: {{ calico_policy_controller_cpu_requests }}
memory: {{ calico_policy_controller_memory_requests }}
env:
- name: ETCD_ENDPOINTS
value: "{{ etcd_access_endpoint }}"

7
roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml

@ -23,3 +23,10 @@ spec:
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_agent_cpu_limit }}
memory: {{ netchecker_agent_memory_limit }}
requests:
cpu: {{ netchecker_agent_cpu_requests }}
memory: {{ netchecker_agent_memory_requests }}

7
roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml

@ -24,3 +24,10 @@ spec:
- name: REPORT_INTERVAL
value: '{{ agent_report_interval }}'
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_agent_cpu_limit }}
memory: {{ netchecker_agent_memory_limit }}
requests:
cpu: {{ netchecker_agent_cpu_requests }}
memory: {{ netchecker_agent_memory_requests }}

14
roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml

@ -11,11 +11,25 @@ spec:
image: "{{ server_img }}"
env:
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_server_cpu_limit }}
memory: {{ netchecker_server_memory_limit }}
requests:
cpu: {{ netchecker_server_cpu_requests }}
memory: {{ netchecker_server_memory_requests }}
ports:
- containerPort: 8081
hostPort: 8081
- name: kubectl-proxy
image: "{{ kubectl_image }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ netchecker_kubectl_cpu_limit }}
memory: {{ netchecker_kubectl_memory_limit }}
requests:
cpu: {{ netchecker_kubectl_cpu_requests }}
memory: {{ netchecker_kubectl_memory_requests }}
args:
- proxy

14
roles/kubernetes/master/defaults/main.yml

@ -13,4 +13,16 @@ kube_apiserver_node_port_range: "30000-32767"
etcd_config_dir: /etc/ssl/etcd
etcd_cert_dir: "{{ etcd_config_dir }}/ssl"
# Limits for kube components
kube_controller_memory_limit: 512M
kube_controller_cpu_limit: 250m
kube_controller_memory_requests: 170M
kube_controller_cpu_requests: 100m
kube_scheduler_memory_limit: 512M
kube_scheduler_cpu_limit: 250m
kube_scheduler_memory_requests: 170M
kube_scheduler_cpu_requests: 100m
kube_apiserver_memory_limit: 2000M
kube_apiserver_cpu_limit: 800m
kube_apiserver_memory_requests: 256M
kube_apiserver_cpu_requests: 300m

2
roles/kubernetes/master/tasks/main.yml

@ -3,7 +3,7 @@
tags: k8s-pre-upgrade
- name: Copy kubectl from hyperkube container
command: "/usr/bin/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
command: "{{ docker_bin_dir }}/docker run --rm -v {{ bin_dir }}:/systembindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp /hyperkube /systembindir/kubectl"
register: kube_task_result
until: kube_task_result.rc == 0
retries: 4

7
roles/kubernetes/master/templates/manifests/kube-apiserver.manifest.j2

@ -12,6 +12,13 @@ spec:
- name: kube-apiserver
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_apiserver_cpu_limit }}
memory: {{ kube_apiserver_memory_limit }}
requests:
cpu: {{ kube_apiserver_cpu_requests }}
memory: {{ kube_apiserver_memory_requests }}
command:
- /hyperkube
- apiserver

7
roles/kubernetes/master/templates/manifests/kube-controller-manager.manifest.j2

@ -11,6 +11,13 @@ spec:
- name: kube-controller-manager
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_controller_cpu_limit }}
memory: {{ kube_controller_memory_limit }}
requests:
cpu: {{ kube_controller_cpu_requests }}
memory: {{ kube_controller_memory_requests }}
command:
- /hyperkube
- controller-manager

7
roles/kubernetes/master/templates/manifests/kube-scheduler.manifest.j2

@ -11,6 +11,13 @@ spec:
- name: kube-scheduler
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_scheduler_cpu_limit }}
memory: {{ kube_scheduler_memory_limit }}
requests:
cpu: {{ kube_scheduler_cpu_requests }}
memory: {{ kube_scheduler_memory_requests }}
command:
- /hyperkube
- scheduler

12
roles/kubernetes/node/defaults/main.yml

@ -9,6 +9,18 @@ kube_proxy_mode: iptables
# If using the pure iptables proxy, SNAT everything
kube_proxy_masquerade_all: true
# Limits for kube components and nginx load balancer app
kubelet_memory_limit: 512M
kubelet_cpu_limit: 100m
kube_proxy_memory_limit: 2000M
kube_proxy_cpu_limit: 500m
kube_proxy_memory_requests: 256M
kube_proxy_cpu_requests: 150m
nginx_memory_limit: 512M
nginx_cpu_limit: 300m
nginx_memory_requests: 64M
nginx_cpu_requests: 50m
# kube_api_runtime_config:
# - extensions/v1beta1/daemonsets=true
# - extensions/v1beta1/deployments=true

2
roles/kubernetes/node/templates/deb-kubelet.initd.j2

@ -39,7 +39,7 @@ DAEMON_USER=root
#
do_start()
{
/usr/bin/docker rm -f kubelet &>/dev/null || true
{{ docker_bin_dir }}/docker rm -f kubelet &>/dev/null || true
sleep 1
# Return
# 0 if daemon has been started

3
roles/kubernetes/node/templates/kubelet-container.j2

@ -1,5 +1,5 @@
#!/bin/bash
/usr/bin/docker run --privileged \
{{ docker_bin_dir }}/docker run --privileged \
--net=host --pid=host --name=kubelet --restart=on-failure:5 \
-v /etc/cni:/etc/cni:ro \
-v /opt/cni:/opt/cni:ro \
@ -9,6 +9,7 @@
-v {{ docker_daemon_graph }}:/var/lib/docker \
-v /var/run:/var/run \
-v /var/lib/kubelet:/var/lib/kubelet \
--memory={{ kubelet_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ kubelet_cpu_limit|regex_replace('m', '') }} \
{{ hyperkube_image_repo }}:{{ hyperkube_image_tag}} \
nsenter --target=1 --mount --wd=. -- \
./hyperkube kubelet \

4
roles/kubernetes/node/templates/kubelet.service.j2

@ -23,8 +23,8 @@ ExecStart={{ bin_dir }}/kubelet \
$DOCKER_SOCKET \
$KUBELET_NETWORK_PLUGIN \
$KUBELET_CLOUDPROVIDER
ExecStartPre=-/usr/bin/docker rm -f kubelet
ExecReload=/usr/bin/docker restart kubelet
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f kubelet
ExecReload={{ docker_bin_dir }}/docker restart kubelet
Restart=always
RestartSec=10s

7
roles/kubernetes/node/templates/manifests/kube-proxy.manifest.j2

@ -11,6 +11,13 @@ spec:
- name: kube-proxy
image: {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ kube_proxy_cpu_limit }}
memory: {{ kube_proxy_memory_limit }}
requests:
cpu: {{ kube_proxy_cpu_requests }}
memory: {{ kube_proxy_memory_requests }}
command:
- /hyperkube
- proxy

7
roles/kubernetes/node/templates/manifests/nginx-proxy.manifest.j2

@ -11,6 +11,13 @@ spec:
- name: nginx-proxy
image: {{ nginx_image_repo }}:{{ nginx_image_tag }}
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ nginx_cpu_limit }}
memory: {{ nginx_memory_limit }}
requests:
cpu: {{ nginx_cpu_requests }}
memory: {{ nginx_memory_requests }}
securityContext:
privileged: true
volumeMounts:

14
roles/network_plugin/calico/defaults/main.yml

@ -19,3 +19,17 @@ global_as_num: "64512"
# not be specified in calico CNI config, so Calico will use built-in
# defaults. The value should be a number, not a string.
# calico_mtu: 1500
# Limits for apps
calico_rr_memory_limit: 1000M
calico_rr_cpu_limit: 300m
calico_rr_memory_requests: 500M
calico_rr_cpu_requests: 150m
calico_node_memory_limit: 500M
calico_node_cpu_limit: 300m
calico_node_memory_requests: 256M
calico_node_cpu_requests: 150m
calicoctl_memory_limit: 170M
calicoctl_cpu_limit: 100m
calicoctl_memory_requests: 70M
calicoctl_cpu_requests: 50m

7
roles/network_plugin/calico/rr/templates/calico-rr.service.j2

@ -5,8 +5,8 @@ Requires=docker.service
[Service]
EnvironmentFile=/etc/calico/calico-rr.env
ExecStartPre=-/usr/bin/docker rm -f calico-rr
ExecStart=/usr/bin/docker run --net=host --privileged \
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f calico-rr
ExecStart={{ docker_bin_dir }}/docker run --net=host --privileged \
--name=calico-rr \
-e IP=${IP} \
-e IP6=${IP6} \
@ -16,12 +16,13 @@ ExecStart=/usr/bin/docker run --net=host --privileged \
-e ETCD_KEY_FILE=${ETCD_KEY_FILE} \
-v /var/log/calico-rr:/var/log/calico \
-v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
--memory={{ calico_rr_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calico_rr_cpu_limit|regex_replace('m', '') }} \
{{ calico_rr_image_repo }}:{{ calico_rr_image_tag }}
Restart=always
RestartSec=10s
ExecStop=-/usr/bin/docker stop calico-rr
ExecStop=-{{ docker_bin_dir }}/docker stop calico-rr
[Install]
WantedBy=multi-user.target

4
roles/network_plugin/calico/tasks/main.yml

@ -41,7 +41,7 @@
notify: restart calico-node
- name: Calico | Copy cni plugins from hyperkube
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4
@ -50,7 +50,7 @@
tags: [hyperkube, upgrade]
- name: Calico | Copy cni plugins from calico/cni container
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4

7
roles/network_plugin/calico/templates/calico-node.service.j2

@ -5,8 +5,8 @@ Requires=docker.service
[Service]
EnvironmentFile=/etc/calico/calico.env
ExecStartPre=-/usr/bin/docker rm -f calico-node
ExecStart=/usr/bin/docker run --net=host --privileged \
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f calico-node
ExecStart={{ docker_bin_dir }}/docker run --net=host --privileged \
--name=calico-node \
-e HOSTNAME=${CALICO_HOSTNAME} \
-e IP=${CALICO_IP} \
@ -24,12 +24,13 @@ ExecStart=/usr/bin/docker run --net=host --privileged \
-v /lib/modules:/lib/modules \
-v /var/run/calico:/var/run/calico \
-v {{ calico_cert_dir }}:{{ calico_cert_dir }}:ro \
--memory={{ calico_node_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calico_node_cpu_limit|regex_replace('m', '') }} \
{{ calico_node_image_repo }}:{{ calico_node_image_tag }}
Restart=always
RestartSec=10s
ExecStop=-/usr/bin/docker stop calico-node
ExecStop=-{{ docker_bin_dir }}/docker stop calico-node
[Install]
WantedBy=multi-user.target

5
roles/network_plugin/calico/templates/calicoctl-container.j2

@ -1,13 +1,14 @@
#!/bin/bash
/usr/bin/docker run -i --privileged --rm \
{{ docker_bin_dir }}/docker run -i --privileged --rm \
--net=host --pid=host \
-e ETCD_ENDPOINTS={{ etcd_access_endpoint }} \
-e ETCD_CA_CERT_FILE=/etc/calico/certs/ca_cert.crt \
-e ETCD_CERT_FILE=/etc/calico/certs/cert.crt \
-e ETCD_KEY_FILE=/etc/calico/certs/key.pem \
-v /usr/bin/docker:/usr/bin/docker \
-v {{ docker_bin_dir }}/docker:{{ docker_bin_dir }}/docker \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /var/run/calico:/var/run/calico \
-v /etc/calico/certs:/etc/calico/certs:ro \
--memory={{ calicoctl_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ calicoctl_cpu_limit|regex_replace('m', '') }} \
{{ calicoctl_image_repo }}:{{ calicoctl_image_tag}} \
$@

10
roles/network_plugin/canal/defaults/main.yml

@ -13,3 +13,13 @@ canal_log_level: "info"
# Etcd SSL dirs
canal_cert_dir: /etc/canal/certs
etcd_cert_dir: /etc/ssl/etcd/ssl
# Limits for apps
calico_node_memory_limit: 500M
calico_node_cpu_limit: 200m
calico_node_memory_requests: 256M
calico_node_cpu_requests: 100m
flannel_memory_limit: 500M
flannel_cpu_limit: 200m
flannel_memory_requests: 256M
flannel_cpu_requests: 100m

4
roles/network_plugin/canal/tasks/main.yml

@ -43,7 +43,7 @@
dest: "{{kube_config_dir}}/canal-node.yaml"
- name: Canal | Copy cni plugins from hyperkube
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /usr/bin/rsync -a /opt/cni/bin/ /cnibindir/"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4
@ -52,7 +52,7 @@
tags: [hyperkube, upgrade]
- name: Canal | Copy cni plugins from calico/cni
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ calico_cni_image_repo }}:{{ calico_cni_image_tag }} sh -c 'cp -a /opt/cni/bin/* /cnibindir/'"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4

14
roles/network_plugin/canal/templates/canal-node.yml.j2

@ -49,6 +49,13 @@ spec:
- name: flannel
image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ flannel_cpu_limit }}
memory: {{ flannel_memory_limit }}
requests:
cpu: {{ flannel_cpu_requests }}
memory: {{ flannel_memory_requests }}
env:
# Cluster name
- name: CLUSTER_NAME
@ -119,6 +126,13 @@ spec:
- name: calico-node
image: "{{ calico_node_image_repo }}:{{ calico_node_image_tag }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ calico_node_cpu_limit }}
memory: {{ calico_node_memory_limit }}
requests:
cpu: {{ calico_node_cpu_requests }}
memory: {{ calico_node_memory_requests }}
env:
# The location of the etcd cluster.
- name: ETCD_ENDPOINTS

2
roles/network_plugin/cloud/tasks/main.yml

@ -1,7 +1,7 @@
---
- name: Cloud | Copy cni plugins from hyperkube
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4

6
roles/network_plugin/flannel/defaults/main.yml

@ -10,3 +10,9 @@ flannel_public_ip: "{{ access_ip|default(ip|default(ansible_default_ipv4.address
# You can choose what type of flannel backend to use
# please refer to flannel's docs : https://github.com/coreos/flannel/blob/master/README.md
flannel_backend_type: "vxlan"
# Limits for apps
flannel_memory_limit: 500M
flannel_cpu_limit: 300m
flannel_memory_requests: 256M
flannel_cpu_requests: 150m

2
roles/network_plugin/flannel/handlers/main.yml

@ -32,7 +32,7 @@
pause: seconds=10 prompt="Waiting for docker restart"
- name: Flannel | wait for docker
command: /usr/bin/docker images
command: "{{ docker_bin_dir }}/docker images"
register: docker_ready
retries: 10
delay: 5

10
roles/network_plugin/flannel/templates/flannel-pod.yml

@ -19,6 +19,13 @@
- name: "flannel-container"
image: "{{ flannel_image_repo }}:{{ flannel_image_tag }}"
imagePullPolicy: {{ k8s_image_pull_policy }}
resources:
limits:
cpu: {{ flannel_cpu_limit }}
memory: {{ flannel_memory_limit }}
requests:
cpu: {{ flannel_cpu_requests }}
memory: {{ flannel_memory_requests }}
command:
- "/bin/sh"
- "-c"
@ -26,9 +33,6 @@
ports:
- hostPort: 10253
containerPort: 10253
resources:
limits:
cpu: "100m"
volumeMounts:
- name: "subnetenv"
mountPath: "/run/flannel"

4
roles/network_plugin/weave/defaults/main.yml

@ -0,0 +1,4 @@
---
# Limits
weave_memory_limit: 500M
weave_cpu_limit: 300m

2
roles/network_plugin/weave/tasks/main.yml

@ -1,6 +1,6 @@
---
- name: Weave | Copy cni plugins from hyperkube
command: "/usr/bin/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
command: "{{ docker_bin_dir }}/docker run --rm -v /opt/cni/bin:/cnibindir {{ hyperkube_image_repo }}:{{ hyperkube_image_tag }} /bin/cp -r /opt/cni/bin/. /cnibindir/"
register: cni_task_result
until: cni_task_result.rc == 0
retries: 4

1
roles/network_plugin/weave/templates/weave.j2

@ -1,3 +1,4 @@
WEAVE_DOCKER_ARGS="--memory={{ weave_memory_limit|regex_replace('Mi', 'M') }} --cpu-shares={{ weave_cpu_limit|regex_replace('m', '') }}"
WEAVE_PEERS="{% for host in groups['k8s-cluster'] %}{{ hostvars[host]['access_ip'] | default(hostvars[host]['ip'] | default(hostvars[host]['ansible_default_ipv4']['address'])) }}{% if not loop.last %} {% endif %}{% endfor %}"
WEAVEPROXY_ARGS="--rewrite-inspect --without-dns"
WEAVE_SUBNET="--ipalloc-range {{ kube_pods_subnet }}"

5
roles/network_plugin/weave/templates/weave.service.j2

@ -6,12 +6,13 @@ After=docker.service docker.socket
[Service]
EnvironmentFile=-/etc/weave.env
ExecStartPre=-/usr/bin/docker rm -f weave
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f weave
ExecStartPre={{ bin_dir }}/weave launch-router \
$WEAVE_SUBNET \
$WEAVE_PEERS
ExecStart=/usr/bin/docker attach weave
ExecStart={{ docker_bin_dir }}/docker attach weave
ExecStop={{ bin_dir }}/weave stop
Restart=on-failure
[Install]
WantedBy=multi-user.target

6
roles/network_plugin/weave/templates/weaveproxy.service.j2

@ -7,11 +7,11 @@ After=docker.service docker.socket
[Service]
EnvironmentFile=-/etc/weave.%H.env
EnvironmentFile=-/etc/weave.env
ExecStartPre=-/usr/bin/docker rm -f weaveproxy
ExecStartPre=-{{ docker_bin_dir }}/docker rm -f weaveproxy
ExecStartPre={{ bin_dir }}/weave launch-proxy $WEAVEPROXY_ARGS
ExecStart=/usr/bin/docker attach weaveproxy
ExecStart={{ docker_bin_dir }}/docker attach weaveproxy
Restart=on-failure
ExecStop=/opt/bin/weave stop-proxy
ExecStop={{ bin_dir }}/weave stop-proxy
[Install]
WantedBy=weave-network.target

2
roles/reset/tasks/main.yml

@ -21,7 +21,7 @@
when: ansible_service_mgr == "systemd" and services_removed.changed
- name: reset | remove all containers
shell: docker ps -aq | xargs -r docker rm -fv
shell: "{{ docker_bin_dir }}/docker ps -aq | xargs -r docker rm -fv"
- name: reset | gather mounted kubelet dirs
shell: mount | grep /var/lib/kubelet | awk '{print $3}' | tac

6
scripts/collect-info.yaml

@ -10,7 +10,7 @@
- name: kernel_info
cmd: uname -r
- name: docker_info
cmd: docker info
cmd: "{{ docker_bin_dir }}/docker info"
- name: ip_info
cmd: ip -4 -o a
- name: route_info
@ -34,9 +34,11 @@
- name: weave_info
cmd: weave report
- name: weave_logs
cmd: docker logs weave
cmd: "{{ docker_bin_dir }}/docker logs weave"
- name: kube_describe_all
cmd: kubectl describe all --all-namespaces
- name: kube_describe_nodes
cmd: kubectl describe nodes
- name: kubelet_logs
cmd: journalctl -u kubelet --no-pager
- name: kubedns_logs

Loading…
Cancel
Save