From d7bb4d954af3c25e51132a7c26b4eccb382322f7 Mon Sep 17 00:00:00 2001
From: Kuldip Madnani
Date: Fri, 12 Oct 2018 12:29:51 -0500
Subject: [PATCH] Handling docker clean up during docker upgrade and docker config changes. (#3321)

* Added changes to clean up orphan containers and reload docker & kubelet directories.

* Added new files for cleaning up orphans and docker & kubelet directories

* Added new lines at the end of these files

* removed the trailing whitespaces from main.yml and clean-up.yml

* Updated as per the review comments

* Updated as per the review comments

* Removed service_facts and package_facts because they are not supported in ansible 2.4.0

* Corrected yaml syntax errors

* Removed the use of json_query filter and utilized selectattr

* Removed trailing spaces

* Changed the default value of docker_clean_up to false

* Added Changes to only include cleanup-docker-orphans.sh

* Reverted back changes done inside handler.

* Removed trailing spaces and made default value of docker_orphan_clean_up as true

* Reverted the default value of docker_orphan_clean_up as false

* Made the docker clean up as drop in

* Made the docker clean up as drop in

* Reverted the value of boolean docker_orphan_clean_up to false
---
 .../container-engine/docker/defaults/main.yml |  3 +
 .../docker/files/cleanup-docker-orphans.sh    | 81 +++++++++++++++++++
 .../container-engine/docker/tasks/systemd.yml | 14 +++
 .../templates/docker-orphan-cleanup.conf.j2   |  2 +
 4 files changed, 100 insertions(+)
 create mode 100644 roles/container-engine/docker/files/cleanup-docker-orphans.sh
 create mode 100644 roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2

diff --git a/roles/container-engine/docker/defaults/main.yml b/roles/container-engine/docker/defaults/main.yml
index ec819b24a..fb719878d 100644
--- a/roles/container-engine/docker/defaults/main.yml
+++ b/roles/container-engine/docker/defaults/main.yml
@@ -44,3 +44,6 @@ dockerproject_rh_repo_gpgkey: 'https://yum.dockerproject.org/gpg'
 dockerproject_apt_repo_base_url: 'https://apt.dockerproject.org/repo'
 dockerproject_apt_repo_gpgkey: 'https://apt.dockerproject.org/gpg'
 docker_bin_dir: "/usr/bin"
+
+# flag to enable/disable docker cleanup
+docker_orphan_clean_up: false
diff --git a/roles/container-engine/docker/files/cleanup-docker-orphans.sh b/roles/container-engine/docker/files/cleanup-docker-orphans.sh
new file mode 100644
index 000000000..5db82f88b
--- /dev/null
+++ b/roles/container-engine/docker/files/cleanup-docker-orphans.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+#
+# Report orphaned docker containers: processes parented by a
+# docker-containerd-shim whose PID docker does not list for any running
+# container. Runs only when live-restore is enabled and shims exist.
+
+readonly SHIM_PATTERN='^docker-containerd-shim'
+
+# Print every descendant PID of a process; each PID is printed after its
+# own descendants.
+# Arguments: $1 - PID whose process subtree is listed
+# Outputs:   descendant PIDs on stdout, nothing when $1 has no children
+list_descendants ()
+{
+  local children pid
+  children=$(ps -o pid= --ppid "$1")
+  # Unquoted on purpose: ps prints one PID per line and we iterate them.
+  for pid in $children
+  do
+    list_descendants "$pid"
+  done
+  if [[ -n "$children" ]]; then
+    echo "$children"
+  fi
+}
+
+# Print the PID docker records (.State.Pid) for each running container.
+# Returns: non-zero when docker cannot list or inspect the containers
+running_container_pids ()
+{
+  local container_ids
+  container_ids=$(docker ps -q) || return
+  [[ -n "$container_ids" ]] || return 0
+  # shellcheck disable=SC2086 # one container ID per word is intended
+  docker inspect --format '{{.State.Pid}}' $container_ids
+}
+
+count_shim_processes=$(pgrep -c -f "$SHIM_PATTERN")
+live_restore=$(docker info --format '{{.LiveRestoreEnabled}}' 2>/dev/null)
+
+if [[ "${count_shim_processes:-0}" -gt 0 && "${live_restore}" == "true" ]]; then
+  # Find all container pids from shims
+  shim_children=$(pgrep -P "$(pgrep -d ',' -f "$SHIM_PATTERN")")
+
+  # A failed docker query must not be mistaken for "no containers", or every
+  # container would look orphaned.
+  if ! valid_pids=$(running_container_pids); then
+    echo "Unable to query running containers; skipping orphan detection." >&2
+    exit 1
+  fi
+
+  # Filter out valid docker pids, leaving the orphans. -x and -F compare
+  # whole lines literally, so PID 12 (or a reported PID of 0) can no longer
+  # hide PID 123 or 1024. With no valid PIDs every shim child is an orphan.
+  if [[ -n "$valid_pids" ]]; then
+    orphans=$(grep -vxF -e "$valid_pids" <<<"$shim_children")
+  else
+    orphans=$shim_children
+  fi
+
+  if [[ -n "$orphans" ]]
+  then
+    # Get shim pids of orphans
+    # shellcheck disable=SC2046,SC2086 # PID lists are split on purpose
+    orphan_shim_pids=$(ps -o pid= $(ps -o ppid= $orphans))
+
+    # Find all orphaned container PIDs
+    orphan_container_pids=$(for pid in $orphan_shim_pids; do list_descendants "$pid"; done)
+
+    # NOTE(review): the kill below is commented out, so despite the message
+    # this script only reports the PIDs; confirm whether that is intended.
+    # shellcheck disable=SC2086 # PID list is split on purpose
+    echo -e "Killing orphan container PIDs and descendants: \n$(ps -O ppid= $orphan_container_pids)"
+    #kill -9 $orphan_container_pids || true
+
+  else
+    echo "No orphaned containers found"
+  fi
+else
+  echo "Either live-restore is turned off or the node doesn't have any shim processes."
+fi
diff --git a/roles/container-engine/docker/tasks/systemd.yml b/roles/container-engine/docker/tasks/systemd.yml
index 78cec33cc..e37d7cc47 100644
--- a/roles/container-engine/docker/tasks/systemd.yml
+++ b/roles/container-engine/docker/tasks/systemd.yml
@@ -38,4 +38,18 @@
   notify: restart docker
   when: dns_mode != 'none' and resolvconf_mode == 'docker_dns'
 
+- name: Copy docker orphan clean up script to the node
+  copy:
+    src: cleanup-docker-orphans.sh
+    dest: "{{ bin_dir }}/cleanup-docker-orphans.sh"
+    mode: 0755
+  when: docker_orphan_clean_up | bool
+
+- name: Write docker orphan clean up systemd drop-in
+  template:
+    src: docker-orphan-cleanup.conf.j2
+    dest: "/etc/systemd/system/docker.service.d/docker-orphan-cleanup.conf"
+  notify: restart docker
+  when: docker_orphan_clean_up | bool
+
 - meta: flush_handlers
diff --git a/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2 b/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2
new file mode 100644
index 000000000..70754ac57
--- /dev/null
+++ b/roles/container-engine/docker/templates/docker-orphan-cleanup.conf.j2
@@ -0,0 +1,2 @@
+[Service]
+ExecStop=-{{ bin_dir }}/cleanup-docker-orphans.sh
\ No newline at end of file