From a5bb24b886f1878199b400c983accb96e2eef6c6 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Wed, 22 Mar 2017 10:12:57 -0500 Subject: [PATCH 1/6] Fix docker restart in atomic In atomic, containers are left running when docker is restarted. When docker is restarted after the flannel config is put in place, the docker0 interface isn't re-IPed because docker sees the running containers and won't update the previous config. This patch kills all the running containers after docker is stopped. We can't simply `docker stop` the running containers, as they respawn before we've got a chance to stop the docker daemon, so we need to use runc to do this after dockerd is stopped. --- roles/network_plugin/flannel/handlers/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 98c93a53a..9a87e2ec2 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -10,6 +10,7 @@ - Flannel | reload systemd - Flannel | reload docker.socket - Flannel | reload docker + - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -26,6 +27,11 @@ service: name: docker state: restarted + when: not is_atomic + +- name: Flannel | reload docker (atomic) + shell: systemctl stop docker && runc list | awk '!/ID/ {print $1}' | xargs -n 1 -I ID runc kill ID KILL && systemctl start docker + when: is_atomic - name: Flannel | pause while Docker restarts pause: From 30cc7c847ef69613317228b749843be1f361dfe8 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Thu, 30 Mar 2017 17:57:40 -0500 Subject: [PATCH 2/6] Reconfigure docker restart behavior on atomic Before restarting docker, instruct it to kill running containers when it restarts. 
Needs a second docker restart after we restore the original behavior, otherwise the next time docker is restarted by an operator, it will unexpectedly bring down all running containers. --- .../network_plugin/flannel/handlers/main.yml | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 9a87e2ec2..e3e937a1f 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -9,7 +9,9 @@ notify: - Flannel | reload systemd - Flannel | reload docker.socket + - Flannel | reconfigure docker restart behavior (atomic) - Flannel | reload docker + - Flannel | restore docker restart behavior (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -23,14 +25,29 @@ state: restarted when: ansible_os_family in ['CoreOS', 'Container Linux by CoreOS'] +- name: Flannel | reconfigure docker restart behavior (atomic) + replace: + name: /etc/docker/daemon.json + regexp: '"live-restore":.*true' + replace: '"live-restore": false' + when: is_atomic + - name: Flannel | reload docker service: name: docker state: restarted - when: not is_atomic + +- name: Flannel | restore docker restart behavior (atomic) + replace: + name: /etc/docker/daemon.json + regexp: '"live-restore": false' + replace: '"live-restore": true' + when: is_atomic - name: Flannel | reload docker (atomic) - shell: systemctl stop docker && runc list | awk '!/ID/ {print $1}' | xargs -n 1 -I ID runc kill ID KILL && systemctl start docker + service: + name: docker + state: restarted when: is_atomic - name: Flannel | pause while Docker restarts From 9ee0600a7f5212e602464544d046518522fe90d4 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 31 Mar 2017 07:46:21 -0500 Subject: [PATCH 3/6] Update handler names and explanation --- roles/network_plugin/flannel/handlers/main.yml | 12 
++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index e3e937a1f..412563394 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -4,14 +4,18 @@ failed_when: false notify: Flannel | restart docker +# special cases for atomic because it defaults to live-restore: true +# So we disable live-restore to pickup the new flannel IP. After +# we enable it, we have to restart docker again to pickup the new +# setting and restore the original behavior - name: Flannel | restart docker command: /bin/true notify: - Flannel | reload systemd - Flannel | reload docker.socket - - Flannel | reconfigure docker restart behavior (atomic) + - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | restore docker restart behavior (atomic) + - Flannel | configure docker live-restore false (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker @@ -25,7 +29,7 @@ state: restarted when: ansible_os_family in ['CoreOS', 'Container Linux by CoreOS'] -- name: Flannel | reconfigure docker restart behavior (atomic) +- name: Flannel | configure docker live-restore true (atomic) replace: name: /etc/docker/daemon.json regexp: '"live-restore":.*true' @@ -37,7 +41,7 @@ name: docker state: restarted -- name: Flannel | restore docker restart behavior (atomic) +- name: Flannel | configure docker live-restore false (atomic) replace: name: /etc/docker/daemon.json regexp: '"live-restore": false' From 6f67367b5758893798d0ae157caa3feac29499c3 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Wed, 5 Apr 2017 15:41:46 -0500 Subject: [PATCH 4/6] Leave 'live-restore' false Leave live-restore false so updates always pick up new network configuration --- roles/network_plugin/flannel/handlers/main.yml | 13 ------------- 1 file changed, 13
deletions(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 412563394..8fbb6a1fd 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -41,19 +41,6 @@ name: docker state: restarted -- name: Flannel | configure docker live-restore false (atomic) - replace: - name: /etc/docker/daemon.json - regexp: '"live-restore": false' - replace: '"live-restore": true' - when: is_atomic - -- name: Flannel | reload docker (atomic) - service: - name: docker - state: restarted - when: is_atomic - - name: Flannel | pause while Docker restarts pause: seconds: 10 From ef8d3f684f5bd4dc3999291a0493d0c1c736612b Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 19 May 2017 09:45:46 -0500 Subject: [PATCH 5/6] Remove unused handler Previous patch removed the step that sets live-restore back to true, so don't try to notify that handler any more --- roles/network_plugin/flannel/handlers/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml b/roles/network_plugin/flannel/handlers/main.yml index 8fbb6a1fd..a84d70c70 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -15,7 +15,6 @@ - Flannel | reload docker.socket - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | configure docker live-restore false (atomic) - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker From 7ae5785447e583e27cfabc28396e17894cd25d76 Mon Sep 17 00:00:00 2001 From: Josh Lothian Date: Fri, 19 May 2017 09:50:10 -0500 Subject: [PATCH 6/6] Removed the other unused handler With live-restore left false, we don't need a special docker restart --- roles/network_plugin/flannel/handlers/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/roles/network_plugin/flannel/handlers/main.yml
b/roles/network_plugin/flannel/handlers/main.yml index a84d70c70..bd4058976 100644 --- a/roles/network_plugin/flannel/handlers/main.yml +++ b/roles/network_plugin/flannel/handlers/main.yml @@ -15,7 +15,6 @@ - Flannel | reload docker.socket - Flannel | configure docker live-restore true (atomic) - Flannel | reload docker - - Flannel | reload docker (atomic) - Flannel | pause while Docker restarts - Flannel | wait for docker