You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
3.0 KiB

  1. ---
  # Probe the etcd endpoints with `etcdctl endpoint health` and store the
  # command result in `etcd_endpoint_health` for the tasks that follow.
  # Runs only when the `broken_etcd` inventory group is non-empty.
  - name: Get etcd endpoint health
    when: groups['broken_etcd']
    environment:
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    command: "{{ bin_dir }}/etcdctl endpoint health"
    register: etcd_endpoint_health
    # A failing health check must not stop the play; the registered result is
    # inspected by later tasks instead.
    ignore_errors: true
    # Read-only probe: never report "changed", and run it in check mode too.
    changed_when: false
    check_mode: false
  # Derive the boolean `healthy` fact from the registered health probe.
  #
  # `healthy` is true only when etcdctl did NOT report
  # "Error: unhealthy cluster" on stderr. The previous expression had the
  # sense inverted (it was true when the error was present), which made the
  # later `not healthy` guards skip member removal for an unhealthy cluster.
  #
  # `search` is used rather than `match` because `match` only succeeds at the
  # very start of the string, and stderr may begin with per-endpoint lines
  # before the final error line.
  # NOTE(review): exact stderr layout depends on the etcdctl version - confirm.
  - name: Set healthy fact
    set_fact:
      healthy: "{{ etcd_endpoint_health.stderr is not search('Error: unhealthy cluster') }}"
    when:
      - groups['broken_etcd']
  # When the health check fails, etcdctl writes every line to stderr, the
  # "is healthy" ones included, so both counts are taken from stderr_lines.
  # `has_quorum` is true when the number of "is healthy" lines is greater than
  # or equal to the number of "is unhealthy" lines.
  - name: Set has_quorum fact
    when: groups['broken_etcd']
    set_fact:
      has_quorum: >-
        {{ etcd_endpoint_health.stderr_lines | select('match', '.*is healthy.*') | list | length
        >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}
  # Quorum is gone: pull in the tasks from recover_lost_quorum.yml.
  # Skipped when the `broken_etcd` group is empty or `has_quorum` is true.
  - include_tasks: recover_lost_quorum.yml
    when: groups['broken_etcd'] and not has_quorum
  # With quorum intact, delete the etcd data directory on every host of the
  # `broken_etcd` group; each loop iteration is delegated to that host.
  - name: Remove etcd data dir
    when:
      - groups['broken_etcd']
      - has_quorum
    loop: "{{ groups['broken_etcd'] }}"
    delegate_to: "{{ item }}"
    file:
      state: absent
      path: "{{ etcd_data_dir }}"
    # Errors on a delegated host are tolerated and do not stop the play.
    ignore_errors: true
  # Remove every file in etcd_cert_dir whose name contains the inventory name
  # of a host in `broken_etcd`. Failures are tolerated here; the per-item
  # results are registered and checked by the task that follows.
  # The register name's spelling ("cerificates") is kept as-is because the
  # following task reads the variable under that exact name.
  - name: Delete old certificates
    # noqa 302 - rm is ok here for now
    when: groups['broken_etcd']
    with_items: "{{ groups['broken_etcd'] }}"
    shell: "rm {{ etcd_cert_dir }}/*{{ item }}*"
    ignore_errors: true
    register: delete_old_cerificates
  # Go through the per-host results of the certificate removal and abort for
  # any host where `rm` exited non-zero for a reason other than the files
  # already being absent ("No such file or directory" in stderr).
  - name: Fail if unable to delete old certificates
    loop: "{{ delete_old_cerificates.results }}"
    when:
      - groups['broken_etcd']
      - item.rc != 0
      - "'No such file or directory' not in item.stderr"
    fail:
      msg: "Unable to delete old certificates for: {{ item.item }}"
    changed_when: false
  # List the current etcd members with `etcdctl member list` and register the
  # output as `member_list`. Runs only when the `broken_etcd` group is
  # non-empty, `healthy` is false and `has_quorum` is true.
  - name: Get etcd cluster members
    when:
      - groups['broken_etcd']
      - not healthy
      - has_quorum
    environment:
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    command: "{{ bin_dir }}/etcdctl member list"
    register: member_list
    # Read-only query: never report "changed", and run it in check mode too.
    changed_when: false
    check_mode: false
  # Pair every host in `broken_etcd` (item[0]) with every line of the
  # registered member list (item[1]). Each line is stripped of spaces and
  # split on commas; when the third field equals the host's
  # `etcd_member_name`, the first field is passed to `etcdctl member remove`.
  # NOTE(review): presumably field 0 is the member ID and field 2 the member
  # name in `etcdctl member list` output - confirm for the etcdctl in use.
  - name: Remove broken cluster members
    when:
      - groups['broken_etcd']
      - not healthy
      - has_quorum
      - hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ','').split(',')[2]
    with_nested:
      - "{{ groups['broken_etcd'] }}"
      - "{{ member_list.stdout_lines }}"
    environment:
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
    command: >-
      {{ bin_dir }}/etcdctl member remove
      {{ item[1].replace(' ','').split(',')[0] }}