You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

91 lines
3.0 KiB

  1. ---
  # Query the health of the etcd endpoints with etcdctl (v3 API) and keep the
  # result in etcd_endpoint_health. Failures are tolerated (ignore_errors) so
  # that the registered output can still be inspected instead of aborting here.
  - name: Get etcd endpoint health
    command: "{{ bin_dir }}/etcdctl endpoint health"
    register: etcd_endpoint_health
    ignore_errors: true
    changed_when: false  # read-only query, never reports a change
    check_mode: false  # execute the query even when the play runs in check mode
    environment:
      # Quoted on purpose: environment variable values are strings.
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
    when:
      - groups['broken_etcd']
  # Derive the `healthy` fact from the stderr of the registered
  # etcd_endpoint_health result.
  # NOTE(review): the fact is true when stderr matches 'Error: unhealthy cluster',
  # which reads inverted relative to its name - confirm the intended meaning
  # before relying on it.
  - name: Set healthy fact
    when: groups['broken_etcd']
    set_fact:
      healthy: "{{ etcd_endpoint_health.stderr is match('Error: unhealthy cluster') }}"
  # When there is an error, everything is printed in stderr_lines, even "is healthy" messages.
  # has_quorum is true when the number of "is healthy" lines is at least the
  # number of "is unhealthy" lines.
  # Guarded by the broken_etcd check: when the health query is skipped, the
  # registered result carries no stderr_lines and this template would fail.
  - name: Set has_quorum fact
    set_fact:
      has_quorum: >-
        {{ (etcd_endpoint_health.stderr_lines | select('match', '.*is healthy.*') | list | length)
        >= (etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length) }}
    when:
      - groups['broken_etcd']
  # Include recover_lost_quorum.yml only when the broken_etcd group is non-empty
  # and has_quorum is false.
  - name: Include lost-quorum recovery tasks
    include_tasks: recover_lost_quorum.yml
    when:
      - groups['broken_etcd']
      - not has_quorum
  # Delete the etcd data directory on each host of the broken_etcd group
  # (the task is delegated to the host being iterated). Runs only while
  # has_quorum is true.
  - name: Remove etcd data dir
    delegate_to: "{{ item }}"
    loop: "{{ groups['broken_etcd'] }}"
    file:
      state: absent
      path: "{{ etcd_data_dir }}"
    when:
      - groups['broken_etcd']
      - has_quorum
  # Remove the files in etcd_cert_dir whose name contains a broken_etcd host
  # name. Errors are ignored here and the per-host results are registered so
  # they can be evaluated separately.
  - name: Delete old certificates
    # noqa 302 - rm is ok here for now
    shell: "rm {{ etcd_cert_dir }}/*{{ item }}*"
    loop: "{{ groups['broken_etcd'] }}"
    ignore_errors: true
    register: delete_old_cerificates
    when:
      - groups['broken_etcd']
  # Walk the registered removal results and abort for any host whose `rm`
  # returned non-zero for a reason other than the files already being absent.
  - name: Fail if unable to delete old certificates
    loop: "{{ delete_old_cerificates.results }}"
    fail:
      msg: "Unable to delete old certificates for: {{ item.item }}"
    changed_when: false
    when:
      - groups['broken_etcd']
      - item.rc != 0
      - "'No such file or directory' not in item.stderr"
  # List the etcd cluster members with etcdctl (v3 API) and register the output
  # as member_list. Runs only when the broken_etcd group is non-empty, `healthy`
  # is false and has_quorum is true.
  - name: Get etcd cluster members
    command: "{{ bin_dir }}/etcdctl member list"
    register: member_list
    changed_when: false  # read-only query, never reports a change
    check_mode: false  # execute the query even when the play runs in check mode
    environment:
      # Quoted on purpose: environment variable values are strings.
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
    when:
      - groups['broken_etcd']
      - not healthy
      - has_quorum
  # Pair every broken_etcd host (item[0]) with every line of the member list
  # (item[1]). Each line is stripped of spaces and split on commas: when the
  # third field equals the host's etcd_member_name, the first field is passed
  # to `etcdctl member remove`.
  - name: Remove broken cluster members
    command: "{{ bin_dir }}/etcdctl member remove {{ item[1].replace(' ','').split(',')[0] }}"
    environment:
      # Quoted on purpose: environment variable values are strings.
      ETCDCTL_API: "3"
      ETCDCTL_ENDPOINTS: "{{ etcd_access_addresses }}"
      ETCDCTL_CERT: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
      ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
      ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
    with_nested:
      - "{{ groups['broken_etcd'] }}"
      - "{{ member_list.stdout_lines }}"
    when:
      - groups['broken_etcd']
      - not healthy
      - has_quorum
      - hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ','').split(',')[2]