Browse Source

Add advanced net check for DNS K8s app

* Add an option to deploy K8s app to test e2e network connectivity
  and cluster DNS resolve via Kubedns for nethost/simple pods
  (defaults to false).
* Parametrize existing k8s apps templates with kube_namespace and
  kube_config_dir instead of hardcode.
* For CoreOS, ensure nameservers from the inventory are put in the
  first place, to allow hostnet pods connectivity via short names
  or FQDNs and to let hostnet agents pass as well, if netchecker
  is deployed.

Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
pull/529/head
Bogdan Dobrelya 8 years ago
parent
commit
b7692fad09
16 changed files with 220 additions and 11 deletions
  1. 41
      docs/netcheck.md
  2. 2
      inventory/group_vars/all.yml
  3. 17
      roles/dnsmasq/tasks/resolvconf.yml
  4. 21
      roles/download/defaults/main.yml
  5. 12
      roles/kubernetes-apps/ansible/defaults/main.yml
  6. 6
      roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml
  7. 10
      roles/kubernetes-apps/ansible/tasks/main.yaml
  8. 20
      roles/kubernetes-apps/ansible/tasks/netchecker.yml
  9. 2
      roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2
  10. 2
      roles/kubernetes-apps/ansible/templates/kubedns-rc.yml
  11. 2
      roles/kubernetes-apps/ansible/templates/kubedns-svc.yml
  12. 25
      roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml
  13. 26
      roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml
  14. 21
      roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml
  15. 15
      roles/kubernetes-apps/ansible/templates/netchecker-server-svc.yml
  16. 9
      roles/kubernetes/node/meta/main.yml

41
docs/netcheck.md

@ -0,0 +1,41 @@
Network Checker Application
===========================
With the ``deploy_netchecker`` var enabled (defaults to false), Kargo deploys a
Network Checker Application from the third-party `l23network/mcp-netchecker` docker
images. It consists of a server and agents that try to reach the server by the
usual network connectivity means of Kubernetes applications. Therefore, this
automatically verifies pod-to-pod connectivity via the cluster IP and checks
that DNS resolution is functioning as well.
The checks are run by agents on a periodic basis and cover standard and host network
pods as well. The history of performed checks may be found in the agents' application
logs.
To get the most recent and cluster-wide network connectivity report, run from
any of the cluster nodes:
```
curl http://localhost:31081/api/v1/connectivity_check
```
Note that Kargo does not invoke the check but only deploys the application, if
requested.
There are related application-specific variables:
```
netchecker_port: 31081
agent_report_interval: 15
netcheck_namespace: default
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
```
Note that the application verifies DNS resolution only for FQDNs built from the
combination of the ``netcheck_namespace.dns_domain`` vars, for example
``netchecker-service.default.cluster.local``. If you want to deploy the application
to a non-default namespace, make sure to also adjust the ``searchdomains`` var
so that the resulting search domain records contain that namespace, like:
```
search: foospace.cluster.local default.cluster.local ...
nameserver: ...
```

2
inventory/group_vars/all.yml

@ -35,6 +35,8 @@ kube_users:
cluster_name: cluster.local
# Subdomains of DNS domain to be resolved via /etc/resolv.conf
ndots: 5
# Deploy netchecker app to verify DNS resolve as an HTTP service
deploy_netchecker: false
# For some environments, each node has a publicly accessible
# address and an address it should bind services to. These are

17
roles/dnsmasq/tasks/resolvconf.yml

@ -48,7 +48,20 @@
when: resolvconf.rc == 0
notify: Dnsmasq | update resolvconf
- name: Add search domains to resolv.conf
- name: Remove search and nameserver options from resolvconf cloud init temporary file
lineinfile:
dest: "{{resolvconffile}}"
state: absent
regexp: "^{{ item }}.*$"
backup: yes
follow: yes
with_items:
- search
- nameserver
when: ansible_os_family == "CoreOS"
notify: Dnsmasq | update resolvconf for CoreOS
- name: Add search domains to resolvconf file
lineinfile:
line: "search {{searchentries}}"
dest: "{{resolvconffile}}"
@ -66,7 +79,7 @@
nameserver {{ item }}
{% endfor %}
state: present
insertafter: "^search.*$"
insertafter: "^search default.svc.*$"
create: yes
backup: yes
follow: yes

21
roles/download/defaults/main.yml

@ -58,6 +58,12 @@ hyperkube_image_repo: "quay.io/coreos/hyperkube"
hyperkube_image_tag: "{{ kube_version }}_coreos.0"
pod_infra_image_repo: "gcr.io/google_containers/pause-amd64"
pod_infra_image_tag: "{{ pod_infra_version }}"
netcheck_tag: v0.1
netcheck_kubectl_tag: v0.18.0-120-gaeb4ac55ad12b1-dirty
netcheck_agent_img_repo: "quay.io/l23network/mcp-netchecker-agent"
netcheck_server_img_repo: "quay.io/l23network/mcp-netchecker-server"
netcheck_kubectl_img_repo: "gcr.io/google_containers/kubectl"
nginx_image_repo: nginx
nginx_image_tag: 1.11.4-alpine
dnsmasq_version: 2.72
@ -73,6 +79,21 @@ test_image_repo: busybox
test_image_tag: latest
downloads:
netcheck_server:
container: true
repo: "{{ netcheck_server_img_repo }}"
tag: "{{ netcheck_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
netcheck_agent:
container: true
repo: "{{ netcheck_agent_img_repo }}"
tag: "{{ netcheck_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
netcheck_kubectl:
container: true
repo: "{{ netcheck_kubectl_img_repo }}"
tag: "{{ netcheck_kubectl_tag }}"
enabled: "{{ deploy_netchecker|bool }}"
weave:
dest: weave/bin/weave
version: "{{weave_version}}"

12
roles/kubernetes-apps/ansible/defaults/main.yml

@ -1,3 +1,6 @@
kube_config_dir: /etc/kubernetes
kube_namespace: kube-system
# Versions
kubedns_version: 1.7
kubednsmasq_version: 1.3
@ -13,5 +16,14 @@ exechealthz_image_tag: "{{ exechealthz_version }}"
calico_policy_image_repo: "calico/kube-policy-controller"
calico_policy_image_tag: latest
# Netchecker
deploy_netchecker: false
netchecker_port: 31081
agent_report_interval: 15
netcheck_namespace: default
agent_img: "quay.io/l23network/mcp-netchecker-agent:v0.1"
server_img: "quay.io/l23network/mcp-netchecker-server:v0.1"
kubectl_image: "gcr.io/google_containers/kubectl:v0.18.0-120-gaeb4ac55ad12b1-dirty"
# SSL
etcd_cert_dir: "/etc/ssl/etcd/ssl"

6
roles/kubernetes-apps/ansible/tasks/calico-policy-controller.yml

@ -1,5 +1,5 @@
- name: Write calico-policy-controller yaml
template: src=calico-policy-controller.yml.j2 dest=/etc/kubernetes/calico-policy-controller.yml
template: src=calico-policy-controller.yml.j2 dest={{kube_config_dir}}/calico-policy-controller.yml
when: inventory_hostname == groups['kube-master'][0]
@ -7,7 +7,7 @@
kube:
name: "calico-policy-controller"
kubectl: "{{bin_dir}}/kubectl"
filename: "/etc/kubernetes/calico-policy-controller.yml"
namespace: "kube-system"
filename: "{{kube_config_dir}}/calico-policy-controller.yml"
namespace: "{{kube_namespace}}"
resource: "rs"
when: inventory_hostname == groups['kube-master'][0]

10
roles/kubernetes-apps/ansible/tasks/main.yaml

@ -1,6 +1,6 @@
---
- name: Kubernetes Apps | Lay Down KubeDNS Template
template: src={{item.file}} dest=/etc/kubernetes/{{item.file}}
template: src={{item.file}} dest={{kube_config_dir}}/{{item.file}}
with_items:
- {file: kubedns-rc.yml, type: rc}
- {file: kubedns-svc.yml, type: svc}
@ -10,10 +10,10 @@
- name: Kubernetes Apps | Start Resources
kube:
name: kubedns
namespace: kube-system
namespace: "{{ kube_namespace }}"
kubectl: "{{bin_dir}}/kubectl"
resource: "{{item.item.type}}"
filename: /etc/kubernetes/{{item.item.file}}
filename: "{{kube_config_dir}}/{{item.item.file}}"
state: "{{item.changed | ternary('latest','present') }}"
with_items: "{{ manifests.results }}"
when: inventory_hostname == groups['kube-master'][0]
@ -21,3 +21,7 @@
- include: tasks/calico-policy-controller.yml
when: ( enable_network_policy is defined and enable_network_policy == True ) or
( kube_network_plugin == 'canal' )
- name: Kubernetes Apps | Netchecker
include: tasks/netchecker.yml
when: deploy_netchecker

20
roles/kubernetes-apps/ansible/tasks/netchecker.yml

@ -0,0 +1,20 @@
---
# Deploy the netchecker application: render its manifests into
# kube_config_dir, then hand each rendered file to the kube module.
# Both tasks run only on the first host of the kube-master group.
- name: Kubernetes Apps | Lay Down Netchecker Template
  template:
    src: "{{ item.file }}"
    dest: "{{ kube_config_dir }}/{{ item.file }}"
  with_items:
    - {file: netchecker-agent-ds.yml, type: ds, name: netchecker-agent}
    - {file: netchecker-agent-hostnet-ds.yml, type: ds, name: netchecker-agent-hostnet}
    - {file: netchecker-server-pod.yml, type: po, name: netchecker-server}
    - {file: netchecker-server-svc.yml, type: svc, name: netchecker-service}
  # Per-item results (including "changed") are consumed by the next task.
  register: manifests
  when: inventory_hostname == groups['kube-master'][0]

- name: Kubernetes Apps | Start Netchecker Resources
  kube:
    name: "{{ item.item.name }}"
    namespace: "{{ netcheck_namespace }}"
    kubectl: "{{ bin_dir }}/kubectl"
    resource: "{{ item.item.type }}"
    filename: "{{ kube_config_dir }}/{{ item.item.file }}"
    # 'latest' when the template task changed this manifest, else 'present'.
    state: "{{ item.changed | ternary('latest', 'present') }}"
  with_items: "{{ manifests.results }}"
  when: inventory_hostname == groups['kube-master'][0]

2
roles/kubernetes-apps/ansible/templates/calico-policy-controller.yml.j2

@ -2,7 +2,7 @@ apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
name: calico-policy-controller
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: calico-policy
kubernetes.io/cluster-service: "true"

2
roles/kubernetes-apps/ansible/templates/kubedns-rc.yml

@ -2,7 +2,7 @@ apiVersion: v1
kind: ReplicationController
metadata:
name: kubedns
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: kubedns
version: v19

2
roles/kubernetes-apps/ansible/templates/kubedns-svc.yml

@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
name: kubedns
namespace: kube-system
namespace: {{ kube_namespace }}
labels:
k8s-app: kubedns
kubernetes.io/cluster-service: "true"

25
roles/kubernetes-apps/ansible/templates/netchecker-agent-ds.yml

@ -0,0 +1,25 @@
# DaemonSet running the netchecker agent container on the pod network.
# Rendered by the Ansible template task before being passed to kubectl.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  labels:
    app: netchecker-agent
  name: netchecker-agent
  # Quoted so the rendered value is always parsed as a string.
  namespace: "{{ netcheck_namespace }}"
spec:
  template:
    metadata:
      name: netchecker-agent
      labels:
        app: netchecker-agent
    spec:
      containers:
        - name: netchecker-agent
          image: "{{ agent_img }}"
          env:
            # Exposes the pod's own name to the agent via the downward API.
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Env values must be strings, hence the quoting.
            - name: REPORT_INTERVAL
              value: '{{ agent_report_interval }}'
          imagePullPolicy: "{{ k8s_image_pull_policy }}"

26
roles/kubernetes-apps/ansible/templates/netchecker-agent-hostnet-ds.yml

@ -0,0 +1,26 @@
# DaemonSet running the netchecker agent container in the host network
# namespace. Rendered by the Ansible template task before being passed
# to kubectl.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  labels:
    app: netchecker-agent-hostnet
  name: netchecker-agent-hostnet
  # Quoted so the rendered value is always parsed as a string.
  namespace: "{{ netcheck_namespace }}"
spec:
  template:
    metadata:
      name: netchecker-agent-hostnet
      labels:
        app: netchecker-agent-hostnet
    spec:
      # Canonical lowercase boolean; the agent shares the node's network.
      hostNetwork: true
      containers:
        - name: netchecker-agent
          image: "{{ agent_img }}"
          env:
            # Exposes the pod's own name to the agent via the downward API.
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Env values must be strings, hence the quoting.
            - name: REPORT_INTERVAL
              value: '{{ agent_report_interval }}'
          imagePullPolicy: "{{ k8s_image_pull_policy }}"

21
roles/kubernetes-apps/ansible/templates/netchecker-server-pod.yml

@ -0,0 +1,21 @@
# Pod with the netchecker server container and a kubectl container started
# with the "proxy" argument. Rendered by the Ansible template task before
# being passed to kubectl.
apiVersion: v1
kind: Pod
metadata:
  name: netchecker-server
  labels:
    # Matched by the selector of the netchecker-service Service.
    app: netchecker-server
  # Quoted so the rendered value is always parsed as a string.
  namespace: "{{ netcheck_namespace }}"
spec:
  containers:
    - name: netchecker-server
      image: "{{ server_img }}"
      # The former bare "env:" key (a null value) is dropped: the container
      # defines no environment variables.
      imagePullPolicy: "{{ k8s_image_pull_policy }}"
      ports:
        - containerPort: 8081
          hostPort: 8081
    # NOTE(review): presumably gives the server local access to the
    # Kubernetes API through `kubectl proxy` - confirm against the image docs.
    - name: kubectl-proxy
      image: "{{ kubectl_image }}"
      imagePullPolicy: "{{ k8s_image_pull_policy }}"
      args:
        - proxy

15
roles/kubernetes-apps/ansible/templates/netchecker-server-svc.yml

@ -0,0 +1,15 @@
# NodePort Service that selects pods labelled app=netchecker-server and
# forwards service port 8081 to target port 8081.
apiVersion: v1
kind: Service
metadata:
  name: netchecker-service
  # Quoted so the rendered value is always parsed as a string.
  namespace: "{{ netcheck_namespace }}"
spec:
  selector:
    app: netchecker-server
  ports:
    - protocol: TCP
      port: 8081
      targetPort: 8081
      # Left unquoted on purpose: nodePort must render as an integer.
      nodePort: {{ netchecker_port }}
  type: NodePort

9
roles/kubernetes/node/meta/main.yml

@ -9,6 +9,15 @@ dependencies:
file: "{{ downloads.nginx }}"
- role: download
file: "{{ downloads.testbox }}"
- role: download
file: "{{ downloads.netcheck_server }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.netcheck_agent }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.netcheck_kubectl }}"
when: deploy_netchecker
- role: download
file: "{{ downloads.kubednsmasq }}"
when: not skip_dnsmasq_k8s|default(false)

Loading…
Cancel
Save