committed by
Antoine Legrand
11 changed files with 256 additions and 1 deletion
Split View
Diff Options
-
16inventory/sample/group_vars/k8s-cluster/k8s-cluster.yml
-
8roles/kubernetes-apps/container_engine_accelerator/meta/main.yml
-
10roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/defaults/main.yml
-
54roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/tasks/main.yml
-
61roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/templates/k8s-device-plugin-nvidia-daemonset.yml.j2
-
80roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/templates/nvidia-driver-install-daemonset.yml.j2
-
3roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/vars/centos-7.yml
-
3roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/vars/ubuntu-16.yml
-
3roles/kubernetes-apps/container_engine_accelerator/nvidia_gpu/vars/ubuntu-18.yml
-
6roles/kubernetes-apps/meta/main.yml
-
13roles/kubernetes/node/templates/kubelet.standard.env.j2
@ -0,0 +1,8 @@ |
|||
---
# Role dependencies for the container engine accelerator add-on.
# The nvidia_gpu sub-role is pulled in only when nvidia_accelerator_enabled
# evaluates to true.
dependencies:
  - role: kubernetes-apps/container_engine_accelerator/nvidia_gpu
    # `| bool` makes string values such as "false" (for example when the
    # variable is passed as an extra var on the command line) behave like a
    # real boolean instead of a non-empty, truthy string.
    when: nvidia_accelerator_enabled | bool
    tags:
      - apps
      - nvidia_gpu
      - container_engine_accelerator
@ -0,0 +1,10 @@ |
|||
---
# Default variables for the nvidia_gpu accelerator role.

# Switch that gates the role; disabled by default.
nvidia_accelerator_enabled: false

# Driver release. Quoted so it stays a string; it is interpolated into
# nvidia_url_end below.
nvidia_driver_version: "390.87"

# Base URLs for the driver download, one per GPU flavor.
# Both use HTTPS so the driver installer is not fetched over plaintext HTTP.
nvidia_gpu_tesla_base_url: https://us.download.nvidia.com/tesla/
nvidia_gpu_gtx_base_url: https://us.download.nvidia.com/XFree86/Linux-x86_64/

# Selects which base URL is used; the role's tasks compare the lower-cased
# value against "tesla" and "gtx".
nvidia_gpu_flavor: tesla

# Path appended to the selected base URL to form the full download URL.
nvidia_url_end: "{{ nvidia_driver_version }}/NVIDIA-Linux-x86_64-{{ nvidia_driver_version }}.run"

# Driver installer image (false means "no image") and whether driver
# installation is supported. Both default to off; the per-distribution vars
# files of this role set them for specific OS releases.
nvidia_driver_install_container: false
nvidia_driver_install_supported: false

# NOTE(review): not referenced by the tasks or templates of this role that
# are visible here; presumably the list of GPU node names consumed elsewhere
# -- confirm against the kubelet configuration.
nvidia_gpu_nodes: []
@ -0,0 +1,54 @@ |
|||
--- |
|||
|
|||
# Load the first vars file that matches the host, trying candidates from the
# most specific (distribution + full version) to the least specific
# (OS family). `skip: true` lets the task pass without loading anything when
# none of the candidate files exists.
- name: Container Engine Acceleration Nvidia GPU | gather os specific variables
  include_vars: "{{ item }}"
  with_first_found:
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower | replace('/', '_') }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_release }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower | replace('/', '_') }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
|||
|
|||
# Compose the default driver download URL from the Tesla base URL and the
# version-specific path. Runs only when nvidia_gpu_flavor, lower-cased,
# equals "tesla".
- name: Container Engine Acceleration Nvidia GPU | Set fact of download url Tesla
  when: nvidia_gpu_flavor | lower == "tesla"
  set_fact:
    nvidia_driver_download_url_default: "{{ nvidia_gpu_tesla_base_url ~ nvidia_url_end }}"
|||
|
|||
# Compose the default driver download URL from the GTX base URL and the
# version-specific path. Runs only when nvidia_gpu_flavor, lower-cased,
# equals "gtx".
- name: Container Engine Acceleration Nvidia GPU | Set fact of download url GTX
  when: nvidia_gpu_flavor | lower == "gtx"
  set_fact:
    nvidia_driver_download_url_default: "{{ nvidia_gpu_gtx_base_url ~ nvidia_url_end }}"
|||
|
|||
# Ensure the directory that receives the rendered accelerator manifests
# exists, owned by root.
- name: Container Engine Acceleration Nvidia GPU | Create addon dir
  file:
    path: "{{ kube_config_dir }}/addons/container_engine_accelerator"
    # Stated explicitly instead of relying on the implicit default that
    # `recurse: true` selects.
    state: directory
    owner: root
    group: root
    # Quoted so the mode is passed as a string, not re-typed by the YAML parser.
    mode: "0755"
    recurse: true
|||
|
|||
# Render the driver-installer and device-plugin DaemonSet templates into the
# addon directory. Each item carries the name, file and type that the apply
# task reads back through the registered results.
- name: Container Engine Acceleration Nvidia GPU | Create manifests for nvidia accelerators
  template:
    src: "{{ item.file }}.j2"
    dest: "{{ kube_config_dir }}/addons/container_engine_accelerator/{{ item.file }}"
  with_items:
    - name: nvidia-driver-install-daemonset
      file: nvidia-driver-install-daemonset.yml
      type: daemonset
    - name: k8s-device-plugin-nvidia-daemonset
      file: k8s-device-plugin-nvidia-daemonset.yml
      type: daemonset
  register: container_engine_accelerator_manifests
  # Uses the same condition as the apply task, so manifests are only written
  # where they will also be applied. The condition is kept as one expression:
  # nvidia_driver_install_container holds an image reference (or false), so
  # it must not become a standalone bare-variable condition.
  when:
    - >-
      inventory_hostname == groups['kube-master'][0]
      and nvidia_driver_install_container
      and nvidia_driver_install_supported
|||
|
|||
# Apply every manifest recorded by the preceding template task. Each loop
# item is one registered result, so the original name/file/type entry is
# reached through item.item. Runs only on the first kube-master host, and
# only when an installer image is set and installation is marked supported.
- name: Container Engine Acceleration Nvidia GPU | Apply manifests for nvidia accelerators
  when:
    - inventory_hostname == groups['kube-master'][0] and nvidia_driver_install_container and nvidia_driver_install_supported
  with_items: "{{ container_engine_accelerator_manifests.results }}"
  kube:
    state: "latest"
    kubectl: "{{ bin_dir }}/kubectl"
    namespace: "kube-system"
    name: "{{ item.item.name }}"
    resource: "{{ item.item.type }}"
    filename: "{{ kube_config_dir }}/addons/container_engine_accelerator/{{ item.item.file }}"
@ -0,0 +1,61 @@ |
|||
# DaemonSet running the NVIDIA GPU device plugin in kube-system.
# Pods are scheduled only onto nodes that carry the "nvidia.com/gpu" label.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-gpu-device-plugin
  namespace: kube-system
  labels:
    k8s-app: nvidia-gpu-device-plugin
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      k8s-app: nvidia-gpu-device-plugin
  template:
    metadata:
      labels:
        k8s-app: nvidia-gpu-device-plugin
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-node-critical
      hostNetwork: true
      hostPID: true
      # Require the GPU label on the node; its value is irrelevant.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: nvidia.com/gpu
                    operator: Exists
      # Tolerate every NoExecute and NoSchedule taint.
      tolerations:
        - effect: NoExecute
          operator: Exists
        - effect: NoSchedule
          operator: Exists
      volumes:
        # Host directory mounted into the plugin container at /device-plugin.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        # Host /dev, mounted at the same path inside the container.
        - name: dev
          hostPath:
            path: /dev
      containers:
        - name: nvidia-gpu-device-plugin
          # Image is pinned by digest.
          image: "k8s.gcr.io/nvidia-gpu-device-plugin@sha256:0842734032018be107fa2490c98156992911e3e1f2a21e059ff0105b07dd8e9e"
          command:
            - /usr/bin/nvidia-gpu-device-plugin
            - -logtostderr
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 50m
              memory: 10Mi
            limits:
              cpu: 50m
              memory: 10Mi
          volumeMounts:
            - name: device-plugin
              mountPath: /device-plugin
            - name: dev
              mountPath: /dev
@ -0,0 +1,80 @@ |
|||
# Copyright 2017 Google Inc. All rights reserved. |
|||
# |
|||
# Licensed under the Apache License, Version 2.0 (the "License"); |
|||
# you may not use this file except in compliance with the License. |
|||
# You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
# DaemonSet that installs the NVIDIA driver on nodes carrying the
# "nvidia.com/gpu" label. The installation runs in a privileged init
# container; a pause container is the pod's only regular container.
#
# Uses apps/v1 (extensions/v1beta1 no longer serves DaemonSet on current
# Kubernetes releases), which makes spec.selector mandatory.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-driver-installer
  namespace: kube-system
spec:
  # Must match the pod template labels below.
  selector:
    matchLabels:
      name: nvidia-driver-installer
  # extensions/v1beta1 defaulted to OnDelete while apps/v1 defaults to
  # RollingUpdate; pinned explicitly to keep the previous rollout behaviour.
  updateStrategy:
    type: OnDelete
  template:
    metadata:
      labels:
        name: nvidia-driver-installer
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      priorityClassName: system-node-critical
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "nvidia.com/gpu"
                    operator: Exists
      tolerations:
        - key: "nvidia.com/gpu"
          effect: "NoSchedule"
          operator: "Exists"
      hostNetwork: true
      hostPID: true
      volumes:
        - name: dev
          hostPath:
            path: /dev
        # Host directory mounted into the installer at /usr/local/nvidia.
        - name: nvidia-install-dir-host
          hostPath:
            path: /home/kubernetes/bin/nvidia
        # Host root filesystem, mounted into the installer at /root.
        - name: root-mount
          hostPath:
            path: /
      initContainers:
        - image: "{{ nvidia_driver_install_container }}"
          name: nvidia-driver-installer
          resources:
            requests:
              cpu: 0.15
          securityContext:
            privileged: true
          env:
            # The host and container install dirs are the two sides of the
            # nvidia-install-dir-host volume mount.
            - name: NVIDIA_INSTALL_DIR_HOST
              value: /home/kubernetes/bin/nvidia
            - name: NVIDIA_INSTALL_DIR_CONTAINER
              value: /usr/local/nvidia
            - name: ROOT_MOUNT_DIR
              value: /root
            - name: NVIDIA_DRIVER_VERSION
              value: "{{ nvidia_driver_version }}"
            - name: NVIDIA_DRIVER_DOWNLOAD_URL
              value: "{{ nvidia_driver_download_url_default }}"
          volumeMounts:
            - name: nvidia-install-dir-host
              mountPath: /usr/local/nvidia
            - name: dev
              mountPath: /dev
            - name: root-mount
              mountPath: /root
      containers:
        - image: "gcr.io/google-containers/pause:2.0"
          name: pause
@ -0,0 +1,3 @@ |
|||
---
# CentOS 7 overrides: driver installer image to run, and the flag marking
# driver installation as supported on this distribution.
nvidia_driver_install_supported: true
nvidia_driver_install_container: "atzedevries/nvidia-centos-driver-installer:2"
@ -0,0 +1,3 @@ |
|||
---
# Ubuntu 16 overrides: digest-pinned driver installer image, and the flag
# marking driver installation as supported on this distribution.
nvidia_driver_install_supported: true
nvidia_driver_install_container: "gcr.io/google-containers/ubuntu-nvidia-driver-installer@sha256:eea7309dc4fa4a5c9d716157e74b90826e0a853aa26c7219db4710ddcd1ad8bc"
@ -0,0 +1,3 @@ |
|||
---
# Ubuntu 18 overrides: digest-pinned driver installer image, and the flag
# marking driver installation as supported on this distribution.
nvidia_driver_install_supported: true
nvidia_driver_install_container: "gcr.io/google-containers/ubuntu-nvidia-driver-installer@sha256:eea7309dc4fa4a5c9d716157e74b90826e0a853aa26c7219db4710ddcd1ad8bc"
Write
Preview
Loading…
Cancel
Save