diff --git a/docs/containerd.md b/docs/containerd.md index 32de17683..847f7c9ca 100644 --- a/docs/containerd.md +++ b/docs/containerd.md @@ -39,4 +39,65 @@ containerd_registries: image_command_tool: crictl ``` +### Containerd Runtimes + +Containerd supports multiple runtime configurations that can be used with the +[RuntimeClass] Kubernetes feature. See [runtime classes in containerd] for the +details of containerd configuration. + +In kubespray, the default runtime name is "runc", and it can be configured with the `containerd_runc_runtime` dictionary: + +```yaml +containerd_runc_runtime: + name: runc + type: "io.containerd.runc.v2" + engine: "" + root: "" + options: + systemdCgroup: "false" + binaryName: /usr/local/bin/my-runc + base_runtime_spec: cri-base.json +``` + +Further runtimes can be configured with `containerd_additional_runtimes`, which +is a list of such dictionaries. + +The default runtime can be changed by setting `containerd_default_runtime`. + +#### base_runtime_spec + +The `base_runtime_spec` key in a runtime dictionary can be used to explicitly +specify a runtime spec JSON file. We ship a default one, which is generated +with `ctr oci spec > /etc/containerd/cri-base.json`. It will be used if you set +`base_runtime_spec: cri-base.json`. The main advantage of doing so is the presence of +the `rlimits` section in this configuration, which will restrict the maximum number +of file descriptors (open files) per container to 1024. + +You can tune many more [settings][runtime-spec] by supplying your own file name and content with `containerd_base_runtime_specs`: + +```yaml +containerd_base_runtime_specs: + cri-spec-custom.json: | + { + "ociVersion": "1.0.2-dev", + "process": { + "user": { + "uid": 0, + ... +``` + +The files in this dict will be placed in the containerd config directory, +`/etc/containerd` by default. The files can then be referenced by filename in a +runtime: + +```yaml +containerd_runc_runtime: + name: runc + base_runtime_spec: cri-spec-custom.json + ... 
+``` + [containerd]: https://containerd.io/ +[RuntimeClass]: https://kubernetes.io/docs/concepts/containers/runtime-class/ +[runtime classes in containerd]: https://github.com/containerd/containerd/blob/main/docs/cri/config.md#runtime-classes +[runtime-spec]: https://github.com/opencontainers/runtime-spec diff --git a/roles/container-engine/containerd/defaults/main.yml b/roles/container-engine/containerd/defaults/main.yml index 403f1a9c4..af5f54379 100644 --- a/roles/container-engine/containerd/defaults/main.yml +++ b/roles/container-engine/containerd/defaults/main.yml @@ -12,6 +12,7 @@ containerd_runc_runtime: type: "io.containerd.runc.v2" engine: "" root: "" + # base_runtime_spec: cri-base.json # use this to limit number of file descriptors per container options: systemdCgroup: "{{ containerd_use_systemd_cgroup | ternary('true', 'false') }}" @@ -22,6 +23,9 @@ containerd_additional_runtimes: [] # engine: "" # root: "" +containerd_base_runtime_specs: + cri-base.json: "{{ lookup('file', 'cri-base.json') }}" + containerd_grpc_max_recv_message_size: 16777216 containerd_grpc_max_send_message_size: 16777216 diff --git a/roles/container-engine/containerd/files/cri-base.json b/roles/container-engine/containerd/files/cri-base.json new file mode 100644 index 000000000..f022438a4 --- /dev/null +++ b/roles/container-engine/containerd/files/cri-base.json @@ -0,0 +1,214 @@ +{ + "ociVersion": "1.0.2-dev", + "process": { + "user": { + "uid": 0, + "gid": 0 + }, + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "effective": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + 
"CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "inheritable": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "permitted": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "noNewPrivileges": true + }, + "root": { + "path": "rootfs" + }, + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/run", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + } + ], + "linux": { + "resources": { + "devices": [ + { + "allow": false, + "access": "rwm" + } + ] + }, + "cgroupsPath": "/default", + "namespaces": [ + { + "type": "pid" + }, + { + 
"type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + }, + { + "type": "network" + } + ], + "maskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi" + ], + "readonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/roles/container-engine/containerd/tasks/main.yml b/roles/container-engine/containerd/tasks/main.yml index e2c447607..5415059f3 100644 --- a/roles/container-engine/containerd/tasks/main.yml +++ b/roles/container-engine/containerd/tasks/main.yml @@ -84,6 +84,15 @@ notify: restart containerd when: http_proxy is defined or https_proxy is defined +- name: containerd | Write base_runtime_specs + copy: + content: "{{ item.value }}" + dest: "{{ containerd_cfg_dir }}/{{ item.key }}" + owner: "root" + mode: 0644 + with_dict: "{{ containerd_base_runtime_specs | default({}) }}" + notify: restart containerd + - name: containerd | Copy containerd config file template: src: config.toml.j2 diff --git a/roles/container-engine/containerd/templates/config.toml.j2 b/roles/container-engine/containerd/templates/config.toml.j2 index 6ab414dc0..7ffe37045 100644 --- a/roles/container-engine/containerd/templates/config.toml.j2 +++ b/roles/container-engine/containerd/templates/config.toml.j2 @@ -27,6 +27,10 @@ oom_score = {{ containerd_oom_score }} runtime_type = "{{ runtime.type }}" runtime_engine = "{{ runtime.engine }}" runtime_root = "{{ runtime.root }}" +{% if runtime.base_runtime_spec is defined %} + base_runtime_spec = "{{ containerd_cfg_dir }}/{{ runtime.base_runtime_spec }}" +{% endif %} + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.{{ runtime.name }}.options] {% for key, value in runtime.options.items() %} {{ key }} = {{ value }}