Browse Source

[kata-containers] Update configuration to support kata 3.1.3. (#10466)

Namely, the libexec paths have changed since 2.5.
This also makes kata_containers_virtio_fs_cache configurable.
pull/10489/head
Heather Lapointe 1 year ago
committed by GitHub
parent
commit
ddd7aa844c
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 164 additions and 81 deletions
  1. 1
      roles/container-engine/kata-containers/defaults/main.yml
  2. 244
      roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2

1
roles/container-engine/kata-containers/defaults/main.yml

@ -7,3 +7,4 @@ kata_containers_qemu_default_memory: "{{ ansible_memtotal_mb }}"
kata_containers_qemu_debug: 'false'
kata_containers_qemu_sandbox_cgroup_only: 'true'
kata_containers_qemu_enable_mem_prealloc: 'false'
kata_containers_virtio_fs_cache: 'always'

244
roles/container-engine/kata-containers/templates/configuration-qemu.toml.j2

@ -1,11 +1,12 @@
# Copyright (c) 2017-2019 Intel Corporation
# Copyright (c) 2021 Adobe Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# XXX: WARNING: this file is auto-generated.
# XXX:
# XXX: Source file: "cli/config/configuration-qemu.toml.in"
# XXX: Source file: "config/configuration-qemu.toml.in"
# XXX: Project:
# XXX: Name: Kata Containers
# XXX: Type: kata
@ -18,20 +19,46 @@ kernel = "/opt/kata/share/kata-containers/vmlinux.container"
kernel = "/opt/kata/share/kata-containers/vmlinuz.container"
{% endif %}
image = "/opt/kata/share/kata-containers/kata-containers.img"
# initrd = "/opt/kata/share/kata-containers/kata-containers-initrd.img"
machine_type = "q35"
# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type="ext4"
# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
# The Kata Containers runtime dynamically detects the available feature set and
# aims at enabling the largest possible one.
# aims at enabling the largest possible one, returning an error if none is
# available, or none is supported by the hypervisor.
#
# Known limitations:
# * Does not work by design:
# - CPU Hotplug
# - Memory Hotplug
# - NVDIMM devices
#
# Default false
# confidential_guest = true
# Choose AMD SEV-SNP confidential guests
# In case of using confidential guests on AMD hardware that supports both SEV
# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
# Default false
# sev_snp_guest = true
# Enable running QEMU VMM as a non-root user.
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
# rootless = true
# List of valid annotation names for the hypervisor
# Each member of the list is a regular expression, which is the base name
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
enable_annotations = []
enable_annotations = ["enable_iommu"]
# List of valid annotations values for the hypervisor
# Each member of the list is a path pattern as described by glob(3).
@ -55,11 +82,25 @@ kernel_params = ""
# If you want that qemu uses the default firmware leave this option empty
firmware = ""
# Path to the firmware volume.
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
# can be customized per each user while UEFI code is kept same.
firmware_volume = ""
# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
machine_accelerators=""
# Qemu seccomp sandbox feature
# comma-separated list of seccomp sandbox features to control the syscall access.
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
# Another note: enabling this feature may reduce performance, you may enable
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
#seccompsandbox="on,obsolete=deny,spawn=deny,resourcecontrol=deny"
# CPU features
# comma-separated list of cpu features to pass to the cpu
# For example, `cpu_features = "pmu=off,vmx=off"
@ -110,6 +151,12 @@ default_memory = {{ kata_containers_qemu_default_memory }}
# This is will determine the times that memory will be hotadded to sandbox/VM.
#memory_slots = 10
# Default maximum memory in MiB per SB / VM
# unspecified or == 0 --> will be set to the actual amount of physical RAM
# > 0 <= amount of physical RAM --> will be set to the specified number
# > amount of physical RAM --> will be set to the actual amount of physical RAM
default_maxmemory = 0
# The size in MiB will be plused to max memory of hypervisor.
# It is the memory address space for the NVDIMM devie.
# If set block storage driver (block_device_driver) to "nvdimm",
@ -128,12 +175,13 @@ default_memory = {{ kata_containers_qemu_default_memory }}
# root file system is backed by a block device, the block device is passed
# directly to the hypervisor for performance reasons.
# This flag prevents the block device from being passed to the hypervisor,
# 9pfs is used instead to pass the rootfs.
# virtio-fs is used instead to pass the rootfs.
disable_block_device_use = false
# Shared file system type:
# - virtio-fs (default)
# - virtio-9p
# - virtio-fs-nydus
{% if kata_containers_version is version('2.2.0', '>=') %}
shared_fs = "virtio-fs"
{% else %}
@ -141,27 +189,39 @@ shared_fs = "virtio-9p"
{% endif %}
# Path to vhost-user-fs daemon.
{% if kata_containers_version is version('2.5.0', '>=') %}
virtio_fs_daemon = "/opt/kata/libexec/virtiofsd"
{% else %}
virtio_fs_daemon = "/opt/kata/libexec/kata-qemu/virtiofsd"
{% endif %}
# List of valid annotations values for the virtiofs daemon
# The default if not set is empty (all annotations rejected.)
# Your distribution recommends: ["/opt/kata/libexec/kata-qemu/virtiofsd"]
valid_virtio_fs_daemon_paths = ["/opt/kata/libexec/kata-qemu/virtiofsd"]
# Your distribution recommends: ["/opt/kata/libexec/virtiofsd"]
valid_virtio_fs_daemon_paths = [
"/opt/kata/libexec/virtiofsd",
"/opt/kata/libexec/kata-qemu/virtiofsd",
]
# Default size of DAX cache in MiB
virtio_fs_cache_size = 0
# Default size of virtqueues
virtio_fs_queue_size = 1024
# Extra args for virtiofsd daemon
#
# Format example:
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
# ["--arg1=xxx", "--arg2=yyy"]
# Examples:
# Set virtiofsd log level to debug : ["--log-level=debug"]
#
# see `virtiofsd -h` for possible options.
virtio_fs_extra_args = ["--thread-pool-size=1"]
virtio_fs_extra_args = ["--thread-pool-size=1", "--announce-submounts"]
# Cache mode:
#
# - none
# - never
# Metadata, data, and pathname lookup are not cached in guest. They are
# always fetched from host and any changes are immediately pushed to host.
#
@ -172,13 +232,27 @@ virtio_fs_extra_args = ["--thread-pool-size=1"]
#
# - always
# Metadata, data, and pathname lookup are cached in guest and never expire.
virtio_fs_cache = "always"
virtio_fs_cache = "{{ kata_containers_virtio_fs_cache }}"
# Block storage driver to be used for the hypervisor in case the container
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
# or nvdimm.
block_device_driver = "virtio-scsi"
# aio is the I/O mechanism used by qemu
# Options:
#
# - threads
# Pthread based disk I/O.
#
# - native
# Native Linux I/O.
#
# - io_uring
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
# qemu >=5.0.
block_device_aio = "io_uring"
# Specifies cache-related options will be set to block devices or not.
# Default false
#block_device_cache_set = true
@ -242,6 +316,11 @@ vhost_user_store_path = "/var/run/kata-containers/vhost-user"
# Your distribution recommends: ["/var/run/kata-containers/vhost-user"]
valid_vhost_user_store_paths = ["/var/run/kata-containers/vhost-user"]
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
# qemu will delay this many seconds and then attempt to reconnect.
# Zero disables reconnecting, and the default is zero.
vhost_user_reconnect_timeout_sec = 0
# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
@ -253,17 +332,12 @@ valid_vhost_user_store_paths = ["/var/run/kata-containers/vhost-user"]
# Your distribution recommends: [""]
valid_file_mem_backends = [""]
# Enable swap of vm memory. Default false.
# The behaviour is undefined if mem_prealloc is also set to true
#enable_swap = true
# -pflash can add image file to VM. The arguments of it should be in format
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
pflashes = []
# This option changes the default hypervisor and kernel parameters
# to enable debug output where available. This extra output is added
# to the proxy logs, but only when proxy debug is also enabled.
# to enable debug output where available. And Debug also enables the hmp socket.
#
# Default false
enable_debug = {{ kata_containers_qemu_debug }}
@ -278,21 +352,18 @@ enable_debug = {{ kata_containers_qemu_debug }}
# used for 9p packet payload.
#msize_9p = 8192
# If true and vsocks are supported, use vsocks to communicate directly
# with the agent and no proxy is started, otherwise use unix
# sockets and start a proxy to communicate with the agent.
# Default false
#use_vsock = true
# If false and nvdimm is supported, use nvdimm device to plug guest image.
# Otherwise virtio-block device is used.
#
# nvdimm is not supported when `confidential_guest = true`.
#
# Default is false
#disable_image_nvdimm = true
# VFIO devices are hotplugged on a bridge by default.
# Enable hotplugging on root bus. This may be required for devices with
# a large PCI bar, as this is a current limitation with hotplugging on
# a bridge. This value is valid for "pc" machine type.
# a bridge.
# Default false
#hotplug_vfio_on_root_bus = true
@ -329,15 +400,15 @@ valid_entropy_sources = ["/dev/urandom","/dev/random",""]
# the OCI spec passed to the runtime.
#
# You can create a rootfs with hooks by customizing the osbuilder scripts:
# https://github.com/kata-containers/osbuilder
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
#
# Hooks must be stored in a subdirectory of guest_hook_path according to their
# hook type, i.e. "guest_hook_path/{prestart,postart,poststop}".
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
# The agent will scan these directories for executable files and add them, in
# lexicographical order, to the lifecycle of the guest container.
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
# Warnings will be logged if any error is encountered will scanning for hooks,
# Warnings will be logged if any error is encountered while scanning for hooks,
# but it will not abort container execution.
#guest_hook_path = "/usr/share/oci/hooks"
#
@ -382,6 +453,19 @@ valid_entropy_sources = ["/dev/urandom","/dev/random",""]
# be default_memory.
#enable_guest_swap = true
# use legacy serial for guest console if available and implemented for architecture. Default false
#use_legacy_serial = true
# disable applying SELinux on the VMM process (default false)
disable_selinux=false
# disable applying SELinux on the container process
# If set to false, the type `container_t` is applied to the container process by default.
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
# with `SELINUX=yes`.
# (default: true)
disable_guest_selinux=true
[factory]
# VM templating support. Once enabled, new VMs are created from template
# using vm cloning. They will share the same initial kernel, initramfs and
@ -425,31 +509,6 @@ valid_entropy_sources = ["/dev/urandom","/dev/random",""]
# Default /var/run/kata-containers/cache.sock
#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
[proxy.kata]
path = "/opt/kata/libexec/kata-containers/kata-proxy"
# If enabled, proxy messages will be sent to the system log
# (default: disabled)
enable_debug = {{ kata_containers_qemu_debug }}
[shim.kata]
path = "/opt/kata/libexec/kata-containers/kata-shim"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
enable_debug = {{ kata_containers_qemu_debug }}
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.kata]
# If enabled, make the agent display debug-level messages.
# (default: disabled)
@ -457,24 +516,17 @@ enable_debug = {{ kata_containers_qemu_debug }}
# Enable agent tracing.
#
# If enabled, the default trace mode is "dynamic" and the
# default trace type is "isolated". The trace mode and type are set
# explicitly with the `trace_type=` and `trace_mode=` options.
# If enabled, the agent will generate OpenTelemetry trace spans.
#
# Notes:
#
# - Tracing is ONLY enabled when `enable_tracing` is set: explicitly
# setting `trace_mode=` and/or `trace_type=` without setting `enable_tracing`
# will NOT activate agent tracing.
#
# - See https://github.com/kata-containers/agent/blob/master/TRACING.md for
# full details.
# - If the runtime also has tracing enabled, the agent spans will be
# associated with the appropriate runtime parent span.
# - If enabled, the runtime will wait for the container to shutdown,
# increasing the container shutdown time slightly.
#
# (default: disabled)
#enable_tracing = true
#
#trace_mode = "dynamic"
#trace_type = "isolated"
# Comma separated list of kernel modules and their parameters.
# These modules will be loaded in the guest kernel using modprobe(8).
@ -500,21 +552,6 @@ kernel_modules=[]
# (default: 30)
#dial_timeout = 30
[netmon]
# If enabled, the network monitoring process gets started when the
# sandbox is created. This allows for the detection of some additional
# network being added to the existing network namespace, after the
# sandbox has been created.
# (default: disabled)
#enable_netmon = true
# Specify the path to the netmon binary.
path = "/opt/kata/libexec/kata-containers/kata-netmon"
# If enabled, netmon messages will be sent to the system log
# (default: disabled)
enable_debug = {{ kata_containers_qemu_debug }}
[runtime]
# If enabled, the runtime will log additional debug messages to the
# system log
@ -546,6 +583,19 @@ internetworking_model="tcfilter"
# (default: true)
disable_guest_seccomp=true
# vCPUs pinning settings
# if enabled, each vCPU thread will be scheduled to a fixed CPU
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
# enable_vcpus_pinning = false
# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
# (format: "user:role:type")
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
# categories are determined automatically by high-level container runtimes such as containerd.
#guest_selinux_label="system_u:system_r:container_t"
# If enabled, the runtime will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
# (default: disabled)
@ -563,11 +613,9 @@ disable_guest_seccomp=true
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
# `disable_new_netns` conflicts with `enable_netmon`
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
# (like OVS) directly.
# If you are using docker, `disable_new_netns` only works with `docker run --net=none`
# (default: false)
#disable_new_netns = true
@ -576,15 +624,49 @@ disable_guest_seccomp=true
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# The sandbox cgroup is constrained if there is no container type annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
sandbox_cgroup_only={{ kata_containers_qemu_sandbox_cgroup_only }}
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug.
# Compatibility for determining appropriate sandbox (VM) size:
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
# does not yet support sandbox sizing annotations.
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=false
# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
sandbox_bind_mounts=[]
# VFIO Mode
# Determines how VFIO devices should be be presented to the container.
# Options:
#
# - vfio
# Matches behaviour of OCI runtimes (e.g. runc) as much as
# possible. VFIO devices will appear in the container as VFIO
# character devices under /dev/vfio. The exact names may differ
# from the host (they need to match the VM's IOMMU group numbers
# rather than the host's)
#
# - guest-kernel
# This is a Kata-specific behaviour that's useful in certain cases.
# The VFIO device is managed by whatever driver in the VM kernel
# claims it. This means it will appear as one or more device nodes
# or network interfaces depending on the nature of the device.
# Using this mode requires specially built workloads that know how
# to locate the relevant device interfaces within the VM.
#
vfio_mode="guest-kernel"
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
disable_guest_empty_dir=false
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.

Loading…
Cancel
Save