From 3e434d08d1443c118ead809a4150ff1f3c73173e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20M=C3=A9taireau?= Date: Thu, 19 Sep 2024 15:27:50 +0200 Subject: [PATCH] Always use our own seccomp policy as a default. As per Etienne Perot's comment on #908: > Then it seems to me like it would be easy to simply apply this seccomp profile under all container runtimes (since there's no reason why the same image and the same command-line would call different syscalls under different container runtimes). --- dangerzone/isolation_provider/container.py | 30 ++++++++-------------- docs/developer/gvisor.md | 5 ++-- tests/isolation_provider/test_container.py | 1 - 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index b0b0c3c..42c48f1 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -109,38 +109,30 @@ class Container(IsolationProvider): * Set the `container_engine_t` SELinux label, which allows gVisor to work on SELinux-enforcing systems (see https://github.com/freedomofpress/dangerzone/issues/880). + * Set a custom seccomp policy for every container engine, since the `ptrace(2)` + system call is forbidden by some. For Podman specifically, where applicable, we also add the following: * Do not log the container's output. - * Use a newer seccomp policy (for Podman 3.x versions only). * Do not map the host user to the container, with `--userns nomap` (available from Podman 4.1 onwards) - This particular argument is specified in `start_doc_to_pixels_proc()`, but should move here once #748 is merged. """ - # This file has been copied as is [1] from the official Podman repo. See: - # - # [1] https://github.com/containers/common/blob/d3283f8401eeeb21f3c59a425b5461f069e199a7/pkg/seccomp/seccomp.json - seccomp_json_path = get_resource_path("seccomp.gvisor.json") - custom_seccomp_policy_arg = ["--security-opt", f"seccomp={seccomp_json_path}"] if Container.get_runtime_name() == "podman": security_args = ["--log-driver", "none"] security_args += ["--security-opt", "no-new-privileges"] - - # NOTE: Ubuntu Focal/Jammy have Podman version 3, and their seccomp policy - # does not include the `ptrace()` syscall. This system call is required for - # running gVisor, so we enforce a newer seccomp policy file in that case. - # - # See also https://github.com/freedomofpress/dangerzone/issues/846 - if Container.get_runtime_version() < (4, 0): - security_args += custom_seccomp_policy_arg else: security_args = ["--security-opt=no-new-privileges:true"] - # Older Docker Desktop versions may have a seccomp policy that does not - # allow `ptrace(2)`. In these cases, we specify our own. See: - # https://github.com/freedomofpress/dangerzone/issues/846 - if Container.get_runtime_version() < (25, 0): - security_args += custom_seccomp_policy_arg + + # We specify a custom seccomp policy uniformly, because on certain container + # engines the default policy might not allow the `ptrace(2)` syscall [1]. Our + # custom seccomp policy has been copied as is [2] from the official Podman repo. + # + # [1] https://github.com/freedomofpress/dangerzone/issues/846 + # [2] https://github.com/containers/common/blob/d3283f8401eeeb21f3c59a425b5461f069e199a7/pkg/seccomp/seccomp.json + seccomp_json_path = get_resource_path("seccomp.gvisor.json") + security_args += ["--security-opt", f"seccomp={seccomp_json_path}"] security_args += ["--cap-drop", "all"] security_args += ["--cap-add", "SYS_CHROOT"] diff --git a/docs/developer/gvisor.md b/docs/developer/gvisor.md index 76871d8..e85f84c 100644 --- a/docs/developer/gvisor.md +++ b/docs/developer/gvisor.md @@ -70,8 +70,9 @@ following changes: - In distributions that offer Podman version 4 or greater, we use the `--userns nomap` flag. This flag greatly minimizes the attack surface, since the host user is not mapped within the container at all. -* In distributions that offer Podman 3.x, we add a seccomp filter that adds the - `ptrace` syscall, which is required for running gVisor. +* We use our custom seccomp policy across container engines, since some do not + allow the `ptrace` syscall (see + [#846](https://github.com/freedomofpress/dangerzone/issues/846)). * It labels the **outer** container with the `container_engine_t` SELinux label. This label is reserved for running a container engine within a container, and is necessary in environments where SELinux is enabled in enforcing mode (see diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index 35e9cce..2828ca3 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -54,7 +54,6 @@ class TestContainer(IsolationProviderTest): class TestContainerTermination(IsolationProviderTermination): - def test_linger_runtime_kill( self, provider_wait: base.IsolationProvider,