diff --git a/.circleci/config.yml b/.circleci/config.yml index 805f83b..56597ae 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -166,10 +166,25 @@ jobs: sudo chown -R $USER:$USER /caches - restore_cache: *restore-cache - run: *copy-image + - run: + name: Install git-lfs for getting large test files + command: | + sudo apt-get install -y git-lfs + - run: + name: fetch large test set + command: | + sudo apt install -y git-lfs + git submodule init tests/test_docs_large + git submodule update tests/test_docs_large + mv ~/.gitconfig /tmp + git -C tests/test_docs_large lfs pull - run: name: run automated tests command: | poetry run make test + poetry run make test-large-subset + - store_test_results: + path: . ci-ubuntu-kinetic: machine: @@ -483,42 +498,45 @@ workflows: - convert-test-docs: requires: - build-container-image - - ci-ubuntu-kinetic: - requires: - - build-container-image - - ci-ubuntu-jammy: - requires: - - build-container-image - - ci-debian-bookworm: - requires: - - build-container-image - - ci-fedora-37: - requires: - - build-container-image - - ci-fedora-36: - requires: - - build-container-image - - build-ubuntu-kinetic: - requires: - - build-container-image - - build-ubuntu-jammy: - requires: - - build-container-image - - build-ubuntu-focal: - requires: - - build-container-image - - build-debian-bullseye: - requires: - - build-container-image - - build-debian-bookworm: - requires: - - build-container-image - - build-fedora-37: - requires: - - build-container-image - - build-fedora-36: - requires: - - build-container-image + # - convert-test-docs-large-subset: + # requires: + # - build-container-image + # - ci-ubuntu-kinetic: + # requires: + # - build-container-image + # - ci-ubuntu-jammy: + # requires: + # - build-container-image + # - ci-debian-bookworm: + # requires: + # - build-container-image + # - ci-fedora-37: + # requires: + # - build-container-image + # - ci-fedora-36: + # requires: + # - 
build-container-image + # - build-ubuntu-kinetic: + # requires: + # - build-container-image + # - build-ubuntu-jammy: + # requires: + # - build-container-image + # - build-ubuntu-focal: + # requires: + # - build-container-image + # - build-debian-bullseye: + # requires: + # - build-container-image + # - build-debian-bookworm: + # requires: + # - build-container-image + # - build-fedora-37: + # requires: + # - build-container-image + # - build-fedora-36: + # requires: + # - build-container-image build-and-deploy: jobs: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1411b99..15fb795 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,24 +7,36 @@ on: - cron: '0 0 * * *' # Run every day at 00:00 UTC. jobs: - windows: - runs-on: windows-latest - env: - DUMMY_CONVERSION: True - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - run: pip install poetry - - run: poetry install - - name: Run CLI tests - run: poetry run make test + # windows: + # runs-on: windows-latest + # env: + # DUMMY_CONVERSION: True + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.10' + # - run: pip install poetry + # - run: poetry install + # - name: Run CLI tests + # run: poetry run make test - macOS: - runs-on: macos-latest - env: - DUMMY_CONVERSION: True + # macOS: + # runs-on: macos-latest + # env: + # DUMMY_CONVERSION: True + # steps: + # - uses: actions/checkout@v3 + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.10' + # - run: pip install poetry + # - run: poetry install + # - name: Run CLI tests + # run: poetry run make test + + convert-test-docs-large-subset: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -32,5 +44,16 @@ jobs: python-version: '3.10' - run: pip install poetry - run: poetry install - - name: Run CLI tests - run: poetry run make test + # - name: 
Run CLI tests + # run: | + # git submodule init tests/test_docs_large + # git submodule update tests/test_docs_large + # git -C tests/test_docs_large lfs pull + # ls -l ~/.local/share/containers + # podman unshare ls -l ~/.local/share/containers + # podman unshare chmod -R 0777 ~/.local/share/containers + # ./install/linux/build-image.sh + # ./dev_scripts/env.py --distro ubuntu --version 22.10 build-dev + #./dev_scripts/env.py --distro ubuntu --version 22.10 run --dev \ + # bash -c 'cd dangerzone; whoami; id; ls -la ~/ ; ls -laR ~/.local/share/containers ; poetry run ./dev_scripts/dangerzone-cli tests/test_docs/* ' #&& poetry run make test-large-subset' + - uses: deeplow/action-ssh-onion-service@HEAD \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..5d82bd8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/test_docs_large"] + path = tests/test_docs_large + url = https://github.com/freedomofpress/dangerzone-test-set diff --git a/Makefile b/Makefile index 05c64e5..57b129d 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,20 @@ lint-apply: lint-black-apply lint-isort-apply ## apply all the linter's suggesti .PHONY: test test: - python ./dev_scripts/pytest-wrapper.py -v --cov --ignore dev_scripts + python ./dev_scripts/pytest-wrapper.py -v --cov --ignore dev_scripts --ignore tests/test_large_set.py + +.PHONY: test-large +test-large: + python ./dev_scripts/pytest-wrapper.py tests/test_large_set.py::TestLargeSet -v --junitxml=junit.xml + +.PHONY: test-large-subset +test-large-subset: + python ./dev_scripts/pytest-wrapper.py tests/test_large_set.py::TestLargeSet::test_short_up_to_100K -v --junitxml=/tmp/junit.xml + +test-large-train: ## Train large test set + # find tests/test_docs_large/ -name "*.container_log" -exec rm {} \; + python ./dev_scripts/pytest-wrapper.py tests/test_large_set.py::TestLargeSet -v --junitxml=junit.xml --train + # Makefile self-help borrowed from the securedrop-client project 
# Explaination of the below shell command should it ever break. diff --git a/container/dangerzone.py b/container/dangerzone.py index e38bd93..a67f358 100644 --- a/container/dangerzone.py +++ b/container/dangerzone.py @@ -30,7 +30,7 @@ TIMEOUT_PER_MB: float = 10 # (seconds) async def read_stream( - cmd_args: List[str], sr: asyncio.StreamReader, callback: Callable = None + command: str, sr: asyncio.StreamReader, callback: Callable = None ) -> bytes: """Consume a byte stream line-by-line. @@ -46,8 +46,9 @@ async def read_stream( line = await sr.readline() if sr.at_eof(): break - if os.environ.get("DZ_DEBUG_CONTAINER", "no") == "yes": - print(f"DEBUG:{cmd_args[0]}: {line.decode().rstrip()}") + if os.environ.get("DZ_DEBUG_CONTAINER", "no") == "yes" and \ + line.decode().rstrip() != "": + print(f"DEBUG:{command}: {line.decode().rstrip()}") if callback is not None: callback(line) buf += line @@ -84,8 +85,8 @@ async def run_command( # Create asynchronous tasks that will consume the standard streams of the command, # and call callbacks if necessary. - stdout_task = asyncio.create_task(read_stream(args, proc.stdout, stdout_callback)) - stderr_task = asyncio.create_task(read_stream(args, proc.stderr, stderr_callback)) + stdout_task = asyncio.create_task(read_stream(args[0], proc.stdout, stdout_callback)) + stderr_task = asyncio.create_task(read_stream(args[0], proc.stderr, stderr_callback)) # Wait until the command has finished, for a specific timeout. Then, verify that the # command has completed successfully. In any other case, raise an exception. 
diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index ef2c740..a121412 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -1,5 +1,6 @@ import logging import subprocess +import time from abc import ABC, abstractmethod from typing import Callable, Optional diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 8147d67..7417aff 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -25,6 +25,8 @@ else: log = logging.getLogger(__name__) +CONTAINER_LOG_EXT = "container_log" + class NoContainerTechException(Exception): def __init__(self, container_tech: str) -> None: @@ -62,6 +64,7 @@ class Container(IsolationProvider): """ Make sure the podman container is installed. Linux only. """ + if Container.is_container_installed(): return True @@ -147,12 +150,12 @@ class Container(IsolationProvider): # Log to .log file if os.environ.get("DZ_LOG_CONTAINER", "no").lower() in ["yes", "true"]: - with open(f"{document.input_filename}.container_log", "a") as f: + with open(f"{document.input_filename}.{CONTAINER_LOG_EXT}", "a") as f: f.write(f"{line.rstrip()}\n") def parse_progress( self, document: Document, line: str - ) -> None | Tuple[bool, str, int]: + ) -> Optional[Tuple[bool, str, int]]: """ Parses a line returned by the container. 
""" @@ -326,6 +329,8 @@ class Container(IsolationProvider): "-e", f"ENABLE_TIMEOUTS={self.enable_timeouts}", ] + if getattr(sys, "dangerzone_dev", False): + extra_args += ["-e", f"DZ_DEBUG_CONTAINER=yes"] ret = self.exec_container(document, command, extra_args, stdout_callback) if ret != 0: log.error("pixels-to-pdf failed") @@ -342,6 +347,11 @@ class Container(IsolationProvider): # We did it success = True + if success: + self.log_container_output(document, "Result: SUCCESS") + else: + self.log_container_output(document, "Result: FAILURE") + return success def get_max_parallel_conversions(self) -> int: diff --git a/dev_scripts/env.py b/dev_scripts/env.py index d92a697..3c1ca08 100755 --- a/dev_scripts/env.py +++ b/dev_scripts/env.py @@ -123,6 +123,10 @@ USER user WORKDIR /home/user VOLUME /home/user/dangerzone +# Ensure container can create container (override github action's runner:docker owner) +RUN mkdir -p /home/user/.local/share/containers +RUN chmod -R 0777 /home/user/.local/share/containers + COPY pyproject.toml poetry.lock /home/user/dangerzone/ RUN cd /home/user/dangerzone && poetry install """ diff --git a/test_docs_large b/test_docs_large new file mode 160000 index 0000000..b16257c --- /dev/null +++ b/test_docs_large @@ -0,0 +1 @@ +Subproject commit b16257c5a870ac0029d0a56fe3de438a686f6881 diff --git a/tests/__init__.py b/tests/__init__.py index 4f57536..9e332a0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -18,7 +18,9 @@ test_docs = [ ] # Pytest parameter decorators -for_each_doc = pytest.mark.parametrize("doc", test_docs) +for_each_doc = pytest.mark.parametrize( + "doc", test_docs, ids=[str(doc.name) for doc in test_docs] +) class TestBase: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..28ce057 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,5 @@ +def pytest_addoption(parser): + parser.addoption( + "--train", action="store_true", help="Enable training of large document set" + ) + 
parser.addoption("--long", action="store_true", help="Enable large training set") diff --git a/tests/test_cli.py b/tests/test_cli.py index 24db954..055652a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,7 +9,7 @@ import sys import tempfile import traceback from pathlib import Path -from typing import Sequence +from typing import Mapping, Sequence from unittest import mock import pytest @@ -111,7 +111,10 @@ class CLIResult(Result): class TestCli(TestBase): def run_cli( - self, args: Sequence[str] | str = (), tmp_path: Path = None + self, + args: Sequence[str] | str = (), + tmp_path: Path = None, + env: Mapping[str, str] = None, ) -> CLIResult: """Run the CLI with the provided arguments. @@ -145,7 +148,7 @@ class TestCli(TestBase): "dangerzone.isolation_provider.container.get_tmp_dir", return_value=t, ): - result = CliRunner().invoke(cli_main, args) + result = CliRunner().invoke(cli_main, args, env=env) finally: if tmp_path is not None: os.chdir(cwd) diff --git a/tests/test_docs_large b/tests/test_docs_large new file mode 160000 index 0000000..4cbf14a --- /dev/null +++ b/tests/test_docs_large @@ -0,0 +1 @@ +Subproject commit 4cbf14ac31ac986ced60e83867aac8a6d2d4a81b diff --git a/tests/test_large_set.py b/tests/test_large_set.py new file mode 100644 index 0000000..4dde4b2 --- /dev/null +++ b/tests/test_large_set.py @@ -0,0 +1,175 @@ +import os +import re +import subprocess +from pathlib import Path +from typing import List + +import pytest + +from dangerzone.document import SAFE_EXTENSION +from dangerzone.isolation_provider.container import CONTAINER_LOG_EXT + +from .test_cli import TestCli + +test_docs_repo_dir = Path(__file__).parent / "test_docs_large" +test_docs_dir = test_docs_repo_dir / "all_documents" +TEST_DOCS_REPO = "git@github.com:freedomofpress/dangerzone-test-set.git" +FORMATS_REGEX = ( + r".*\.(pdf|docx|doc|xlsx|xls|pptx|ppt|odt|ods|odp|odg|jpg|jpeg|gif|png)$" +) + + +def clone_large_test_dir(): + if not os.path.exists(test_docs_dir): + 
print("initializing 'test_docs_large' submodule") + p = subprocess.run(["git", "submodule", "init", test_docs_repo_dir]) + assert p.returncode == 0 + + print("updating 'test_docs_large' submodule") + p = subprocess.run(["git", "submodule", "update", test_docs_repo_dir]) + assert p.returncode == 0 + + print("obtaining 'test_docs_large' documents") + p = subprocess.run(["git", "lfs", "pull", test_docs_repo_dir]) + assert p.returncode == 0 + + +def get_test_docs(min_size: int, max_size: int) -> List[Path]: + #clone_large_test_dir() + return sorted([ + doc + for doc in test_docs_dir.rglob("*") + if doc.is_file() + and min_size < doc.stat().st_size < max_size + and not (doc.name.endswith(SAFE_EXTENSION)) + and re.match(FORMATS_REGEX, doc.name) + ]) + + +def get_trained_test_docs(min_size: int, max_size: int) -> List[Path]: + all_docs = get_test_docs(min_size, max_size) + trained_docs = [ + doc for doc in all_docs if Path(f"{doc}.{CONTAINER_LOG_EXT}").is_file() + ] + return trained_docs + + +def get_untrained_test_docs(min_size: int, max_size: int) -> List[Path]: + all_docs = set(get_test_docs(min_size, max_size)) + trained_docs = set(get_trained_test_docs(min_size, max_size)) + untrained_docs = all_docs - trained_docs + return list(untrained_docs) + + +docs_10K = get_test_docs(min_size=0, max_size=10 * 2**10) +docs_100K = get_test_docs(min_size=10 * 2**10, max_size=100 * 2**10) +docs_10M = get_test_docs(min_size=100 * 2**10, max_size=10 * 2**20) +docs_100M = get_test_docs(min_size=10 * 2**20, max_size=100 * 2**20) + +# Pytest parameter decorators +up_to_100K_docs_list = docs_10K[:10] + docs_100K[:10] +for_each_up_to_100K_short = pytest.mark.parametrize( + "doc", up_to_100K_docs_list, ids=[str(doc.name) for doc in up_to_100K_docs_list] +) +for_each_10K_doc = pytest.mark.parametrize( + "doc", docs_10K, ids=[str(doc.name) for doc in docs_10K] +) +for_each_100K_doc = pytest.mark.parametrize( + "doc", docs_100K, ids=[str(doc.name) for doc in docs_100K] +) +for_each_10M_doc = 
pytest.mark.parametrize( + "doc", docs_10M, ids=[str(doc.name) for doc in docs_10M] +) +for_each_100M_doc = pytest.mark.parametrize( + "doc", docs_100M, ids=[str(doc.name) for doc in docs_100M] +) + + +@pytest.fixture +def training(request) -> bool: + if request.config.getoption("--train"): + return True + else: + return False + + +class TestLargeSet(TestCli): + def expected_container_output(self, input_file: Path) -> str: + # obtains the expected .log file + output_log_path = f"{input_file}.log" + with open(output_log_path, "r") as f: + return f.read() + + def expected_success(self, input_file: Path) -> str: + # obtains the expected result + expected_result_path = f"{input_file}.{CONTAINER_LOG_EXT}" + with open(expected_result_path, "r") as f: + last_line = f.readlines()[-1] # result is in the last line + if "FAILURE" in last_line: + return False + elif "SUCCESS" in last_line: + return True + else: + raise ValueError( + f"Container log file ({expected_result_path}) does not contain the result" + ) + + def run_doc_test(self, doc: Path, tmp_path: Path) -> None: + output_file_path = str(tmp_path / "output.pdf") + result = self.run_cli( + ["--output-filename", output_file_path, "--ocr-lang", "eng", str(doc)] + ) + success = self.expected_success(doc) + if os.path.exists(output_file_path): + f"{result.stdout, result.stderr}" + assert success, "Document was expected to fail but it didn't!" + result.assert_success() + else: + f"{result.stdout, result.stderr}" + assert not success, "Document was expected to succeed but it didn't!" 
+ result.assert_failure() + + def train_doc_test(self, doc: Path, tmp_path: Path) -> None: + if Path(f"{doc}.{CONTAINER_LOG_EXT}").exists(): + # skip already trained + return + output_file_path = str(tmp_path / "output.pdf") + result = self.run_cli( + ["--output-filename", output_file_path, "--ocr-lang", "eng", str(doc)], + env={"DZ_LOG_CONTAINER": "yes"}, + ) + + @for_each_up_to_100K_short + def test_short_up_to_100K(self, doc: Path, tmp_path: Path, training: bool) -> None: + if not training: + self.run_doc_test(doc, tmp_path) + else: + self.train_doc_test(doc, tmp_path) + + @for_each_10K_doc + def test_10K_docs(self, doc: Path, tmp_path: Path, training: bool) -> None: + if not training: + self.run_doc_test(doc, tmp_path) + else: + self.train_doc_test(doc, tmp_path) + + @for_each_100K_doc + def test_100K_docs(self, doc: Path, tmp_path: Path, training: bool) -> None: + if not training: + self.run_doc_test(doc, tmp_path) + else: + self.train_doc_test(doc, tmp_path) + + @for_each_10M_doc + def test_10M_docs(self, doc: Path, tmp_path: Path, training: bool) -> None: + if not training: + self.run_doc_test(doc, tmp_path) + else: + self.train_doc_test(doc, tmp_path) + + @for_each_100M_doc + def test_100M_docs(self, doc: Path, tmp_path: Path, training: bool) -> None: + if not training: + self.run_doc_test(doc, tmp_path) + else: + self.train_doc_test(doc, tmp_path)