"""Check that the OCR languages offered to users match the container image.

Introduced by commit 641aa131c95099f92edab6d1dc4682c7289ab1ad
("ci: Add test for OCR languages", Alex Pyrgiotis, Mon, 22 May 2023
16:41:27 +0300):

    Test that the languages that we provide to users for OCR match the
    languages that are installed in the container image.

    Fixes #417
"""

import platform
import subprocess

import pytest

from dangerzone.isolation_provider.container import Container
from dangerzone.logic import DangerzoneCore


# TODO: Perform an equivalent test on Qubes.
# NOTE: We skip running this test on Windows/macOS, because our current CI cannot run
# Docker on these platforms. It's not a problem anyway, because the result should be
# the same on all container-based platforms.
@pytest.mark.skipif(platform.system() != "Linux", reason="Container-specific")
def test_ocr_ommisions() -> None:
    """Assert that the installed Tesseract languages equal the offered ones.

    Runs ``tesseract --list-langs`` inside the Dangerzone container image and
    compares the reported languages (minus the ``osd`` and ``equ`` entries)
    with the values of ``DangerzoneCore.ocr_languages``.

    Raises:
        subprocess.CalledProcessError: If the container command exits with a
            non-zero status (``check=True``).
        AssertionError: If the two sets of languages differ.
    """
    # Create the command that will list all the installed languages in the container
    # image. Pass `--rm` so that the container is removed once the command exits,
    # instead of leaving a stopped container behind after every test run.
    runtime = Container.get_runtime()
    command = [
        runtime,
        "run",
        "--rm",
        Container.CONTAINER_NAME,
        "tesseract",
        "--list-langs",
    ]

    # Run the command, strip any extra whitespace, and remove the following first line
    # from the result:
    #
    #     List of available languages in "/usr/share/tessdata/" ...
    #
    # `splitlines()` is used instead of `split("\n")` so that a stray "\r\n" line
    # ending cannot leak into a language name.
    result = subprocess.run(command, text=True, check=True, stdout=subprocess.PIPE)
    installed_langs = set(result.stdout.strip().splitlines()[1:])

    # Remove the "osd" and "equ" entries from the list of installed languages, since
    # they are not actual languages. Read more in:
    # https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/
    installed_langs -= {"osd", "equ"}

    # Grab the languages that Dangerzone offers to the user through the GUI/CLI.
    offered_langs = set(DangerzoneCore(Container(True)).ocr_languages.values())

    # Ensure that both the installed languages and the ones we offer to the user are the
    # same.
    assert installed_langs == offered_langs