mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00

We have a container-specific test that deals with missing OCR files in the container image. This test _can_ be run under Qubes, and it may fail since it requires Podman. Make the pytest guard more strict and don't allow running this test on Qubes. Also, fix a typo in the word "omission".
59 lines
2.1 KiB
Python
59 lines
2.1 KiB
Python
import platform
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from dangerzone.isolation_provider.container import Container
|
|
from dangerzone.isolation_provider.qubes import is_qubes_native_conversion
|
|
from dangerzone.logic import DangerzoneCore
|
|
|
|
|
|
# TODO: Perform an equivalent test on Qubes.
|
|
# NOTE: We skip running this test on Windows/MacOS, because our current CI cannot run
|
|
# Docker in these platforms. It's not a problem anyways, because the result should be
|
|
# the same in all container-based platforms.
|
|
@pytest.mark.skipif(
|
|
platform.system() != "Linux" or is_qubes_native_conversion(),
|
|
reason="Container-specific",
|
|
)
|
|
def test_ocr_omissions() -> None:
|
|
# Create the command that will list all the installed languages in the container
|
|
# image.
|
|
command = [Container.get_runtime(), "run"]
|
|
command += Container.get_runtime_security_args()
|
|
command += [
|
|
Container.CONTAINER_NAME,
|
|
"find",
|
|
"/usr/share/tessdata/",
|
|
"-name",
|
|
"*.traineddata",
|
|
]
|
|
|
|
# Run the command, strip any extra whitespace, and remove the following first line
|
|
# from the result:
|
|
#
|
|
# List of available languages in "/usr/share/tessdata/" ...
|
|
installed_langs_filenames = (
|
|
subprocess.run(command, text=True, check=True, stdout=subprocess.PIPE)
|
|
.stdout.strip()
|
|
.split("\n")
|
|
)
|
|
installed_langs = set(
|
|
[
|
|
Path(filename).name.split(".traineddata")[0]
|
|
for filename in installed_langs_filenames
|
|
]
|
|
)
|
|
|
|
# Remove the "osd" and "equ" languages from the list of installed languages, since
|
|
# they are not an actual language. Read more in:
|
|
# https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/
|
|
installed_langs -= {"osd", "equ"}
|
|
|
|
# Grab the languages that Dangerzone offers to the user through the GUI/CLI.
|
|
offered_langs = set(DangerzoneCore(Container()).ocr_languages.values())
|
|
|
|
# Ensure that both the installed languages and the ones we offer to the user are the
|
|
# same.
|
|
assert installed_langs == offered_langs
|