import platform import subprocess from pathlib import Path import pytest from dangerzone.isolation_provider.container import Container from dangerzone.logic import DangerzoneCore # TODO: Perform an equivalent test on Qubes. # NOTE: We skip running this test on Windows/MacOS, because our current CI cannot run # Docker in these platforms. It's not a problem anyways, because the result should be # the same in all container-based platforms. @pytest.mark.skipif(platform.system() != "Linux", reason="Container-specific") def test_ocr_ommisions() -> None: # Create the command that will list all the installed languages in the container # image. command = [Container.get_runtime(), "run"] command += Container.get_runtime_security_args() command += [ Container.CONTAINER_NAME, "find", "/usr/share/tessdata/", "-name", "*.traineddata", ] # Run the command, strip any extra whitespace, and remove the following first line # from the result: # # List of available languages in "/usr/share/tessdata/" ... installed_langs_filenames = ( subprocess.run(command, text=True, check=True, stdout=subprocess.PIPE) .stdout.strip() .split("\n") ) installed_langs = set( [ Path(filename).name.split(".traineddata")[0] for filename in installed_langs_filenames ] ) # Remove the "osd" and "equ" languages from the list of installed languages, since # they are not an actual language. Read more in: # https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/ installed_langs -= {"osd", "equ"} # Grab the languages that Dangerzone offers to the user through the GUI/CLI. offered_langs = set(DangerzoneCore(Container()).ocr_languages.values()) # Ensure that both the installed languages and the ones we offer to the user are the # same. assert installed_langs == offered_langs