mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
ci: Add test for OCR languages
Test that the languages that we provide to users for OCR match the languages that are installed in the container image. Fixes #417
This commit is contained in:
parent
5bd609781d
commit
641aa131c9
1 changed files with 41 additions and 0 deletions
41
tests/test_ocr.py
Normal file
41
tests/test_ocr.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
import platform
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
from dangerzone.isolation_provider.container import Container
|
||||
from dangerzone.logic import DangerzoneCore
|
||||
|
||||
|
||||
# TODO: Perform an equivalent test on Qubes.
|
||||
# NOTE: We skip running this test on Windows/MacOS, because our current CI cannot run
|
||||
# Docker on these platforms. It's not a problem anyway, because the result should be
|
||||
# the same in all container-based platforms.
|
||||
@pytest.mark.skipif(platform.system() != "Linux", reason="Container-specific")
def test_ocr_ommisions() -> None:
    """Check that the offered OCR languages match those in the container image.

    Runs ``tesseract --list-langs`` inside the container image and compares the
    reported languages (minus the "osd" and "equ" pseudo-languages) with the
    values of ``DangerzoneCore.ocr_languages``.

    Raises:
        subprocess.CalledProcessError: If the container command exits with a
            non-zero status.
        AssertionError: If the two sets of languages differ.
    """
    # Create the command that will list all the installed languages in the container
    # image. Pass `--rm` so that the container is removed as soon as the command
    # exits, instead of leaving a stopped container behind after every test run.
    runtime = Container.get_runtime()
    command = [
        runtime,
        "run",
        "--rm",
        Container.CONTAINER_NAME,
        "tesseract",
        "--list-langs",
    ]

    # Run the command, strip any extra whitespace, and remove the following first line
    # from the result:
    #
    #     List of available languages in "/usr/share/tessdata/" ...
    #
    result = subprocess.run(command, text=True, check=True, stdout=subprocess.PIPE)
    lang_lines = result.stdout.strip().splitlines()[1:]
    # Strip each entry and skip blank lines, so that stray whitespace in the output
    # cannot produce bogus language names.
    installed_langs = {line.strip() for line in lang_lines if line.strip()}

    # Remove "osd" and "equ" from the list of installed languages, since they are not
    # actual languages. Read more in:
    # https://pyimagesearch.com/2021/11/15/tesseract-page-segmentation-modes-psms-explained-how-to-improve-your-ocr-accuracy/
    installed_langs -= {"osd", "equ"}

    # Grab the languages that Dangerzone offers to the user through the GUI/CLI.
    offered_langs = set(DangerzoneCore(Container(True)).ocr_languages.values())

    # Ensure that both the installed languages and the ones we offer to the user are
    # the same. On failure, report exactly which languages are missing on each side.
    not_installed = sorted(offered_langs - installed_langs)
    not_offered = sorted(installed_langs - offered_langs)
    assert installed_langs == offered_langs, (
        f"Offered but not installed: {not_installed}; "
        f"installed but not offered: {not_offered}"
    )
|
Loading…
Reference in a new issue