mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 09:52:37 +02:00

Add a new way to detect where the Tesseract data are stored on a user's system. On Linux, the Tesseract data should be installed via the package manager. On macOS and Windows, they should be bundled with the Dangerzone application. The one exception is running Dangerzone locally, in which case, even on Linux, we should get the Tesseract data from the Dangerzone share/ folder.
16 lines
670 B
Python
16 lines
670 B
Python
from dangerzone.isolation_provider.dummy import Dummy
|
|
from dangerzone.logic import DangerzoneCore
|
|
from dangerzone.util import get_tessdata_dir
|
|
|
|
|
|
def test_ocr_ommisions() -> None:
    """Check that the OCR languages offered to users match the available ones.

    Compares the language names derived from the ``.traineddata`` entries in
    the Tesseract data dir with the values of ``DangerzoneCore.ocr_languages``
    and fails if either side contains a language the other lacks.
    """
    # Grab the languages that are available in the Tesseract data dir. Only
    # entries whose name ends in ".traineddata" are considered: blindly
    # chopping the suffix length off any other entry (e.g., a config dir or a
    # font file) would turn it into a bogus, possibly empty, language name.
    suffix = ".traineddata"
    available_langs = {
        entry.name[: -len(suffix)]
        for entry in get_tessdata_dir().iterdir()
        if entry.name.endswith(suffix)
    }

    # Grab the languages that Dangerzone offers to the user through the GUI/CLI.
    offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values())

    # Ensure that both the available languages and the ones we offer to the user are the
    # same.
    assert available_langs == offered_langs
|