FIXUP: Use pathlib.Path for newer code

This commit is contained in:
Alex Pyrgiotis 2024-10-09 18:57:33 +03:00
parent 80e972b456
commit 09bb12593a
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
3 changed files with 14 additions and 21 deletions

View file

@@ -126,7 +126,7 @@ class IsolationProvider(ABC):
return pixmap.pdfocr_tobytes( return pixmap.pdfocr_tobytes(
compress=True, compress=True,
language=ocr_lang, language=ocr_lang,
tessdata=get_tessdata_dir(), tessdata=str(get_tessdata_dir()),
) )
def pixels_to_pdf_page( def pixels_to_pdf_page(

View file

@@ -1,4 +1,3 @@
import os
import pathlib import pathlib
import platform import platform
import subprocess import subprocess
@@ -34,23 +33,25 @@ def get_resource_path(filename: str) -> str:
return str(resource_path) return str(resource_path)
def get_tessdata_dir() -> str: def get_tessdata_dir() -> pathlib.Path:
if getattr(sys, "dangerzone_dev", False) or platform.system() in ( if getattr(sys, "dangerzone_dev", False) or platform.system() in (
"Windows", "Windows",
"Darwin", "Darwin",
): ):
# Always use the tessdata path from the Dangerzone ./share directory, for # Always use the tessdata path from the Dangerzone ./share directory, for
# development builds, or in Windows/macOS platforms. # development builds, or in Windows/macOS platforms.
return get_resource_path("tessdata") return pathlib.Path(get_resource_path("tessdata"))
fedora_tessdata_dir = "/usr/share/tesseract/tessdata/" tessdata_dirs = [
debian_tessdata_dir = "/usr/share/tessdata/" pathlib.Path("/usr/share/tessdata/"), # on debian
if os.path.isdir(fedora_tessdata_dir): pathlib.Path("/usr/share/tesseract/tessdata/"), # on fedora
return fedora_tessdata_dir ]
if os.path.isdir(debian_tessdata_dir):
return debian_tessdata_dir for dir in tessdata_dirs:
else: if dir.is_dir():
raise RuntimeError("Tesseract language data are not installed in the system") return dir
raise RuntimeError("Tesseract language data are not installed in the system")
def get_version() -> str: def get_version() -> str:

View file

@@ -1,10 +1,3 @@
import pathlib
import platform
import subprocess
from pathlib import Path
import pytest
from dangerzone.isolation_provider.dummy import Dummy from dangerzone.isolation_provider.dummy import Dummy
from dangerzone.logic import DangerzoneCore from dangerzone.logic import DangerzoneCore
from dangerzone.util import get_tessdata_dir from dangerzone.util import get_tessdata_dir
@@ -12,9 +5,8 @@ from dangerzone.util import get_tessdata_dir
def test_ocr_ommisions() -> None: def test_ocr_ommisions() -> None:
# Grab the languages that are available in the Tesseract data dir. # Grab the languages that are available in the Tesseract data dir.
tessdata_dir = pathlib.Path(get_tessdata_dir())
suffix_len = len(".traineddata") suffix_len = len(".traineddata")
available_langs = {f.name[:-suffix_len] for f in tessdata_dir.iterdir()} available_langs = {f.name[:-suffix_len] for f in get_tessdata_dir().iterdir()}
# Grab the languages that Dangerzone offers to the user through the GUI/CLI. # Grab the languages that Dangerzone offers to the user through the GUI/CLI.
offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values()) offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values())