mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
FIXUP: Use pathlib.Path for newer code
This commit is contained in:
parent
80e972b456
commit
09bb12593a
3 changed files with 14 additions and 21 deletions
|
@ -126,7 +126,7 @@ class IsolationProvider(ABC):
|
|||
return pixmap.pdfocr_tobytes(
|
||||
compress=True,
|
||||
language=ocr_lang,
|
||||
tessdata=get_tessdata_dir(),
|
||||
tessdata=str(get_tessdata_dir()),
|
||||
)
|
||||
|
||||
def pixels_to_pdf_page(
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import subprocess
|
||||
|
@ -34,22 +33,24 @@ def get_resource_path(filename: str) -> str:
|
|||
return str(resource_path)
|
||||
|
||||
|
||||
def get_tessdata_dir() -> str:
|
||||
def get_tessdata_dir() -> pathlib.Path:
|
||||
if getattr(sys, "dangerzone_dev", False) or platform.system() in (
|
||||
"Windows",
|
||||
"Darwin",
|
||||
):
|
||||
# Always use the tessdata path from the Dangerzone ./share directory, for
|
||||
# development builds, or in Windows/macOS platforms.
|
||||
return get_resource_path("tessdata")
|
||||
return pathlib.Path(get_resource_path("tessdata"))
|
||||
|
||||
tessdata_dirs = [
|
||||
pathlib.Path("/usr/share/tessdata/"), # on debian
|
||||
pathlib.Path("/usr/share/tesseract/tessdata/"), # on fedora
|
||||
]
|
||||
|
||||
for dir in tessdata_dirs:
|
||||
if dir.is_dir():
|
||||
return dir
|
||||
|
||||
fedora_tessdata_dir = "/usr/share/tesseract/tessdata/"
|
||||
debian_tessdata_dir = "/usr/share/tessdata/"
|
||||
if os.path.isdir(fedora_tessdata_dir):
|
||||
return fedora_tessdata_dir
|
||||
if os.path.isdir(debian_tessdata_dir):
|
||||
return debian_tessdata_dir
|
||||
else:
|
||||
raise RuntimeError("Tesseract language data are not installed in the system")
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,3 @@
|
|||
import pathlib
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from dangerzone.isolation_provider.dummy import Dummy
|
||||
from dangerzone.logic import DangerzoneCore
|
||||
from dangerzone.util import get_tessdata_dir
|
||||
|
@ -12,9 +5,8 @@ from dangerzone.util import get_tessdata_dir
|
|||
|
||||
def test_ocr_ommisions() -> None:
|
||||
# Grab the languages that are available in the Tesseract data dir.
|
||||
tessdata_dir = pathlib.Path(get_tessdata_dir())
|
||||
suffix_len = len(".traineddata")
|
||||
available_langs = {f.name[:-suffix_len] for f in tessdata_dir.iterdir()}
|
||||
available_langs = {f.name[:-suffix_len] for f in get_tessdata_dir().iterdir()}
|
||||
|
||||
# Grab the languages that Dangerzone offers to the user through the GUI/CLI.
|
||||
offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values())
|
||||
|
|
Loading…
Reference in a new issue