mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
FIXUP: Use pathlib.Path for newer code
This commit is contained in:
parent
80e972b456
commit
09bb12593a
3 changed files with 14 additions and 21 deletions
|
@ -126,7 +126,7 @@ class IsolationProvider(ABC):
|
||||||
return pixmap.pdfocr_tobytes(
|
return pixmap.pdfocr_tobytes(
|
||||||
compress=True,
|
compress=True,
|
||||||
language=ocr_lang,
|
language=ocr_lang,
|
||||||
tessdata=get_tessdata_dir(),
|
tessdata=str(get_tessdata_dir()),
|
||||||
)
|
)
|
||||||
|
|
||||||
def pixels_to_pdf_page(
|
def pixels_to_pdf_page(
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import os
|
|
||||||
import pathlib
|
import pathlib
|
||||||
import platform
|
import platform
|
||||||
import subprocess
|
import subprocess
|
||||||
|
@ -34,23 +33,25 @@ def get_resource_path(filename: str) -> str:
|
||||||
return str(resource_path)
|
return str(resource_path)
|
||||||
|
|
||||||
|
|
||||||
def get_tessdata_dir() -> str:
|
def get_tessdata_dir() -> pathlib.Path:
|
||||||
if getattr(sys, "dangerzone_dev", False) or platform.system() in (
|
if getattr(sys, "dangerzone_dev", False) or platform.system() in (
|
||||||
"Windows",
|
"Windows",
|
||||||
"Darwin",
|
"Darwin",
|
||||||
):
|
):
|
||||||
# Always use the tessdata path from the Dangerzone ./share directory, for
|
# Always use the tessdata path from the Dangerzone ./share directory, for
|
||||||
# development builds, or in Windows/macOS platforms.
|
# development builds, or in Windows/macOS platforms.
|
||||||
return get_resource_path("tessdata")
|
return pathlib.Path(get_resource_path("tessdata"))
|
||||||
|
|
||||||
fedora_tessdata_dir = "/usr/share/tesseract/tessdata/"
|
tessdata_dirs = [
|
||||||
debian_tessdata_dir = "/usr/share/tessdata/"
|
pathlib.Path("/usr/share/tessdata/"), # on debian
|
||||||
if os.path.isdir(fedora_tessdata_dir):
|
pathlib.Path("/usr/share/tesseract/tessdata/"), # on fedora
|
||||||
return fedora_tessdata_dir
|
]
|
||||||
if os.path.isdir(debian_tessdata_dir):
|
|
||||||
return debian_tessdata_dir
|
for dir in tessdata_dirs:
|
||||||
else:
|
if dir.is_dir():
|
||||||
raise RuntimeError("Tesseract language data are not installed in the system")
|
return dir
|
||||||
|
|
||||||
|
raise RuntimeError("Tesseract language data are not installed in the system")
|
||||||
|
|
||||||
|
|
||||||
def get_version() -> str:
|
def get_version() -> str:
|
||||||
|
|
|
@ -1,10 +1,3 @@
|
||||||
import pathlib
|
|
||||||
import platform
|
|
||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from dangerzone.isolation_provider.dummy import Dummy
|
from dangerzone.isolation_provider.dummy import Dummy
|
||||||
from dangerzone.logic import DangerzoneCore
|
from dangerzone.logic import DangerzoneCore
|
||||||
from dangerzone.util import get_tessdata_dir
|
from dangerzone.util import get_tessdata_dir
|
||||||
|
@ -12,9 +5,8 @@ from dangerzone.util import get_tessdata_dir
|
||||||
|
|
||||||
def test_ocr_ommisions() -> None:
|
def test_ocr_ommisions() -> None:
|
||||||
# Grab the languages that are available in the Tesseract data dir.
|
# Grab the languages that are available in the Tesseract data dir.
|
||||||
tessdata_dir = pathlib.Path(get_tessdata_dir())
|
|
||||||
suffix_len = len(".traineddata")
|
suffix_len = len(".traineddata")
|
||||||
available_langs = {f.name[:-suffix_len] for f in tessdata_dir.iterdir()}
|
available_langs = {f.name[:-suffix_len] for f in get_tessdata_dir().iterdir()}
|
||||||
|
|
||||||
# Grab the languages that Dangerzone offers to the user through the GUI/CLI.
|
# Grab the languages that Dangerzone offers to the user through the GUI/CLI.
|
||||||
offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values())
|
offered_langs = set(DangerzoneCore(Dummy()).ocr_languages.values())
|
||||||
|
|
Loading…
Reference in a new issue