mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
qubes: Pass OCR parameters properly
Pass OCR parameters to conversion functions as arguments instead of setting environment variables. Fixes #455.
This commit is contained in:
parent
8a0c0a4673
commit
e64d1da61f
2 changed files with 9 additions and 15 deletions
|
@ -11,12 +11,13 @@ import json
|
|||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
from .common import DangerzoneConverter, running_on_qubes
|
||||
|
||||
|
||||
class PixelsToPDF(DangerzoneConverter):
|
||||
async def convert(self) -> None:
|
||||
async def convert(self, ocr_lang: Optional[str] = None) -> None:
|
||||
self.percentage = 50.0
|
||||
|
||||
num_pages = len(glob.glob("/tmp/dangerzone/page-*.rgb"))
|
||||
|
@ -43,7 +44,7 @@ class PixelsToPDF(DangerzoneConverter):
|
|||
total_size += page_size
|
||||
timeout = self.calculate_timeout(page_size, 1)
|
||||
|
||||
if os.environ.get("OCR") == "1": # OCR the document
|
||||
if ocr_lang: # OCR the document
|
||||
self.update_progress(
|
||||
f"Converting page {page}/{num_pages} from pixels to searchable PDF"
|
||||
)
|
||||
|
@ -71,7 +72,7 @@ class PixelsToPDF(DangerzoneConverter):
|
|||
png_filename,
|
||||
ocr_filename,
|
||||
"-l",
|
||||
os.environ.get("OCR_LANGUAGE"), # type: ignore
|
||||
ocr_lang,
|
||||
"--dpi",
|
||||
"70",
|
||||
"pdf",
|
||||
|
@ -152,10 +153,11 @@ class PixelsToPDF(DangerzoneConverter):
|
|||
|
||||
|
||||
async def main() -> int:
|
||||
ocr_lang = os.environ.get("OCR_LANGUAGE") if os.environ.get("OCR") == "1" else None
|
||||
converter = PixelsToPDF()
|
||||
|
||||
try:
|
||||
await converter.convert()
|
||||
await converter.convert(ocr_lang)
|
||||
error_code = 0 # Success!
|
||||
|
||||
except (RuntimeError, TimeoutError, ValueError) as e:
|
||||
|
|
|
@ -176,25 +176,17 @@ class Qubes(IsolationProvider):
|
|||
f"Conversion output (doc to pixels)\n{self.sanitize_conversion_str(untrusted_log)}"
|
||||
)
|
||||
|
||||
# FIXME pass OCR stuff properly (see #455)
|
||||
old_environ = dict(os.environ)
|
||||
if ocr_lang:
|
||||
os.environ["OCR"] = "1"
|
||||
os.environ["OCR_LANGUAGE"] = ocr_lang
|
||||
|
||||
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
|
||||
self.print_progress_trusted(document, error, text, percentage)
|
||||
|
||||
asyncio.run(PixelsToPDF(progress_callback=print_progress_wrapper).convert())
|
||||
asyncio.run(
|
||||
PixelsToPDF(progress_callback=print_progress_wrapper).convert(ocr_lang)
|
||||
)
|
||||
|
||||
percentage = 100.0
|
||||
text = "Safe PDF created"
|
||||
self.print_progress_trusted(document, False, text, percentage)
|
||||
|
||||
# FIXME remove once the OCR args are no longer passed with env vars
|
||||
os.environ.clear()
|
||||
os.environ.update(old_environ)
|
||||
|
||||
shutil.move(CONVERTED_FILE_PATH, document.output_filename)
|
||||
success = True
|
||||
|
||||
|
|
Loading…
Reference in a new issue