Deduplicate container output parsing (stdout_callback)

The container output logging logic was in both the CLI and the GUI.
This change moves the core parsing logic to container.py.

Since the code was largely the same, the CLI no longer needs to specify
a stdout_callback, since all the necessary logging already happens in container.py.

The GUI now only adds a stdout_callback to record whether there was an
error during the conversion process and to forward the parsed progress to the UI.
This commit is contained in:
deeplow 2022-09-26 10:39:59 +01:00
parent 2d587f4082
commit 6d2fdf0afe
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 54 additions and 53 deletions

View file

@ -1,4 +1,3 @@
import json
import logging import logging
import sys import sys
from typing import Any, Callable, List, Optional, TypeVar from typing import Any, Callable, List, Optional, TypeVar
@ -69,19 +68,7 @@ def cli_main(
# Convert the document # Convert the document
print_header("Converting document to safe PDF") print_header("Converting document to safe PDF")
def stdout_callback(line: str) -> None: dangerzone.convert_documents(ocr_lang)
try:
status = json.loads(line)
s = Style.BRIGHT + Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
else:
s += Style.RESET_ALL + status["text"]
click.echo(s)
except:
click.echo(f"Invalid JSON returned from container: {line}")
dangerzone.convert_documents(ocr_lang, stdout_callback)
documents_safe = dangerzone.get_safe_documents() documents_safe = dangerzone.get_safe_documents()
documents_failed = dangerzone.get_failed_documents() documents_failed = dangerzone.get_failed_documents()

View file

@ -1,4 +1,5 @@
import gzip import gzip
import json
import logging import logging
import os import os
import pipes import pipes
@ -6,10 +7,12 @@ import platform
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from typing import Callable, List, Optional from typing import Callable, List, Optional, Tuple
import appdirs import appdirs
from colorama import Fore, Style
from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo from .util import get_resource_path, get_subprocess_startupinfo
container_name = "dangerzone.rocks/dangerzone" container_name = "dangerzone.rocks/dangerzone"
@ -127,7 +130,34 @@ def is_container_installed() -> bool:
return installed return installed
def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int: def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args) args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str) log.info("> " + args_str)
@ -140,18 +170,21 @@ def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int:
universal_newlines=True, universal_newlines=True,
startupinfo=startupinfo, startupinfo=startupinfo,
) as p: ) as p:
if stdout_callback and p.stdout is not None: if p.stdout is not None:
for line in p.stdout: for line in p.stdout:
stdout_callback(line) (error, text, percentage) = parse_progress(document, line)
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate() p.communicate()
return p.returncode return p.returncode
def exec_container( def exec_container(
document: Document,
command: List[str], command: List[str],
extra_args: List[str] = [], extra_args: List[str] = [],
stdout_callback: Callable[[str], None] = None, stdout_callback: Optional[Callable] = None,
) -> int: ) -> int:
container_runtime = get_runtime() container_runtime = get_runtime()
@ -181,14 +214,13 @@ def exec_container(
) )
args = [container_runtime] + args args = [container_runtime] + args
return exec(args, stdout_callback) return exec(document, args, stdout_callback)
def convert( def convert(
input_filename: str, document: Document,
output_filename: str,
ocr_lang: Optional[str], ocr_lang: Optional[str],
stdout_callback: Callable[[str], None], stdout_callback: Optional[Callable] = None,
) -> bool: ) -> bool:
success = False success = False
@ -210,11 +242,11 @@ def convert(
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"] command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
extra_args = [ extra_args = [
"-v", "-v",
f"{input_filename}:/tmp/input_file", f"{document.input_filename}:/tmp/input_file",
"-v", "-v",
f"{pixel_dir}:/dangerzone", f"{pixel_dir}:/dangerzone",
] ]
ret = exec_container(command, extra_args, stdout_callback) ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0: if ret != 0:
log.error("documents-to-pixels failed") log.error("documents-to-pixels failed")
else: else:
@ -232,18 +264,18 @@ def convert(
"-e", "-e",
f"OCR_LANGUAGE={ocr_lang}", f"OCR_LANGUAGE={ocr_lang}",
] ]
ret = exec_container(command, extra_args, stdout_callback) ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0: if ret != 0:
log.error("pixels-to-pdf failed") log.error("pixels-to-pdf failed")
else: else:
# Move the final file to the right place # Move the final file to the right place
if os.path.exists(output_filename): if os.path.exists(document.output_filename):
os.remove(output_filename) os.remove(document.output_filename)
container_output_filename = os.path.join( container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf" safe_dir, "safe-output-compressed.pdf"
) )
shutil.move(container_output_filename, output_filename) shutil.move(container_output_filename, document.output_filename)
# We did it # We did it
success = True success = True

View file

@ -493,34 +493,17 @@ class ConvertThread(QtCore.QThread):
ocr_lang = None ocr_lang = None
if convert( if convert(
self.document.input_filename, self.document,
self.document.output_filename,
ocr_lang, ocr_lang,
self.stdout_callback, self.stdout_callback,
): ):
self.finished.emit(self.error) self.finished.emit(self.error)
def stdout_callback(self, line: str) -> None: def stdout_callback(self, error: bool, text: str, percentage: int) -> None:
try: if error:
status = json.loads(line)
except:
log.error(f"Invalid JSON returned from container: {line}")
self.error = True self.error = True
self.update.emit(
True, f"Invalid JSON returned from container:\n\n{line}", 0
)
return
s = Style.BRIGHT + Fore.CYAN + f"{status['percentage']}% " self.update.emit(error, text, percentage)
if status["error"]:
self.error = True
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
self.update.emit(status["error"], status["text"], status["percentage"])
class ConvertWidget(QtWidgets.QWidget): class ConvertWidget(QtWidgets.QWidget):

View file

@ -48,12 +48,11 @@ class DangerzoneCore(object):
self.documents.append(doc) self.documents.append(doc)
def convert_documents( def convert_documents(
self, ocr_lang: Optional[str], stdout_callback: Callable[[str], None] self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None: ) -> None:
def convert_doc(document: Document) -> None: def convert_doc(document: Document) -> None:
success = container.convert( success = container.convert(
document.input_filename, document,
document.output_filename,
ocr_lang, ocr_lang,
stdout_callback, stdout_callback,
) )