Deduplicate container output parsing (stdout_callback)

The container output logging logic was in both the CLI and the GUI.
This change moves the core parsing logic to container.py.

Since the code was largely the same, now cli does need to specify
a stdout_callback since all the necessary logging already happens.

The GUI now only adds an stdout_callback to detect if there was an
error during the conversion process.
This commit is contained in:
deeplow 2022-09-26 10:39:59 +01:00
parent 2d587f4082
commit 6d2fdf0afe
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 54 additions and 53 deletions

View file

@ -1,4 +1,3 @@
import json
import logging
import sys
from typing import Any, Callable, List, Optional, TypeVar
@ -69,19 +68,7 @@ def cli_main(
# Convert the document
print_header("Converting document to safe PDF")
def stdout_callback(line: str) -> None:
try:
status = json.loads(line)
s = Style.BRIGHT + Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
else:
s += Style.RESET_ALL + status["text"]
click.echo(s)
except:
click.echo(f"Invalid JSON returned from container: {line}")
dangerzone.convert_documents(ocr_lang, stdout_callback)
dangerzone.convert_documents(ocr_lang)
documents_safe = dangerzone.get_safe_documents()
documents_failed = dangerzone.get_failed_documents()

View file

@ -1,4 +1,5 @@
import gzip
import json
import logging
import os
import pipes
@ -6,10 +7,12 @@ import platform
import shutil
import subprocess
import tempfile
from typing import Callable, List, Optional
from typing import Callable, List, Optional, Tuple
import appdirs
from colorama import Fore, Style
from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo
container_name = "dangerzone.rocks/dangerzone"
@ -127,7 +130,34 @@ def is_container_installed() -> bool:
return installed
def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int:
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
@ -140,18 +170,21 @@ def exec(args: List[str], stdout_callback: Callable[[str], None] = None) -> int:
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if stdout_callback and p.stdout is not None:
if p.stdout is not None:
for line in p.stdout:
stdout_callback(line)
(error, text, percentage) = parse_progress(document, line)
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Callable[[str], None] = None,
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = get_runtime()
@ -181,14 +214,13 @@ def exec_container(
)
args = [container_runtime] + args
return exec(args, stdout_callback)
return exec(document, args, stdout_callback)
def convert(
input_filename: str,
output_filename: str,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Callable[[str], None],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
@ -210,11 +242,11 @@ def convert(
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
extra_args = [
"-v",
f"{input_filename}:/tmp/input_file",
f"{document.input_filename}:/tmp/input_file",
"-v",
f"{pixel_dir}:/dangerzone",
]
ret = exec_container(command, extra_args, stdout_callback)
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
@ -232,18 +264,18 @@ def convert(
"-e",
f"OCR_LANGUAGE={ocr_lang}",
]
ret = exec_container(command, extra_args, stdout_callback)
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(output_filename):
os.remove(output_filename)
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, output_filename)
shutil.move(container_output_filename, document.output_filename)
# We did it
success = True

View file

@ -493,34 +493,17 @@ class ConvertThread(QtCore.QThread):
ocr_lang = None
if convert(
self.document.input_filename,
self.document.output_filename,
self.document,
ocr_lang,
self.stdout_callback,
):
self.finished.emit(self.error)
def stdout_callback(self, line: str) -> None:
try:
status = json.loads(line)
except:
log.error(f"Invalid JSON returned from container: {line}")
def stdout_callback(self, error: bool, text: str, percentage: int) -> None:
if error:
self.error = True
self.update.emit(
True, f"Invalid JSON returned from container:\n\n{line}", 0
)
return
s = Style.BRIGHT + Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
self.error = True
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
self.update.emit(status["error"], status["text"], status["percentage"])
self.update.emit(error, text, percentage)
class ConvertWidget(QtWidgets.QWidget):

View file

@ -48,12 +48,11 @@ class DangerzoneCore(object):
self.documents.append(doc)
def convert_documents(
self, ocr_lang: Optional[str], stdout_callback: Callable[[str], None]
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None:
def convert_doc(document: Document) -> None:
success = container.convert(
document.input_filename,
document.output_filename,
document,
ocr_lang,
stdout_callback,
)