From 981716ccff69dbb0d07b3cc3c5847ee19f5be277 Mon Sep 17 00:00:00 2001
From: deeplow
Date: Tue, 20 Sep 2022 15:20:47 +0100
Subject: [PATCH] Sequential bulk document support in cli

Basic implementation of bulk document support in dangerzone-cli.

Usage: dangerzone-cli [OPTIONS] doc1.pdf doc2.pdf

Details:

- The CLI argument becomes variadic ("filenames", nargs=-1). Every input
  filename is normalized and validated by the new
  validate_input_filenames() click callback.
- --output-filename is only accepted together with a single input file;
  combined with several inputs the CLI prints an error and exits with
  status 1.
- DangerzoneCore keeps a list of Document objects (add_document()) and
  converts them one after another (convert_documents()), marking each
  document as safe or failed.
- Document gains a conversion state (unconverted / safe / failed) with
  is_*() and mark_as_*() helpers, and accepts an optional output filename
  in its constructor.
- Document.output_filename falls back to default_output_filename (input
  path with its extension replaced by SAFE_EXTENSION) when no output
  filename was set, replacing set_default_output_filename().
- After conversion the CLI lists the output path of every safe document
  and the input path of every failed one; the exit status is 1 if any
  document failed and 0 otherwise.
---
 dangerzone/args.py            | 20 +++++++++++++-
 dangerzone/cli.py             | 51 ++++++++++++++++++++---------------
 dangerzone/document.py        | 40 ++++++++++++++++++++++-----
 dangerzone/gui/main_window.py |  3 ---
 dangerzone/logic.py           | 32 +++++++++++++++++++++-
 tests/test_document.py        | 21 +++++++++++++++
 6 files changed, 135 insertions(+), 32 deletions(-)

diff --git a/dangerzone/args.py b/dangerzone/args.py
index c807078..120da65 100644
--- a/dangerzone/args.py
+++ b/dangerzone/args.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional, Tuple
 
 import click
 
@@ -17,6 +17,18 @@ def _validate_input_filename(
     return filename
 
 
+@errors.handle_document_errors
+def _validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str, ...]
+) -> List[str]:
+    normalized_filenames = []
+    for filename in value:
+        filename = Document.normalize_filename(filename)
+        Document.validate_input_filename(filename)
+        normalized_filenames.append(filename)
+    return normalized_filenames
+
+
 @errors.handle_document_errors
 def _validate_output_filename(
     ctx: click.Context, param: str, value: Optional[str]
@@ -42,6 +54,12 @@ def validate_input_filename(
     return _validate_input_filename(ctx, param, value)
 
 
+def validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str, ...]
+) -> List[str]:
+    return _validate_input_filenames(ctx, param, value)
+
+
 def validate_output_filename(
     ctx: click.Context, param: str, value: Optional[str]
 ) -> Optional[str]:
diff --git a/dangerzone/cli.py b/dangerzone/cli.py
index 3270b85..a06283a 100644
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@@ -2,7 +2,7 @@ import json
 import logging
 import os
 import sys
-from typing import Optional
+from typing import List, Optional
 
 import click
 from colorama import Back, Fore, Style
@@ -26,23 +26,29 @@ def print_header(s: str) -> None:
     help=f"Default is filename ending with {SAFE_EXTENSION}",
 )
 @click.option("--ocr-lang", help="Language to OCR, defaults to none")
-@click.argument("filename", required=True, callback=args.validate_input_filename)
+@click.argument(
+    "filenames",
+    required=True,
+    nargs=-1,
+    type=click.UNPROCESSED,
+    callback=args.validate_input_filenames,
+)
 @errors.handle_document_errors
 def cli_main(
-    output_filename: Optional[str], ocr_lang: Optional[str], filename: str
+    output_filename: Optional[str], ocr_lang: Optional[str], filenames: List[str]
 ) -> None:
     setup_logging()
     dangerzone = DangerzoneCore()
 
     display_banner()
-
-    document = Document(filename)
-
-    # Set PDF output filename
-    if output_filename:
-        document.output_filename = output_filename
+    if len(filenames) == 1 and output_filename:
+        dangerzone.add_document(filenames[0], output_filename)
+    elif len(filenames) > 1 and output_filename:
+        click.echo("--output-filename can only be used with one input file.")
+        exit(1)
     else:
-        document.set_default_output_filename()
+        for filename in filenames:
+            dangerzone.add_document(filename)
 
     # Validate OCR language
     if ocr_lang:
@@ -75,18 +81,21 @@ def cli_main(
         except:
             click.echo(f"Invalid JSON returned from container: {line}")
 
-    if convert(
-        document.input_filename,
-        document.output_filename,
-        ocr_lang,
-        stdout_callback,
-    ):
-        print_header("Safe PDF created successfully")
-        click.echo(document.output_filename)
-        exit(0)
-    else:
-        print_header("Failed to convert document")
+    dangerzone.convert_documents(ocr_lang, stdout_callback)
+    documents_safe = dangerzone.get_safe_documents()
+    documents_failed = dangerzone.get_failed_documents()
+
+    if documents_safe:
+        print_header("Safe PDF(s) created successfully")
+        for document in documents_safe:
+            click.echo(document.output_filename)
+    if documents_failed:
+        print_header("Failed to convert document(s)")
+        for document in documents_failed:
+            click.echo(document.input_filename)
         exit(1)
+    else:
+        exit(0)
 
 
 def setup_logging() -> None:
diff --git a/dangerzone/document.py b/dangerzone/document.py
index 6cfc28f..9fb0b12 100644
--- a/dangerzone/document.py
+++ b/dangerzone/document.py
@@ -1,3 +1,4 @@
+import enum
 import os
 import platform
 import stat
@@ -18,13 +19,23 @@ class Document:
     document, and validating its info.
     """
 
-    def __init__(self, input_filename: str = None) -> None:
+    # document conversion state
+    STATE_UNCONVERTED = enum.auto()
+    STATE_SAFE = enum.auto()
+    STATE_FAILED = enum.auto()
+
+    def __init__(self, input_filename: str = None, output_filename: str = None) -> None:
         self._input_filename: Optional[str] = None
         self._output_filename: Optional[str] = None
 
         if input_filename:
             self.input_filename = input_filename
 
+        if output_filename:
+            self.output_filename = output_filename
+
+        self.state = Document.STATE_UNCONVERTED
+
     @staticmethod
     def normalize_filename(filename: str) -> str:
         return os.path.abspath(filename)
@@ -68,7 +79,10 @@ class Document:
     @property
     def output_filename(self) -> str:
         if self._output_filename is None:
-            raise DocumentFilenameException("Output filename has not been set yet.")
+            if self._input_filename is not None:
+                return self.default_output_filename
+            else:
+                raise DocumentFilenameException("Output filename has not been set yet.")
         else:
             return self._output_filename
 
@@ -78,7 +92,21 @@ class Document:
         self.validate_output_filename(filename)
         self._output_filename = filename
 
-    def set_default_output_filename(self) -> None:
-        self.output_filename = (
-            f"{os.path.splitext(self.input_filename)[0]}{SAFE_EXTENSION}"
-        )
+    @property
+    def default_output_filename(self) -> str:
+        return f"{os.path.splitext(self.input_filename)[0]}{SAFE_EXTENSION}"
+
+    def is_unconverted(self) -> bool:
+        return self.state is Document.STATE_UNCONVERTED
+
+    def is_failed(self) -> bool:
+        return self.state is Document.STATE_FAILED
+
+    def is_safe(self) -> bool:
+        return self.state is Document.STATE_SAFE
+
+    def mark_as_failed(self) -> None:
+        self.state = Document.STATE_FAILED
+
+    def mark_as_safe(self) -> None:
+        self.state = Document.STATE_SAFE
diff --git a/dangerzone/gui/main_window.py b/dangerzone/gui/main_window.py
index 2e2e2d8..16689e7 100644
--- a/dangerzone/gui/main_window.py
+++ b/dangerzone/gui/main_window.py
@@ -433,9 +433,6 @@ class SettingsWidget(QtWidgets.QWidget):
         self.dangerous_doc_label.setText(
             f"Suspicious: {os.path.basename(self.document.input_filename)}"
         )
-
-        # Set the default save location
-        self.document.set_default_output_filename()
         self.save_lineedit.setText(os.path.basename(self.document.output_filename))
 
     def save_browse_button_clicked(self) -> None:
diff --git a/dangerzone/logic.py b/dangerzone/logic.py
index ec53fae..d2a044d 100644
--- a/dangerzone/logic.py
+++ b/dangerzone/logic.py
@@ -6,12 +6,13 @@ import platform
 import shutil
 import subprocess
 import sys
-from typing import Optional
+from typing import Callable, List, Optional
 
 import appdirs
 import colorama
 
 from .container import convert
+from .document import Document
 from .settings import Settings
 from .util import get_resource_path
 
@@ -36,3 +37,32 @@ class DangerzoneCore(object):
 
         # Load settings
         self.settings = Settings(self)
+
+        self.documents: List[Document] = []
+
+    def add_document(
+        self, input_filename: str, output_filename: Optional[str] = None
+    ) -> None:
+        doc = Document(input_filename, output_filename)
+        self.documents.append(doc)
+
+    def convert_documents(
+        self, ocr_lang: Optional[str], stdout_callback: Callable[[str], None]
+    ) -> None:
+        for document in self.documents:
+            success = convert(
+                document.input_filename,
+                document.output_filename,
+                ocr_lang,
+                stdout_callback,
+            )
+            if success:
+                document.mark_as_safe()
+            else:
+                document.mark_as_failed()
+
+    def get_safe_documents(self) -> List[Document]:
+        return [doc for doc in self.documents if doc.is_safe()]
+
+    def get_failed_documents(self) -> List[Document]:
+        return [doc for doc in self.documents if doc.is_failed()]
diff --git a/tests/test_document.py b/tests/test_document.py
index 666f465..fe4d1dc 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -78,3 +78,24 @@ def test_output_file_not_pdf(tmp_path: Path) -> None:
         assert "Safe PDF filename must end in '.pdf'" in str(e.value)
 
     assert not os.path.exists(docx_file)
+
+
+def test_is_unconverted_by_default(sample_doc: str) -> None:
+    d = Document(sample_doc)
+    assert d.is_unconverted()
+
+
+def test_mark_as_safe(sample_doc: str) -> None:
+    d = Document(sample_doc)
+    d.mark_as_safe()
+    assert d.is_safe()
+    assert not d.is_failed()
+    assert not d.is_unconverted()
+
+
+def test_mark_as_failed(sample_doc: str) -> None:
+    d = Document(sample_doc)
+    d.mark_as_failed()
+    assert d.is_failed()
+    assert not d.is_safe()
+    assert not d.is_unconverted()