Sequential bulk document support in cli

Basic implementation of bulk document support in dangerzone-cli. Usage: dangerzone-cli [OPTIONS] doc1.pdf doc2.pdf
2025-04-28 18:02:38 +02:00 · 2022-09-20 15:20:47 +01:00 · 2022-09-20 15:20:47 +01:00 · 981716ccff
commit 981716ccff
parent 1147698287
6 changed files with 137 additions and 32 deletions
--- a/dangerzone/args.py
+++ b/dangerzone/args.py
@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional, Tuple

 import click

@ -17,6 +17,18 @@ def _validate_input_filename(
    return filename


+@errors.handle_document_errors
+def _validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str, ...]
+) -> List[str]:
+    """Normalize and validate each input filename passed on the command line.
+
+    Args:
+        ctx: The click context of the running command (unused here).
+        param: The click parameter being validated (unused here).
+        value: The raw filenames. The CLI declares the argument with
+            ``nargs=-1``, so this is a tuple of any length.
+
+    Returns:
+        The normalized filenames, in the order they were given.
+    """
+    normalized_filenames = []
+    for raw_filename in value:
+        filename = Document.normalize_filename(raw_filename)
+        # Validation runs on the normalized path; any exception it raises
+        # propagates to the ``handle_document_errors`` decorator.
+        Document.validate_input_filename(filename)
+        normalized_filenames.append(filename)
+    return normalized_filenames
+
+
@errors.handle_document_errors
 def _validate_output_filename(
    ctx: click.Context, param: str, value: Optional[str]
@ -42,6 +54,12 @@ def validate_input_filename(
    return _validate_input_filename(ctx, param, value)


+def validate_input_filenames(
+    ctx: click.Context, param: List[str], value: Tuple[str, ...]
+) -> List[str]:
+    """Click callback that validates a variable number of input filenames.
+
+    Delegates to ``_validate_input_filenames`` and returns its list of
+    normalized filenames.
+    """
+    return _validate_input_filenames(ctx, param, value)
+
+
 def validate_output_filename(
    ctx: click.Context, param: str, value: Optional[str]
 ) -> Optional[str]:
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@ -2,7 +2,7 @@ import json
 import logging
 import os
 import sys
-from typing import Optional
+from typing import List, Optional

 import click
 from colorama import Back, Fore, Style
@ -26,23 +26,29 @@ def print_header(s: str) -> None:
    help=f"Default is filename ending with {SAFE_EXTENSION}",
 )
@click.option("--ocr-lang", help="Language to OCR, defaults to none")
-@click.argument("filename", required=True, callback=args.validate_input_filename)
+@click.argument(
+    "filenames",
+    required=True,
+    nargs=-1,
+    type=click.UNPROCESSED,
+    callback=args.validate_input_filenames,
+)
@errors.handle_document_errors
 def cli_main(
-    output_filename: Optional[str], ocr_lang: Optional[str], filename: str
+    output_filename: Optional[str], ocr_lang: Optional[str], filenames: List[str]
 ) -> None:
    setup_logging()
    dangerzone = DangerzoneCore()

    display_banner()
-
-    document = Document(filename)
-
-    # Set PDF output filename
-    if output_filename:
-        document.output_filename = output_filename
+    if len(filenames) == 1 and output_filename:
+        dangerzone.add_document(filenames[0], output_filename)
+    elif len(filenames) > 1 and output_filename:
+        click.echo("--output-filename can only be used with one input file.")
+        exit(1)
    else:
-        document.set_default_output_filename()
+        for filename in filenames:
+            dangerzone.add_document(filename)

    # Validate OCR language
    if ocr_lang:
@ -75,18 +81,21 @@ def cli_main(
        except:
            click.echo(f"Invalid JSON returned from container: {line}")

-    if convert(
-        document.input_filename,
-        document.output_filename,
-        ocr_lang,
-        stdout_callback,
-    ):
-        print_header("Safe PDF created successfully")
-        click.echo(document.output_filename)
-        exit(0)
-    else:
-        print_header("Failed to convert document")
+    dangerzone.convert_documents(ocr_lang, stdout_callback)
+    documents_safe = dangerzone.get_safe_documents()
+    documents_failed = dangerzone.get_failed_documents()
+
+    if documents_safe != []:
+        print_header("Safe PDF(s) created successfully")
+        for document in documents_safe:
+            click.echo(document.output_filename)
+    if documents_failed != []:
+        print_header("Failed to convert document(s)")
+        for document in documents_failed:
+            click.echo(document.input_filename)
        exit(1)
+    else:
+        exit(0)


 def setup_logging() -> None:
--- a/dangerzone/document.py
+++ b/dangerzone/document.py
@ -1,3 +1,4 @@
+import enum
 import os
 import platform
 import stat
@ -18,13 +19,23 @@ class Document:
    document, and validating its info.
    """

-    def __init__(self, input_filename: str = None) -> None:
+    # Document conversion state.
+    # NOTE(review): enum.auto() is called outside of an enum.Enum class here,
+    # so no automatic values are assigned; each constant is simply a distinct
+    # sentinel object, and the is_*() helpers compare them by identity.
+    # Consider a real Enum for these states - TODO confirm.
+    STATE_UNCONVERTED = enum.auto()
+    STATE_SAFE = enum.auto()
+    STATE_FAILED = enum.auto()
+
+    def __init__(
+        self,
+        input_filename: Optional[str] = None,
+        output_filename: Optional[str] = None,
+    ) -> None:
+        """Create a document, optionally bound to input and output filenames.
+
+        Args:
+            input_filename: Path of the document to convert, if already known.
+            output_filename: Path for the converted PDF. It is only applied
+                when ``input_filename`` is also given.
+        """
        self._input_filename: Optional[str] = None
        self._output_filename: Optional[str] = None

        if input_filename:
            self.input_filename = input_filename

+            if output_filename:
+                self.output_filename = output_filename
+
+        self.state = Document.STATE_UNCONVERTED
+
    @staticmethod
    def normalize_filename(filename: str) -> str:
        return os.path.abspath(filename)
@ -68,7 +79,10 @@ class Document:
    @property
    def output_filename(self) -> str:
+        """Return the explicitly set output filename, or the default one.
+
+        Raises:
+            DocumentFilenameException: If neither an output filename nor an
+                input filename has been set.
+        """
        if self._output_filename is None:
-            raise DocumentFilenameException("Output filename has not been set yet.")
+            if self._input_filename is not None:
+                # No explicit name yet: derive one from the input filename.
+                return self.default_output_filename
+            else:
+                raise DocumentFilenameException("Output filename has not been set yet.")
        else:
            return self._output_filename

@ -78,7 +92,21 @@ class Document:
        self.validate_output_filename(filename)
        self._output_filename = filename

-    def set_default_output_filename(self) -> None:
-        self.output_filename = (
-            f"{os.path.splitext(self.input_filename)[0]}{SAFE_EXTENSION}"
-        )
+    @property
+    def default_output_filename(self) -> str:
+        """Return the input filename with its extension replaced by SAFE_EXTENSION."""
+        stem, _extension = os.path.splitext(self.input_filename)
+        return f"{stem}{SAFE_EXTENSION}"
+
+    def is_unconverted(self) -> bool:
+        """Return True while the document is still in its initial, unconverted state."""
+        return self.state is Document.STATE_UNCONVERTED
+
+    def is_failed(self) -> bool:
+        """Return True if the document has been marked as failed."""
+        return self.state is Document.STATE_FAILED
+
+    def is_safe(self) -> bool:
+        """Return True if the document has been marked as safe."""
+        return self.state is Document.STATE_SAFE
+
+    def mark_as_failed(self) -> None:
+        """Set the document's state to STATE_FAILED."""
+        self.state = Document.STATE_FAILED
+
+    def mark_as_safe(self) -> None:
+        """Set the document's state to STATE_SAFE."""
+        self.state = Document.STATE_SAFE
--- a/dangerzone/gui/main_window.py
+++ b/dangerzone/gui/main_window.py
@ -433,9 +433,6 @@ class SettingsWidget(QtWidgets.QWidget):
        self.dangerous_doc_label.setText(
            f"Suspicious: {os.path.basename(self.document.input_filename)}"
        )
-
-        # Set the default save location
-        self.document.set_default_output_filename()
        self.save_lineedit.setText(os.path.basename(self.document.output_filename))

    def save_browse_button_clicked(self) -> None:
--- a/dangerzone/logic.py
+++ b/dangerzone/logic.py
@ -6,12 +6,13 @@ import platform
 import shutil
 import subprocess
 import sys
-from typing import Optional
+from typing import Callable, List, Optional

 import appdirs
 import colorama

 from .container import convert
+from .document import Document
 from .settings import Settings
 from .util import get_resource_path

@ -36,3 +37,34 @@ class DangerzoneCore(object):

        # Load settings
        self.settings = Settings(self)
+
+        self.documents: List[Document] = []
+
+    def add_document(
+        self, input_filename: str, output_filename: Optional[str] = None
+    ) -> None:
+        """Wrap the given filenames in a Document and append it to ``self.documents``."""
+        self.documents.append(Document(input_filename, output_filename))
+
+    def convert_documents(
+        self, ocr_lang: Optional[str], stdout_callback: Callable[[str], None]
+    ) -> None:
+        """Convert every document in ``self.documents``, one after another.
+
+        Each document is marked as safe or failed depending on the truthiness
+        of the value returned by ``convert``. A failed conversion does not
+        stop the loop, so the remaining documents are still attempted.
+
+        Args:
+            ocr_lang: OCR language, passed through to ``convert`` unchanged.
+            stdout_callback: Callback passed through to ``convert`` unchanged.
+        """
+        for document in self.documents:
+            success = convert(
+                document.input_filename,
+                document.output_filename,
+                ocr_lang,
+                stdout_callback,
+            )
+            if success:
+                document.mark_as_safe()
+            else:
+                document.mark_as_failed()
+
+    def get_safe_documents(self) -> List[Document]:
+        """Return the documents whose ``is_safe()`` is true, in insertion order."""
+        return [doc for doc in self.documents if doc.is_safe()]
+
+    def get_failed_documents(self) -> List[Document]:
+        """Return the documents whose ``is_failed()`` is true, in insertion order."""
+        return [doc for doc in self.documents if doc.is_failed()]
--- a/tests/test_document.py
+++ b/tests/test_document.py
@ -78,3 +78,24 @@ def test_output_file_not_pdf(tmp_path: Path) -> None:
    assert "Safe PDF filename must end in '.pdf'" in str(e.value)

    assert not os.path.exists(docx_file)
+
+
+def test_is_unconverted_by_default(sample_doc: str) -> None:
+    """A freshly created document starts out in the unconverted state."""
+    d = Document(sample_doc)
+    assert d.is_unconverted()
+
+
+def test_mark_as_safe(sample_doc: str) -> None:
+    """Marking a document as safe makes it safe, and neither failed nor unconverted."""
+    d = Document(sample_doc)
+    d.mark_as_safe()
+    assert d.is_safe()
+    assert not d.is_failed()
+    assert not d.is_unconverted()
+
+
+def test_mark_as_failed(sample_doc: str) -> None:
+    """Marking a document as failed makes it failed, and neither safe nor unconverted."""
+    d = Document(sample_doc)
+    d.mark_as_failed()
+    assert d.is_failed()
+    assert not d.is_safe()
+    assert not d.is_unconverted()