From fff7be753547d1c651a62f6b90fa3dae91095a88 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Tue, 15 Oct 2024 17:44:39 +0300 Subject: [PATCH] WIP for progress report --- dangerzone/ctx.py | 137 ++++++++++++++++++++++++++ dangerzone/gui/main_window.py | 8 +- dangerzone/isolation_provider/base.py | 56 +++-------- dangerzone/logic.py | 8 +- prog_tests.py | 26 +++++ 5 files changed, 183 insertions(+), 52 deletions(-) create mode 100644 dangerzone/ctx.py create mode 100755 prog_tests.py diff --git a/dangerzone/ctx.py b/dangerzone/ctx.py new file mode 100644 index 0000000..28bdc07 --- /dev/null +++ b/dangerzone/ctx.py @@ -0,0 +1,137 @@ +import datetime +import enum +import logging +import time +from typing import Callable + +from colorama import Fore, Style + +from .document import Document + +log = logging.getLogger(__name__) + + +class ConversionCtx: + + EST_PERCENT_START_CONVERSION_PROC = 1 + EST_PERCENT_GATHER_PAGES = 2 + EST_PERCENT_CONVERT_PAGES = 96 + EST_PERCENT_COMPLETE_CONVERSION = 1 + + MSG_CONVERSION_PROCESS_TYPE = "process" + + # Conversion state + STATE_NOT_STARTED = enum.auto() + STATE_STARTING_CONVERSION_PROC = enum.auto() + STATE_GATHERING_PAGES = enum.auto() + STATE_CONVERTING_PAGES = enum.auto() + STATE_COMPLETED = enum.auto() + STATE_FAILED = enum.auto() + + def __init__( + self, + document: Document, + ocr_lang: str | None = None, + progress_callback: Callable | None = None, + ) -> None: + self.doc = document + self.ocr_lang = ocr_lang + self.callback = progress_callback + + conversion_total = 100 # FiXME: + assert conversion_total == 100 + + self.percentage: float = 0.0 + self.cur_page = 0 + self.pages = 0 + self.page_timer_start = None + self.state = self.STATE_NOT_STARTED + + def is_not_started(self) -> bool: + return self.state is self.STATE_NOT_STARTED + + def is_started(self) -> bool: + return self.state in ( + self.STATE_STARTING_CONVERSION_PROC, + self.STATE_GATHERING_PAGES, + self.STATE_CONVERTING_PAGES, + ) + + def is_completed(self) 
-> bool: + return self.state is Document.STATE_COMPLETED + + def is_failed(self) -> bool: + return self.state is Document.STATE_FAILED + + def increase(self, step: float) -> None: + assert step > 0 + self.percentage += step + + def print_message(self, text: str, error: bool = False) -> None: + s = Style.BRIGHT + Fore.YELLOW + f"[doc {self.doc.id}] " + s += Fore.CYAN + f"{int(self.percentage)}% " + Style.RESET_ALL + if error: + s += Fore.RED + text + Style.RESET_ALL + log.error(s) + else: + s += text + log.info(s) + + if self.callback: + self.callback(error, text, self.percentage) + + def start_conversion_proc(self): + self.state = self.STATE_STARTING_CONVERSION_PROC + self.print_message( + f"Starting a {self.MSG_CONVERSION_PROCESS_TYPE} for the document conversion" + ) + + def start_page_gathering(self): + self.state = self.STATE_GATHERING_PAGES + self.increase(self.EST_PERCENT_START_CONVERSION_PROC) + self.print_message("Getting number of pages") + + def set_total_pages(self, pages: int) -> None: + self.state = self.STATE_CONVERTING_PAGES + self.increase(self.EST_PERCENT_GATHER_PAGES) + assert pages > 0 + self.pages = pages + + def page_iter(self, pages): + self.set_total_pages(pages) + for page in range(1, pages + 1): + self.start_converting_page(page) + yield page + self.finished_converting_page() + + def start_converting_page(self, page: int) -> None: + searchable = "searchable " if self.ocr_lang else "" + remaining = "" + + if not self.page_timer_start: + self.page_timer_start = time.monotonic() + else: + processed_pages = page - 1 + elapsed = time.monotonic() - self.page_timer_start + elapsed_per_page = elapsed / processed_pages + remaining = (self.pages - processed_pages) * elapsed_per_page + remaining = datetime.timedelta(seconds=round(remaining)) + remaining = f" (remaining: {remaining}s)" + + self.print_message( + f"Converting page {page}/{self.pages} from pixels to {searchable}PDF{remaining}" + ) + + def finished_converting_page(self) -> None: + 
self.increase(self.EST_PERCENT_CONVERT_PAGES / self.pages) + + def fail(self, msg: str) -> None: + self.state = self.STATE_FAILED + self.print_message(msg, error=True) + self.doc.mark_as_failed() + + def success(self) -> None: + self.state = self.STATE_COMPLETED + self.percentage = 100 + self.doc.mark_as_safe() + self.print_message("Conversion completed successfully") diff --git a/dangerzone/gui/main_window.py b/dangerzone/gui/main_window.py index 43fb4c8..08a3368 100644 --- a/dangerzone/gui/main_window.py +++ b/dangerzone/gui/main_window.py @@ -29,6 +29,7 @@ from ..isolation_provider.container import Container, NoContainerTechException from ..isolation_provider.dummy import Dummy from ..isolation_provider.qubes import Qubes, is_qubes_native_conversion from ..util import get_resource_path, get_subprocess_startupinfo, get_version +from ..ctx import ConversionCtx from .logic import Alert, CollapsibleBox, DangerzoneGui, UpdateDialog from .updater import UpdateReport @@ -1124,11 +1125,8 @@ class ConvertTask(QtCore.QObject): self.dangerzone = dangerzone def convert_document(self) -> None: - self.dangerzone.isolation_provider.convert( - self.document, - self.ocr_lang, - self.progress_callback, - ) + ctx = ConversionCtx(self.document, self.ocr_lang, self.progress_callback) + self.dangerzone.isolation_provider.convert(ctx) self.finished.emit(self.error) def progress_callback(self, error: bool, text: str, percentage: int) -> None: diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 9404cee..8031d09 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -16,6 +16,7 @@ from ..conversion import errors from ..conversion.common import DEFAULT_DPI, INT_BYTES from ..document import Document from ..util import get_tessdata_dir, replace_control_chars +from ..ctx import ConversionCtx log = logging.getLogger(__name__) @@ -97,29 +98,24 @@ class IsolationProvider(ABC): def install(self) -> bool: pass 
- def convert( - self, - document: Document, - ocr_lang: Optional[str], - progress_callback: Optional[Callable] = None, - ) -> None: - self.progress_callback = progress_callback + def convert(self, ctx: ConversionCtx) -> None: + document = ctx.doc document.mark_as_converting() try: + ctx.start_conversion_proc() with self.doc_to_pixels_proc(document) as conversion_proc: - self.convert_with_proc(document, ocr_lang, conversion_proc) - document.mark_as_safe() + ctx.start_page_gathering() + self.convert_with_proc(ctx, conversion_proc) if document.archive_after_conversion: document.archive() + ctx.success() except errors.ConversionException as e: - self.print_progress(document, True, str(e), 0) - document.mark_as_failed() + ctx.fail(str(e)) except Exception as e: log.exception( f"An exception occurred while converting document '{document.id}'" ) - self.print_progress(document, True, str(e), 0) - document.mark_as_failed() + ctx.fail(str(e)) def ocr_page(self, pixmap: fitz.Pixmap, ocr_lang: str) -> bytes: """Get a single page as pixels, OCR it, and return a PDF as bytes.""" @@ -157,12 +153,13 @@ class IsolationProvider(ABC): def convert_with_proc( self, - document: Document, - ocr_lang: Optional[str], + ctx: ConversionCtx, p: subprocess.Popen, ) -> None: + ocr_lang = ctx.ocr_lang + document = ctx.doc percentage = 0.0 - with open(document.input_filename, "rb") as f: + with open(ctx.doc.input_filename, "rb") as f: try: assert p.stdin is not None p.stdin.write(f.read()) @@ -178,13 +175,7 @@ class IsolationProvider(ABC): safe_doc = fitz.Document() - for page in range(1, n_pages + 1): - searchable = "searchable " if ocr_lang else "" - text = ( - f"Converting page {page}/{n_pages} from pixels to {searchable}PDF" - ) - self.print_progress(document, False, text, percentage) - + for page in ctx.page_iter(n_pages): width = read_int(p.stdout) height = read_int(p.stdout) if not (1 <= width <= errors.MAX_PAGE_WIDTH): @@ -216,25 +207,6 @@ class IsolationProvider(ABC): 
def main() -> int:
    """Drive a ConversionCtx through a simulated 10-page conversion.

    Every fifth page is treated as a failure. The run stops at the first
    failure instead of continuing and then reporting success.

    Returns:
        0 if the simulated conversion succeeded, 1 if it failed. The value is
        used as the process exit code.
    """
    logging.basicConfig(level=logging.INFO)
    doc = document.Document()
    ctx = ConversionCtx(doc)
    ctx.start_conversion_proc()
    ctx.start_page_gathering()
    for page in ctx.page_iter(10):
        # Simulate the time spent converting a page.
        time.sleep(0.2)
        if page % 5 == 0:
            ctx.fail(f"Failed during page {page}")
            # A failed conversion must not fall through to ctx.success().
            return 1

    ctx.success()
    return 0


if __name__ == "__main__":
    sys.exit(main())