mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 09:52:37 +02:00
WIP for progress report
This commit is contained in:
parent
6b658812f0
commit
fff7be7535
5 changed files with 183 additions and 52 deletions
137
dangerzone/ctx.py
Normal file
137
dangerzone/ctx.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
import datetime
|
||||
import enum
|
||||
import logging
|
||||
import time
|
||||
from typing import Callable
|
||||
|
||||
from colorama import Fore, Style
|
||||
|
||||
from .document import Document
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConversionCtx:
|
||||
|
||||
EST_PERCENT_START_CONVERSION_PROC = 1
|
||||
EST_PERCENT_GATHER_PAGES = 2
|
||||
EST_PERCENT_CONVERT_PAGES = 96
|
||||
EST_PERCENT_COMPLETE_CONVERSION = 1
|
||||
|
||||
MSG_CONVERSION_PROCESS_TYPE = "process"
|
||||
|
||||
# Conversion state
|
||||
STATE_NOT_STARTED = enum.auto()
|
||||
STATE_STARTING_CONVERSION_PROC = enum.auto()
|
||||
STATE_GATHERING_PAGES = enum.auto()
|
||||
STATE_CONVERTING_PAGES = enum.auto()
|
||||
STATE_COMPLETED = enum.auto()
|
||||
STATE_FAILED = enum.auto()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
document: Document,
|
||||
ocr_lang: str | None = None,
|
||||
progress_callback: Callable | None = None,
|
||||
) -> None:
|
||||
self.doc = document
|
||||
self.ocr_lang = ocr_lang
|
||||
self.callback = progress_callback
|
||||
|
||||
conversion_total = 100 # FiXME:
|
||||
assert conversion_total == 100
|
||||
|
||||
self.percentage: float = 0.0
|
||||
self.cur_page = 0
|
||||
self.pages = 0
|
||||
self.page_timer_start = None
|
||||
self.state = self.STATE_NOT_STARTED
|
||||
|
||||
def is_not_started(self) -> bool:
|
||||
return self.state is self.STATE_NOT_STARTED
|
||||
|
||||
def is_started(self) -> bool:
|
||||
return self.state in (
|
||||
self.STATE_STARTING_CONVERSION_PROC,
|
||||
self.STATE_GATHERING_PAGES,
|
||||
self.STATE_CONVERTING_PAGES,
|
||||
)
|
||||
|
||||
def is_completed(self) -> bool:
|
||||
return self.state is Document.STATE_COMPLETED
|
||||
|
||||
def is_failed(self) -> bool:
|
||||
return self.state is Document.STATE_FAILED
|
||||
|
||||
def increase(self, step: float) -> None:
|
||||
assert step > 0
|
||||
self.percentage += step
|
||||
|
||||
def print_message(self, text: str, error: bool = False) -> None:
|
||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {self.doc.id}] "
|
||||
s += Fore.CYAN + f"{int(self.percentage)}% " + Style.RESET_ALL
|
||||
if error:
|
||||
s += Fore.RED + text + Style.RESET_ALL
|
||||
log.error(s)
|
||||
else:
|
||||
s += text
|
||||
log.info(s)
|
||||
|
||||
if self.callback:
|
||||
self.callback(error, text, self.percentage)
|
||||
|
||||
def start_conversion_proc(self):
|
||||
self.state = self.STATE_STARTING_CONVERSION_PROC
|
||||
self.print_message(
|
||||
f"Starting a {self.MSG_CONVERSION_PROCESS_TYPE} for the document conversion"
|
||||
)
|
||||
|
||||
def start_page_gathering(self):
|
||||
self.state = self.STATE_GATHERING_PAGES
|
||||
self.increase(self.EST_PERCENT_START_CONVERSION_PROC)
|
||||
self.print_message("Getting number of pages")
|
||||
|
||||
def set_total_pages(self, pages: int) -> None:
|
||||
self.state = self.STATE_CONVERTING_PAGES
|
||||
self.increase(self.EST_PERCENT_GATHER_PAGES)
|
||||
assert pages > 0
|
||||
self.pages = pages
|
||||
|
||||
def page_iter(self, pages):
|
||||
self.set_total_pages(pages)
|
||||
for page in range(1, pages + 1):
|
||||
self.start_converting_page(page)
|
||||
yield page
|
||||
self.finished_converting_page()
|
||||
|
||||
def start_converting_page(self, page: int) -> None:
|
||||
searchable = "searchable " if self.ocr_lang else ""
|
||||
remaining = ""
|
||||
|
||||
if not self.page_timer_start:
|
||||
self.page_timer_start = time.monotonic()
|
||||
else:
|
||||
processed_pages = page - 1
|
||||
elapsed = time.monotonic() - self.page_timer_start
|
||||
elapsed_per_page = elapsed / processed_pages
|
||||
remaining = (self.pages - processed_pages) * elapsed_per_page
|
||||
remaining = datetime.timedelta(seconds=round(remaining))
|
||||
remaining = f" (remaining: {remaining}s)"
|
||||
|
||||
self.print_message(
|
||||
f"Converting page {page}/{self.pages} from pixels to {searchable}PDF{remaining}"
|
||||
)
|
||||
|
||||
def finished_converting_page(self) -> None:
|
||||
self.increase(self.EST_PERCENT_CONVERT_PAGES / self.pages)
|
||||
|
||||
def fail(self, msg: str) -> None:
|
||||
self.state = self.STATE_FAILED
|
||||
self.print_message(msg, error=True)
|
||||
self.doc.mark_as_failed()
|
||||
|
||||
def success(self) -> None:
|
||||
self.state = self.STATE_COMPLETED
|
||||
self.percentage = 100
|
||||
self.doc.mark_as_safe()
|
||||
self.print_message("Conversion completed successfully")
|
|
@ -29,6 +29,7 @@ from ..isolation_provider.container import Container, NoContainerTechException
|
|||
from ..isolation_provider.dummy import Dummy
|
||||
from ..isolation_provider.qubes import Qubes, is_qubes_native_conversion
|
||||
from ..util import get_resource_path, get_subprocess_startupinfo, get_version
|
||||
from ..ctx import ConversionCtx
|
||||
from .logic import Alert, CollapsibleBox, DangerzoneGui, UpdateDialog
|
||||
from .updater import UpdateReport
|
||||
|
||||
|
@ -1124,11 +1125,8 @@ class ConvertTask(QtCore.QObject):
|
|||
self.dangerzone = dangerzone
|
||||
|
||||
def convert_document(self) -> None:
|
||||
self.dangerzone.isolation_provider.convert(
|
||||
self.document,
|
||||
self.ocr_lang,
|
||||
self.progress_callback,
|
||||
)
|
||||
ctx = ConversionCtx(self.document, self.ocr_lang, self.progress_callback)
|
||||
self.dangerzone.isolation_provider.convert(ctx)
|
||||
self.finished.emit(self.error)
|
||||
|
||||
def progress_callback(self, error: bool, text: str, percentage: int) -> None:
|
||||
|
|
|
@ -16,6 +16,7 @@ from ..conversion import errors
|
|||
from ..conversion.common import DEFAULT_DPI, INT_BYTES
|
||||
from ..document import Document
|
||||
from ..util import get_tessdata_dir, replace_control_chars
|
||||
from ..ctx import ConversionCtx
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -97,29 +98,24 @@ class IsolationProvider(ABC):
|
|||
def install(self) -> bool:
|
||||
pass
|
||||
|
||||
def convert(
|
||||
self,
|
||||
document: Document,
|
||||
ocr_lang: Optional[str],
|
||||
progress_callback: Optional[Callable] = None,
|
||||
) -> None:
|
||||
self.progress_callback = progress_callback
|
||||
def convert(self, ctx: ConversionCtx) -> None:
|
||||
document = ctx.doc
|
||||
document.mark_as_converting()
|
||||
try:
|
||||
ctx.start_conversion_proc()
|
||||
with self.doc_to_pixels_proc(document) as conversion_proc:
|
||||
self.convert_with_proc(document, ocr_lang, conversion_proc)
|
||||
document.mark_as_safe()
|
||||
ctx.start_page_gathering()
|
||||
self.convert_with_proc(ctx, conversion_proc)
|
||||
if document.archive_after_conversion:
|
||||
document.archive()
|
||||
ctx.success()
|
||||
except errors.ConversionException as e:
|
||||
self.print_progress(document, True, str(e), 0)
|
||||
document.mark_as_failed()
|
||||
ctx.fail(str(e))
|
||||
except Exception as e:
|
||||
log.exception(
|
||||
f"An exception occurred while converting document '{document.id}'"
|
||||
)
|
||||
self.print_progress(document, True, str(e), 0)
|
||||
document.mark_as_failed()
|
||||
ctx.fail(str(e))
|
||||
|
||||
def ocr_page(self, pixmap: fitz.Pixmap, ocr_lang: str) -> bytes:
|
||||
"""Get a single page as pixels, OCR it, and return a PDF as bytes."""
|
||||
|
@ -157,12 +153,13 @@ class IsolationProvider(ABC):
|
|||
|
||||
def convert_with_proc(
|
||||
self,
|
||||
document: Document,
|
||||
ocr_lang: Optional[str],
|
||||
ctx: ConversionCtx,
|
||||
p: subprocess.Popen,
|
||||
) -> None:
|
||||
ocr_lang = ctx.ocr_lang
|
||||
document = ctx.doc
|
||||
percentage = 0.0
|
||||
with open(document.input_filename, "rb") as f:
|
||||
with open(ctx.doc.input_filename, "rb") as f:
|
||||
try:
|
||||
assert p.stdin is not None
|
||||
p.stdin.write(f.read())
|
||||
|
@ -178,13 +175,7 @@ class IsolationProvider(ABC):
|
|||
|
||||
safe_doc = fitz.Document()
|
||||
|
||||
for page in range(1, n_pages + 1):
|
||||
searchable = "searchable " if ocr_lang else ""
|
||||
text = (
|
||||
f"Converting page {page}/{n_pages} from pixels to {searchable}PDF"
|
||||
)
|
||||
self.print_progress(document, False, text, percentage)
|
||||
|
||||
for page in ctx.page_iter(n_pages):
|
||||
width = read_int(p.stdout)
|
||||
height = read_int(p.stdout)
|
||||
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
|
||||
|
@ -216,25 +207,6 @@ class IsolationProvider(ABC):
|
|||
safe_doc.save(document.sanitized_output_filename)
|
||||
os.replace(document.sanitized_output_filename, document.output_filename)
|
||||
|
||||
# TODO handle leftover code input
|
||||
text = "Successfully converted document"
|
||||
self.print_progress(document, False, text, 100)
|
||||
|
||||
def print_progress(
|
||||
self, document: Document, error: bool, text: str, percentage: float
|
||||
) -> None:
|
||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
|
||||
if error:
|
||||
s += Fore.RED + text + Style.RESET_ALL
|
||||
log.error(s)
|
||||
else:
|
||||
s += text
|
||||
log.info(s)
|
||||
|
||||
if self.progress_callback:
|
||||
self.progress_callback(error, text, percentage)
|
||||
|
||||
def get_proc_exception(
|
||||
self, p: subprocess.Popen, timeout: int = TIMEOUT_EXCEPTION
|
||||
) -> Exception:
|
||||
|
|
|
@ -7,6 +7,7 @@ import colorama
|
|||
|
||||
from . import errors, util
|
||||
from .document import Document
|
||||
from .ctx import ConversionCtx
|
||||
from .isolation_provider.base import IsolationProvider
|
||||
from .settings import Settings
|
||||
from .util import get_resource_path
|
||||
|
@ -65,12 +66,9 @@ class DangerzoneCore(object):
|
|||
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
||||
) -> None:
|
||||
def convert_doc(document: Document) -> None:
|
||||
ctx = ConversionCtx(document, ocr_lang, stdout_callback)
|
||||
try:
|
||||
self.isolation_provider.convert(
|
||||
document,
|
||||
ocr_lang,
|
||||
stdout_callback,
|
||||
)
|
||||
self.isolation_provider.convert(ctx)
|
||||
except Exception as e:
|
||||
log.exception(
|
||||
f"Unexpected error occurred while converting '{document}'"
|
||||
|
|
26
prog_tests.py
Executable file
26
prog_tests.py
Executable file
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
|
||||
from dangerzone import document
|
||||
from dangerzone.ctx import ConversionCtx
|
||||
|
||||
|
||||
def main():
    """Drive a ConversionCtx through a simulated ten-page conversion.

    Each page takes 0.2 seconds; a failure is reported on every fifth page,
    and the conversion is reported as successful once the loop ends.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ConversionCtx(document.Document())
    ctx.start_conversion_proc()
    ctx.start_page_gathering()

    for page_no in ctx.page_iter(10):
        time.sleep(0.2)
        if page_no % 5 == 0:
            ctx.fail(f"Failed during page {page_no}")

    ctx.success()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
Loading…
Reference in a new issue