mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
WIP for progress report
This commit is contained in:
parent
6b658812f0
commit
fff7be7535
5 changed files with 183 additions and 52 deletions
137
dangerzone/ctx.py
Normal file
137
dangerzone/ctx.py
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
import datetime
|
||||||
|
import enum
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
from colorama import Fore, Style
|
||||||
|
|
||||||
|
from .document import Document
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ConversionCtx:
|
||||||
|
|
||||||
|
EST_PERCENT_START_CONVERSION_PROC = 1
|
||||||
|
EST_PERCENT_GATHER_PAGES = 2
|
||||||
|
EST_PERCENT_CONVERT_PAGES = 96
|
||||||
|
EST_PERCENT_COMPLETE_CONVERSION = 1
|
||||||
|
|
||||||
|
MSG_CONVERSION_PROCESS_TYPE = "process"
|
||||||
|
|
||||||
|
# Conversion state
|
||||||
|
STATE_NOT_STARTED = enum.auto()
|
||||||
|
STATE_STARTING_CONVERSION_PROC = enum.auto()
|
||||||
|
STATE_GATHERING_PAGES = enum.auto()
|
||||||
|
STATE_CONVERTING_PAGES = enum.auto()
|
||||||
|
STATE_COMPLETED = enum.auto()
|
||||||
|
STATE_FAILED = enum.auto()
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: str | None = None,
|
||||||
|
progress_callback: Callable | None = None,
|
||||||
|
) -> None:
|
||||||
|
self.doc = document
|
||||||
|
self.ocr_lang = ocr_lang
|
||||||
|
self.callback = progress_callback
|
||||||
|
|
||||||
|
conversion_total = 100 # FiXME:
|
||||||
|
assert conversion_total == 100
|
||||||
|
|
||||||
|
self.percentage: float = 0.0
|
||||||
|
self.cur_page = 0
|
||||||
|
self.pages = 0
|
||||||
|
self.page_timer_start = None
|
||||||
|
self.state = self.STATE_NOT_STARTED
|
||||||
|
|
||||||
|
def is_not_started(self) -> bool:
|
||||||
|
return self.state is self.STATE_NOT_STARTED
|
||||||
|
|
||||||
|
def is_started(self) -> bool:
|
||||||
|
return self.state in (
|
||||||
|
self.STATE_STARTING_CONVERSION_PROC,
|
||||||
|
self.STATE_GATHERING_PAGES,
|
||||||
|
self.STATE_CONVERTING_PAGES,
|
||||||
|
)
|
||||||
|
|
||||||
|
def is_completed(self) -> bool:
|
||||||
|
return self.state is Document.STATE_COMPLETED
|
||||||
|
|
||||||
|
def is_failed(self) -> bool:
|
||||||
|
return self.state is Document.STATE_FAILED
|
||||||
|
|
||||||
|
def increase(self, step: float) -> None:
|
||||||
|
assert step > 0
|
||||||
|
self.percentage += step
|
||||||
|
|
||||||
|
def print_message(self, text: str, error: bool = False) -> None:
|
||||||
|
s = Style.BRIGHT + Fore.YELLOW + f"[doc {self.doc.id}] "
|
||||||
|
s += Fore.CYAN + f"{int(self.percentage)}% " + Style.RESET_ALL
|
||||||
|
if error:
|
||||||
|
s += Fore.RED + text + Style.RESET_ALL
|
||||||
|
log.error(s)
|
||||||
|
else:
|
||||||
|
s += text
|
||||||
|
log.info(s)
|
||||||
|
|
||||||
|
if self.callback:
|
||||||
|
self.callback(error, text, self.percentage)
|
||||||
|
|
||||||
|
def start_conversion_proc(self):
|
||||||
|
self.state = self.STATE_STARTING_CONVERSION_PROC
|
||||||
|
self.print_message(
|
||||||
|
f"Starting a {self.MSG_CONVERSION_PROCESS_TYPE} for the document conversion"
|
||||||
|
)
|
||||||
|
|
||||||
|
def start_page_gathering(self):
|
||||||
|
self.state = self.STATE_GATHERING_PAGES
|
||||||
|
self.increase(self.EST_PERCENT_START_CONVERSION_PROC)
|
||||||
|
self.print_message("Getting number of pages")
|
||||||
|
|
||||||
|
def set_total_pages(self, pages: int) -> None:
|
||||||
|
self.state = self.STATE_CONVERTING_PAGES
|
||||||
|
self.increase(self.EST_PERCENT_GATHER_PAGES)
|
||||||
|
assert pages > 0
|
||||||
|
self.pages = pages
|
||||||
|
|
||||||
|
def page_iter(self, pages):
|
||||||
|
self.set_total_pages(pages)
|
||||||
|
for page in range(1, pages + 1):
|
||||||
|
self.start_converting_page(page)
|
||||||
|
yield page
|
||||||
|
self.finished_converting_page()
|
||||||
|
|
||||||
|
def start_converting_page(self, page: int) -> None:
|
||||||
|
searchable = "searchable " if self.ocr_lang else ""
|
||||||
|
remaining = ""
|
||||||
|
|
||||||
|
if not self.page_timer_start:
|
||||||
|
self.page_timer_start = time.monotonic()
|
||||||
|
else:
|
||||||
|
processed_pages = page - 1
|
||||||
|
elapsed = time.monotonic() - self.page_timer_start
|
||||||
|
elapsed_per_page = elapsed / processed_pages
|
||||||
|
remaining = (self.pages - processed_pages) * elapsed_per_page
|
||||||
|
remaining = datetime.timedelta(seconds=round(remaining))
|
||||||
|
remaining = f" (remaining: {remaining}s)"
|
||||||
|
|
||||||
|
self.print_message(
|
||||||
|
f"Converting page {page}/{self.pages} from pixels to {searchable}PDF{remaining}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def finished_converting_page(self) -> None:
|
||||||
|
self.increase(self.EST_PERCENT_CONVERT_PAGES / self.pages)
|
||||||
|
|
||||||
|
def fail(self, msg: str) -> None:
|
||||||
|
self.state = self.STATE_FAILED
|
||||||
|
self.print_message(msg, error=True)
|
||||||
|
self.doc.mark_as_failed()
|
||||||
|
|
||||||
|
def success(self) -> None:
|
||||||
|
self.state = self.STATE_COMPLETED
|
||||||
|
self.percentage = 100
|
||||||
|
self.doc.mark_as_safe()
|
||||||
|
self.print_message("Conversion completed successfully")
|
|
@ -29,6 +29,7 @@ from ..isolation_provider.container import Container, NoContainerTechException
|
||||||
from ..isolation_provider.dummy import Dummy
|
from ..isolation_provider.dummy import Dummy
|
||||||
from ..isolation_provider.qubes import Qubes, is_qubes_native_conversion
|
from ..isolation_provider.qubes import Qubes, is_qubes_native_conversion
|
||||||
from ..util import get_resource_path, get_subprocess_startupinfo, get_version
|
from ..util import get_resource_path, get_subprocess_startupinfo, get_version
|
||||||
|
from ..ctx import ConversionCtx
|
||||||
from .logic import Alert, CollapsibleBox, DangerzoneGui, UpdateDialog
|
from .logic import Alert, CollapsibleBox, DangerzoneGui, UpdateDialog
|
||||||
from .updater import UpdateReport
|
from .updater import UpdateReport
|
||||||
|
|
||||||
|
@ -1124,11 +1125,8 @@ class ConvertTask(QtCore.QObject):
|
||||||
self.dangerzone = dangerzone
|
self.dangerzone = dangerzone
|
||||||
|
|
||||||
def convert_document(self) -> None:
|
def convert_document(self) -> None:
|
||||||
self.dangerzone.isolation_provider.convert(
|
ctx = ConversionCtx(self.document, self.ocr_lang, self.progress_callback)
|
||||||
self.document,
|
self.dangerzone.isolation_provider.convert(ctx)
|
||||||
self.ocr_lang,
|
|
||||||
self.progress_callback,
|
|
||||||
)
|
|
||||||
self.finished.emit(self.error)
|
self.finished.emit(self.error)
|
||||||
|
|
||||||
def progress_callback(self, error: bool, text: str, percentage: int) -> None:
|
def progress_callback(self, error: bool, text: str, percentage: int) -> None:
|
||||||
|
|
|
@ -16,6 +16,7 @@ from ..conversion import errors
|
||||||
from ..conversion.common import DEFAULT_DPI, INT_BYTES
|
from ..conversion.common import DEFAULT_DPI, INT_BYTES
|
||||||
from ..document import Document
|
from ..document import Document
|
||||||
from ..util import get_tessdata_dir, replace_control_chars
|
from ..util import get_tessdata_dir, replace_control_chars
|
||||||
|
from ..ctx import ConversionCtx
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -97,29 +98,24 @@ class IsolationProvider(ABC):
|
||||||
def install(self) -> bool:
|
def install(self) -> bool:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def convert(
|
def convert(self, ctx: ConversionCtx) -> None:
|
||||||
self,
|
document = ctx.doc
|
||||||
document: Document,
|
|
||||||
ocr_lang: Optional[str],
|
|
||||||
progress_callback: Optional[Callable] = None,
|
|
||||||
) -> None:
|
|
||||||
self.progress_callback = progress_callback
|
|
||||||
document.mark_as_converting()
|
document.mark_as_converting()
|
||||||
try:
|
try:
|
||||||
|
ctx.start_conversion_proc()
|
||||||
with self.doc_to_pixels_proc(document) as conversion_proc:
|
with self.doc_to_pixels_proc(document) as conversion_proc:
|
||||||
self.convert_with_proc(document, ocr_lang, conversion_proc)
|
ctx.start_page_gathering()
|
||||||
document.mark_as_safe()
|
self.convert_with_proc(ctx, conversion_proc)
|
||||||
if document.archive_after_conversion:
|
if document.archive_after_conversion:
|
||||||
document.archive()
|
document.archive()
|
||||||
|
ctx.success()
|
||||||
except errors.ConversionException as e:
|
except errors.ConversionException as e:
|
||||||
self.print_progress(document, True, str(e), 0)
|
ctx.fail(str(e))
|
||||||
document.mark_as_failed()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(
|
log.exception(
|
||||||
f"An exception occurred while converting document '{document.id}'"
|
f"An exception occurred while converting document '{document.id}'"
|
||||||
)
|
)
|
||||||
self.print_progress(document, True, str(e), 0)
|
ctx.fail(str(e))
|
||||||
document.mark_as_failed()
|
|
||||||
|
|
||||||
def ocr_page(self, pixmap: fitz.Pixmap, ocr_lang: str) -> bytes:
|
def ocr_page(self, pixmap: fitz.Pixmap, ocr_lang: str) -> bytes:
|
||||||
"""Get a single page as pixels, OCR it, and return a PDF as bytes."""
|
"""Get a single page as pixels, OCR it, and return a PDF as bytes."""
|
||||||
|
@ -157,12 +153,13 @@ class IsolationProvider(ABC):
|
||||||
|
|
||||||
def convert_with_proc(
|
def convert_with_proc(
|
||||||
self,
|
self,
|
||||||
document: Document,
|
ctx: ConversionCtx,
|
||||||
ocr_lang: Optional[str],
|
|
||||||
p: subprocess.Popen,
|
p: subprocess.Popen,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
ocr_lang = ctx.ocr_lang
|
||||||
|
document = ctx.doc
|
||||||
percentage = 0.0
|
percentage = 0.0
|
||||||
with open(document.input_filename, "rb") as f:
|
with open(ctx.doc.input_filename, "rb") as f:
|
||||||
try:
|
try:
|
||||||
assert p.stdin is not None
|
assert p.stdin is not None
|
||||||
p.stdin.write(f.read())
|
p.stdin.write(f.read())
|
||||||
|
@ -178,13 +175,7 @@ class IsolationProvider(ABC):
|
||||||
|
|
||||||
safe_doc = fitz.Document()
|
safe_doc = fitz.Document()
|
||||||
|
|
||||||
for page in range(1, n_pages + 1):
|
for page in ctx.page_iter(n_pages):
|
||||||
searchable = "searchable " if ocr_lang else ""
|
|
||||||
text = (
|
|
||||||
f"Converting page {page}/{n_pages} from pixels to {searchable}PDF"
|
|
||||||
)
|
|
||||||
self.print_progress(document, False, text, percentage)
|
|
||||||
|
|
||||||
width = read_int(p.stdout)
|
width = read_int(p.stdout)
|
||||||
height = read_int(p.stdout)
|
height = read_int(p.stdout)
|
||||||
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
|
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
|
||||||
|
@ -216,25 +207,6 @@ class IsolationProvider(ABC):
|
||||||
safe_doc.save(document.sanitized_output_filename)
|
safe_doc.save(document.sanitized_output_filename)
|
||||||
os.replace(document.sanitized_output_filename, document.output_filename)
|
os.replace(document.sanitized_output_filename, document.output_filename)
|
||||||
|
|
||||||
# TODO handle leftover code input
|
|
||||||
text = "Successfully converted document"
|
|
||||||
self.print_progress(document, False, text, 100)
|
|
||||||
|
|
||||||
def print_progress(
|
|
||||||
self, document: Document, error: bool, text: str, percentage: float
|
|
||||||
) -> None:
|
|
||||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
|
||||||
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
|
|
||||||
if error:
|
|
||||||
s += Fore.RED + text + Style.RESET_ALL
|
|
||||||
log.error(s)
|
|
||||||
else:
|
|
||||||
s += text
|
|
||||||
log.info(s)
|
|
||||||
|
|
||||||
if self.progress_callback:
|
|
||||||
self.progress_callback(error, text, percentage)
|
|
||||||
|
|
||||||
def get_proc_exception(
|
def get_proc_exception(
|
||||||
self, p: subprocess.Popen, timeout: int = TIMEOUT_EXCEPTION
|
self, p: subprocess.Popen, timeout: int = TIMEOUT_EXCEPTION
|
||||||
) -> Exception:
|
) -> Exception:
|
||||||
|
|
|
@ -7,6 +7,7 @@ import colorama
|
||||||
|
|
||||||
from . import errors, util
|
from . import errors, util
|
||||||
from .document import Document
|
from .document import Document
|
||||||
|
from .ctx import ConversionCtx
|
||||||
from .isolation_provider.base import IsolationProvider
|
from .isolation_provider.base import IsolationProvider
|
||||||
from .settings import Settings
|
from .settings import Settings
|
||||||
from .util import get_resource_path
|
from .util import get_resource_path
|
||||||
|
@ -65,12 +66,9 @@ class DangerzoneCore(object):
|
||||||
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
def convert_doc(document: Document) -> None:
|
def convert_doc(document: Document) -> None:
|
||||||
|
ctx = ConversionCtx(document, ocr_lang, stdout_callback)
|
||||||
try:
|
try:
|
||||||
self.isolation_provider.convert(
|
self.isolation_provider.convert(ctx)
|
||||||
document,
|
|
||||||
ocr_lang,
|
|
||||||
stdout_callback,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(
|
log.exception(
|
||||||
f"Unexpected error occurred while converting '{document}'"
|
f"Unexpected error occurred while converting '{document}'"
|
||||||
|
|
26
prog_tests.py
Executable file
26
prog_tests.py
Executable file
|
@ -0,0 +1,26 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
from dangerzone import document
|
||||||
|
from dangerzone.ctx import ConversionCtx
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Drive a ConversionCtx through a simulated ten-page conversion.

    Progress is logged at INFO level.  Every fifth page is reported as a
    failure, and the run is still marked successful once the loop is done,
    so both kinds of messages can be seen.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ConversionCtx(document.Document())
    ctx.start_conversion_proc()
    ctx.start_page_gathering()

    for page in ctx.page_iter(10):
        # Stand-in for the time a real page conversion would take.
        time.sleep(0.2)
        if page % 5 == 0:
            ctx.fail(f"Failed during page {page}")

    ctx.success()


if __name__ == "__main__":
    sys.exit(main())
|
Loading…
Reference in a new issue