diff --git a/CHANGELOG.md b/CHANGELOG.md index 91a8f59..05e14fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,10 @@ since 0.4.1, and this project adheres to [Semantic Versioning](https://semver.or - Feature: Add support for HWP/HWPX files (Hancom Office) for macOS Apple Silicon devices ([issue #498](https://github.com/freedomofpress/dangerzone/issues/498), thanks to [@OctopusET](https://github.com/OctopusET)) - Replace Dangerzone document rendering engine from pdftoppm PyMuPDF, essentially replacing a variety of tools (gm / tesseract / pdfunite / ps2pdf) ([issue #658](https://github.com/freedomofpress/dangerzone/issues/658)) +### Removed + +- Removed timeouts ([issue #687](https://github.com/freedomofpress/dangerzone/issues/687)) + ## Dangerzone 0.5.1 ### Fixed diff --git a/dangerzone/cli.py b/dangerzone/cli.py index 2361399..da326c8 100644 --- a/dangerzone/cli.py +++ b/dangerzone/cli.py @@ -37,12 +37,6 @@ def print_header(s: str) -> None: @click.option( "--unsafe-dummy-conversion", "dummy_conversion", flag_value=True, hidden=True ) -@click.option( - "--enable-timeouts / --disable-timeouts", - default=True, - show_default=True, - help="Enable/Disable timeouts during document conversion", -) @click.argument( "filenames", required=True, @@ -55,7 +49,6 @@ def print_header(s: str) -> None: def cli_main( output_filename: Optional[str], ocr_lang: Optional[str], - enable_timeouts: bool, filenames: List[str], archive: bool, dummy_conversion: bool, @@ -67,7 +60,7 @@ def cli_main( elif is_qubes_native_conversion(): dangerzone = DangerzoneCore(Qubes()) else: - dangerzone = DangerzoneCore(Container(enable_timeouts=enable_timeouts)) + dangerzone = DangerzoneCore(Container()) display_banner() if len(filenames) == 1 and output_filename: diff --git a/dangerzone/conversion/common.py b/dangerzone/conversion/common.py index f3649c9..e0a9f39 100644 --- a/dangerzone/conversion/common.py +++ b/dangerzone/conversion/common.py @@ -12,10 +12,8 @@ import time from abc import abstractmethod from typing import Callable, Dict, List, Optional, TextIO, Tuple, Union -TIMEOUT_PER_PAGE: float = 30 # (seconds) -TIMEOUT_PER_MB: float = 30 # (seconds) -TIMEOUT_MIN: float = 60 # (seconds) DEFAULT_DPI = 150 # Pixels per inch +INT_BYTES = 2 def running_on_qubes() -> bool: @@ -23,28 +21,6 @@ def running_on_qubes() -> bool: return os.path.exists("/usr/share/qubes/marker-vm") -def calculate_timeout(size: float, pages: Optional[float] = None) -> float: - """Calculate the timeout for a command. - - The timeout calculation takes two factors in mind: - - 1. The size (in MiBs) of the dataset (document, multiple pages). - 2. The number of pages in the dataset. - - It then calculates proportional timeout values based on the above, and keeps the - large one. This way, we can handle several corner cases: - - * Documents with lots of pages, but small file size. - * Single images with large file size. - """ - # Do not have timeouts lower than 10 seconds, if the file size is small, since - # we need to take into account the program's startup time as well. - timeout = max(TIMEOUT_PER_MB * size, TIMEOUT_MIN) - if pages: - timeout = max(timeout, TIMEOUT_PER_PAGE * pages) - return timeout - - def get_tessdata_dir() -> str: if running_on_qubes(): return "/usr/share/tesseract/tessdata/" @@ -76,7 +52,7 @@ class DangerzoneConverter: @classmethod def _write_int(cls, num: int, file: TextIO = sys.stdout) -> None: - cls._write_bytes(num.to_bytes(2, "big", signed=False), file=file) + cls._write_bytes(num.to_bytes(INT_BYTES, "big", signed=False), file=file) # ==== ASYNC METHODS ==== # We run sync methods in async wrappers, because pure async methods are more difficult: @@ -127,8 +103,6 @@ class DangerzoneConverter: args: List[str], *, error_message: str, - timeout_message: Optional[str] = None, - timeout: Optional[float] = None, stdout_callback: Optional[Callable] = None, stderr_callback: Optional[Callable] = None, ) -> Tuple[bytes, bytes]: @@ -138,7 +112,6 @@ class DangerzoneConverter: output in bytes. :raises RuntimeError: if the process returns a non-zero exit status - :raises TimeoutError: if the process times out """ # Start the provided command, and return a handle. The command will run in the # background. @@ -164,12 +137,9 @@ class DangerzoneConverter: self.read_stream(proc.stderr, stderr_callback) ) - # Wait until the command has finished, for a specific timeout. Then, verify that the - # command has completed successfully. In any other case, raise an exception. - try: - ret = await asyncio.wait_for(proc.wait(), timeout=timeout) - except asyncio.exceptions.TimeoutError: - raise TimeoutError(timeout_message) + # Wait until the command has finished. Then, verify that the command + # has completed successfully. In any other case, raise an exception. + ret = await proc.wait() if ret != 0: raise RuntimeError(error_message) @@ -179,15 +149,6 @@ class DangerzoneConverter: stderr = await stderr_task return (stdout, stderr) - def calculate_timeout( - self, size: float, pages: Optional[float] = None - ) -> Optional[float]: - """Calculate the timeout for a command.""" - if not int(os.environ.get("ENABLE_TIMEOUTS", 1)): - return None - - return calculate_timeout(size, pages) - @abstractmethod async def convert(self) -> None: pass diff --git a/dangerzone/conversion/pixels_to_pdf.py b/dangerzone/conversion/pixels_to_pdf.py index 072e197..362a769 100644 --- a/dangerzone/conversion/pixels_to_pdf.py +++ b/dangerzone/conversion/pixels_to_pdf.py @@ -49,7 +49,6 @@ class PixelsToPDF(DangerzoneConverter): # The first few operations happen on a per-page basis. page_size = len(untrusted_rgb_data) total_size += page_size - timeout = self.calculate_timeout(page_size, 1) pixmap = fitz.Pixmap( fitz.Colorspace(fitz.CS_RGB), width, height, untrusted_rgb_data, False ) @@ -75,10 +74,6 @@ class PixelsToPDF(DangerzoneConverter): safe_doc.insert_pdf(fitz.open("pdf", page_pdf_bytes)) self.percentage += percentage_per_page - # Next operations apply to the all the pages, so we need to recalculate the - # timeout. - timeout = self.calculate_timeout(total_size, num_pages) - self.percentage = 100.0 self.update_progress("Safe PDF created") @@ -110,7 +105,7 @@ async def main() -> int: try: await converter.convert(ocr_lang) return 0 - except (RuntimeError, TimeoutError, ValueError) as e: + except (RuntimeError, ValueError) as e: converter.update_progress(str(e), error=True) return 1 diff --git a/dangerzone/gui/__init__.py b/dangerzone/gui/__init__.py index c440c01..a6ed69b 100644 --- a/dangerzone/gui/__init__.py +++ b/dangerzone/gui/__init__.py @@ -96,12 +96,6 @@ class Application(QtWidgets.QApplication): @click.option( "--unsafe-dummy-conversion", "dummy_conversion", flag_value=True, hidden=True ) -@click.option( - "--enable-timeouts / --disable-timeouts", - default=True, - show_default=True, - help="Enable/Disable timeouts during document conversion", -) @click.argument( "filenames", required=False, @@ -111,9 +105,7 @@ class Application(QtWidgets.QApplication): ) @click.version_option(version=get_version(), message="%(version)s") @errors.handle_document_errors -def gui_main( - dummy_conversion: bool, filenames: Optional[List[str]], enable_timeouts: bool -) -> bool: +def gui_main(dummy_conversion: bool, filenames: Optional[List[str]]) -> bool: setup_logging() if platform.system() == "Darwin": @@ -138,7 +130,7 @@ def gui_main( qubes = Qubes() dangerzone = DangerzoneGui(app, isolation_provider=qubes) else: - container = Container(enable_timeouts=enable_timeouts) + container = Container() dangerzone = DangerzoneGui(app, isolation_provider=container) # Allow Ctrl-C to smoothly quit the program instead of throwing an exception diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 23c2c84..24249ca 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -10,9 +10,9 @@ from typing import IO, Callable, Optional from colorama import Fore, Style from ..conversion import errors -from ..conversion.common import calculate_timeout +from ..conversion.common import INT_BYTES from ..document import Document -from ..util import Stopwatch, nonblocking_read, replace_control_chars +from ..util import replace_control_chars log = logging.getLogger(__name__) @@ -23,27 +23,26 @@ PIXELS_TO_PDF_LOG_START = "----- PIXELS TO PDF LOG START -----" PIXELS_TO_PDF_LOG_END = "----- PIXELS TO PDF LOG END -----" -def read_bytes(f: IO[bytes], size: int, timeout: float, exact: bool = True) -> bytes: +def read_bytes(f: IO[bytes], size: int, exact: bool = True) -> bytes: """Read bytes from a file-like object.""" - buf = nonblocking_read(f, size, timeout) + buf = f.read(size) if exact and len(buf) != size: raise errors.InterruptedConversionException() return buf -def read_int(f: IO[bytes], timeout: float) -> int: +def read_int(f: IO[bytes]) -> int: """Read 2 bytes from a file-like object, and decode them as int.""" - untrusted_int = read_bytes(f, 2, timeout) - if len(untrusted_int) != 2: + untrusted_int = f.read(INT_BYTES) + if len(untrusted_int) != INT_BYTES: raise errors.InterruptedConversionException() return int.from_bytes(untrusted_int, "big", signed=False) def read_debug_text(f: IO[bytes], size: int) -> str: """Read arbitrarily long text (for debug purposes)""" - timeout = calculate_timeout(size) - untrusted_text = read_bytes(f, size, timeout, exact=False) - return untrusted_text.decode("ascii", errors="replace") + untrusted_text = f.read(size).decode("ascii", errors="replace") + return replace_control_chars(untrusted_text) class IsolationProvider(ABC): @@ -51,8 +50,6 @@ class IsolationProvider(ABC): Abstracts an isolation provider """ - STARTUP_TIME_SECONDS = 0 # The maximum time it takes a the provider to start up. - def __init__(self) -> None: if getattr(sys, "dangerzone_dev", False) == True: self.proc_stderr = subprocess.PIPE @@ -105,28 +102,18 @@ class IsolationProvider(ABC): except BrokenPipeError as e: raise errors.ConverterProcException(p) - # Get file size (in MiB) - size = os.path.getsize(document.input_filename) / 1024**2 - timeout = calculate_timeout(size) + self.STARTUP_TIME_SECONDS - - assert p is not None - assert p.stdout is not None - os.set_blocking(p.stdout.fileno(), False) - - n_pages = read_int(p.stdout, timeout) + assert p.stdout + n_pages = read_int(p.stdout) if n_pages == 0 or n_pages > errors.MAX_PAGES: raise errors.MaxPagesException() percentage_per_page = 50.0 / n_pages - timeout = calculate_timeout(size, n_pages) - sw = Stopwatch(timeout) - sw.start() for page in range(1, n_pages + 1): text = f"Converting page {page}/{n_pages} to pixels" self.print_progress(document, False, text, percentage) - width = read_int(p.stdout, timeout=sw.remaining) - height = read_int(p.stdout, timeout=sw.remaining) + width = read_int(p.stdout) + height = read_int(p.stdout) if not (1 <= width <= errors.MAX_PAGE_WIDTH): raise errors.MaxPageWidthException() if not (1 <= height <= errors.MAX_PAGE_HEIGHT): @@ -136,7 +123,6 @@ class IsolationProvider(ABC): untrusted_pixels = read_bytes( p.stdout, num_pixels, - timeout=sw.remaining, ) # Wrapper code @@ -157,8 +143,7 @@ class IsolationProvider(ABC): self.print_progress(document, False, text, percentage) if getattr(sys, "dangerzone_dev", False): - assert p.stderr is not None - os.set_blocking(p.stderr.fileno(), False) + assert p.stderr untrusted_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS) p.stderr.close() log.info( diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index c348689..98f47ff 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -33,11 +33,6 @@ class NoContainerTechException(Exception): class Container(IsolationProvider): # Name of the dangerzone container CONTAINER_NAME = "dangerzone.rocks/dangerzone" - STARTUP_TIME_SECONDS = 5 - - def __init__(self, enable_timeouts: bool) -> None: - self.enable_timeouts = 1 if enable_timeouts else 0 - super().__init__() @staticmethod def get_runtime_name() -> str: @@ -227,8 +222,6 @@ class Container(IsolationProvider): f"OCR={0 if ocr_lang is None else 1}", "-e", f"OCR_LANGUAGE={ocr_lang}", - "-e", - f"ENABLE_TIMEOUTS={self.enable_timeouts}", ] pixels_to_pdf_proc = self.exec_container(command, extra_args) @@ -256,14 +249,10 @@ class Container(IsolationProvider): "-m", "dangerzone.conversion.doc_to_pixels", ] - extra_args = [ - "-e", - f"ENABLE_TIMEOUTS={self.enable_timeouts}", - ] - return self.exec_container(command, extra_args) + return self.exec_container(command) def get_max_parallel_conversions(self) -> int: - # FIXME hardcoded 1 until timeouts are more limited and better handled + # FIXME hardcoded 1 until length conversions are better handled # https://github.com/freedomofpress/dangerzone/issues/257 return 1 diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index adbd916..f0b96a9 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -23,8 +23,6 @@ log = logging.getLogger(__name__) class Qubes(IsolationProvider): """Uses a disposable qube for performing the conversion""" - STARTUP_TIME_SECONDS = 5 * 60 # 5 minutes - def install(self) -> bool: return True @@ -37,7 +35,7 @@ class Qubes(IsolationProvider): converter = PixelsToPDF(progress_callback=print_progress_wrapper) try: asyncio.run(converter.convert(ocr_lang, tempdir)) - except (RuntimeError, TimeoutError, ValueError) as e: + except (RuntimeError, ValueError) as e: raise errors.UnexpectedConversionError(str(e)) finally: if getattr(sys, "dangerzone_dev", False): diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index 3bef55c..a6de19b 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -22,7 +22,7 @@ from .base import IsolationProviderTest @pytest.fixture def provider() -> Container: - return Container(enable_timeouts=False) + return Container() class TestContainer(IsolationProviderTest): diff --git a/tests/test_ocr.py b/tests/test_ocr.py index dc23ea5..d7f0370 100644 --- a/tests/test_ocr.py +++ b/tests/test_ocr.py @@ -49,7 +49,7 @@ def test_ocr_ommisions() -> None: installed_langs -= {"osd", "equ"} # Grab the languages that Dangerzone offers to the user through the GUI/CLI. - offered_langs = set(DangerzoneCore(Container(True)).ocr_languages.values()) + offered_langs = set(DangerzoneCore(Container()).ocr_languages.values()) # Ensure that both the installed languages and the ones we offer to the user are the # same.