From 45a71224cb671b747e2eff54dcaf28c6c700b6f6 Mon Sep 17 00:00:00 2001 From: deeplow Date: Wed, 25 Oct 2023 16:42:42 +0100 Subject: [PATCH] Optimize PDFtoPPM batch conversion to run continuously Previously, the batched PDFtoPPM conversion would pause after converting each batch in order to send that batch's data. By deferring the sending of each batch's data to the following loop iteration, we can send one batch's data at the "same time" as the next batch is being converted. --- dangerzone/conversion/doc_to_pixels.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index 6e64972..f871bf4 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -288,8 +288,27 @@ class DocumentToPixels(DangerzoneConverter): else: timeout_per_batch = timeout / (int(num_pages / PAGE_BATCH_SIZE) + 1) for first_page, last_page in batch_iterator(num_pages): - await self.pdf_to_rgb(first_page, last_page, pdf_filename, timeout_per_batch) - await self.send_rgb_files(first_page, last_page, num_pages) + # XXX send data from the previous loop's conversion to + # always be able to process and send data at the same time + if first_page == 1: # If in first pass + await self.pdf_to_rgb( + first_page, last_page, pdf_filename, timeout_per_batch + ) + delayed_send_rgb_files = self.send_rgb_files( + first_page, last_page, num_pages + ) + else: + await asyncio.gather( + self.pdf_to_rgb( + first_page, last_page, pdf_filename, timeout_per_batch + ), + delayed_send_rgb_files, + ) + delayed_send_rgb_files = self.send_rgb_files( + first_page, last_page, num_pages + ) + + await delayed_send_rgb_files final_files = ( glob.glob("/tmp/page-*.rgb")