Optimize PDFtoPPM batch conversion to run continuously

Previously, the batched PDFtoPPM conversion would pause after each
batch to send the converted data. By deferring each batch's send to
the following loop iteration, we can send the previous batch's data at
the "same time" as converting the next batch.
This commit is contained in:
deeplow 2023-10-25 16:42:42 +01:00
parent 3046cb7b8b
commit 45a71224cb
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -288,8 +288,27 @@ class DocumentToPixels(DangerzoneConverter):
else: else:
timeout_per_batch = timeout / (int(num_pages / PAGE_BATCH_SIZE) + 1) timeout_per_batch = timeout / (int(num_pages / PAGE_BATCH_SIZE) + 1)
for first_page, last_page in batch_iterator(num_pages): for first_page, last_page in batch_iterator(num_pages):
await self.pdf_to_rgb(first_page, last_page, pdf_filename, timeout_per_batch) # XXX send data from the previous loop's conversion to
await self.send_rgb_files(first_page, last_page, num_pages) # always be able to process and send data at the same time
if first_page == 1: # If in first pass
await self.pdf_to_rgb(
first_page, last_page, pdf_filename, timeout_per_batch
)
delayed_send_rgb_files = self.send_rgb_files(
first_page, last_page, num_pages
)
else:
await asyncio.gather(
self.pdf_to_rgb(
first_page, last_page, pdf_filename, timeout_per_batch
),
delayed_send_rgb_files,
)
delayed_send_rgb_files = self.send_rgb_files(
first_page, last_page, num_pages
)
await delayed_send_rgb_files
final_files = ( final_files = (
glob.glob("/tmp/page-*.rgb") glob.glob("/tmp/page-*.rgb")