Compress per page when not using OCR

Make the compression happen per page when OCR is not enabled [1].

[1]: https://github.com/freedomofpress/dangerzone/pull/622#discussion_r1410986342
This commit is contained in:
deeplow 2024-01-03 12:04:27 +00:00
parent e2531279c0
commit f1d90c6fa9
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -58,19 +58,21 @@ class PixelsToPDF(DangerzoneConverter):
self.update_progress(
f"Converting page {page_num}/{num_pages} from pixels to searchable PDF"
)
ocr_pdf_bytes = pixmap.pdfocr_tobytes(
page_pdf_bytes = pixmap.pdfocr_tobytes(
compress=True,
language=ocr_lang,
tessdata=get_tessdata_dir(),
)
ocr_pdf = fitz.open("pdf", ocr_pdf_bytes)
safe_doc.insert_pdf(ocr_pdf)
ocr_pdf = fitz.open("pdf", page_pdf_bytes)
else: # Don't OCR
self.update_progress(
f"Converting page {page_num}/{num_pages} from pixels to PDF"
)
safe_doc.insert_file(pixmap)
page_doc = fitz.Document()
page_doc.insert_file(pixmap)
page_pdf_bytes = page_doc.tobytes(deflate_images=True)
safe_doc.insert_pdf(fitz.open("pdf", page_pdf_bytes))
self.percentage += percentage_per_page
# Next operations apply to all the pages, so we need to recalculate the