Compress per page when not using OCR

Compress each page individually when OCR is not enabled, as is already done for pages that go through OCR [1].

[1]: https://github.com/freedomofpress/dangerzone/pull/622#discussion_r1410986342
This commit is contained in:
deeplow 2024-01-03 12:04:27 +00:00
parent e2531279c0
commit f1d90c6fa9
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -58,19 +58,21 @@ class PixelsToPDF(DangerzoneConverter):
self.update_progress( self.update_progress(
f"Converting page {page_num}/{num_pages} from pixels to searchable PDF" f"Converting page {page_num}/{num_pages} from pixels to searchable PDF"
) )
ocr_pdf_bytes = pixmap.pdfocr_tobytes( page_pdf_bytes = pixmap.pdfocr_tobytes(
compress=True, compress=True,
language=ocr_lang, language=ocr_lang,
tessdata=get_tessdata_dir(), tessdata=get_tessdata_dir(),
) )
ocr_pdf = fitz.open("pdf", ocr_pdf_bytes) ocr_pdf = fitz.open("pdf", page_pdf_bytes)
safe_doc.insert_pdf(ocr_pdf)
else: # Don't OCR else: # Don't OCR
self.update_progress( self.update_progress(
f"Converting page {page_num}/{num_pages} from pixels to PDF" f"Converting page {page_num}/{num_pages} from pixels to PDF"
) )
safe_doc.insert_file(pixmap) page_doc = fitz.Document()
page_doc.insert_file(pixmap)
page_pdf_bytes = page_doc.tobytes(deflate_images=True)
safe_doc.insert_pdf(fitz.open("pdf", page_pdf_bytes))
self.percentage += percentage_per_page self.percentage += percentage_per_page
# Next operations apply to all the pages, so we need to recalculate the # Next operations apply to all the pages, so we need to recalculate the