mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Compress per page when not using OCR
Make the compression happen per page when OCR is not enabled [1].

[1]: https://github.com/freedomofpress/dangerzone/pull/622#discussion_r1410986342
This commit is contained in:
parent
e2531279c0
commit
f1d90c6fa9
1 changed file with 6 additions and 4 deletions
|
@@ -58,19 +58,21 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
self.update_progress(
|
self.update_progress(
|
||||||
f"Converting page {page_num}/{num_pages} from pixels to searchable PDF"
|
f"Converting page {page_num}/{num_pages} from pixels to searchable PDF"
|
||||||
)
|
)
|
||||||
ocr_pdf_bytes = pixmap.pdfocr_tobytes(
|
page_pdf_bytes = pixmap.pdfocr_tobytes(
|
||||||
compress=True,
|
compress=True,
|
||||||
language=ocr_lang,
|
language=ocr_lang,
|
||||||
tessdata=get_tessdata_dir(),
|
tessdata=get_tessdata_dir(),
|
||||||
)
|
)
|
||||||
ocr_pdf = fitz.open("pdf", ocr_pdf_bytes)
|
ocr_pdf = fitz.open("pdf", page_pdf_bytes)
|
||||||
safe_doc.insert_pdf(ocr_pdf)
|
|
||||||
else: # Don't OCR
|
else: # Don't OCR
|
||||||
self.update_progress(
|
self.update_progress(
|
||||||
f"Converting page {page_num}/{num_pages} from pixels to PDF"
|
f"Converting page {page_num}/{num_pages} from pixels to PDF"
|
||||||
)
|
)
|
||||||
safe_doc.insert_file(pixmap)
|
page_doc = fitz.Document()
|
||||||
|
page_doc.insert_file(pixmap)
|
||||||
|
page_pdf_bytes = page_doc.tobytes(deflate_images=True)
|
||||||
|
|
||||||
|
safe_doc.insert_pdf(fitz.open("pdf", page_pdf_bytes))
|
||||||
self.percentage += percentage_per_page
|
self.percentage += percentage_per_page
|
||||||
|
|
||||||
# Next operations apply to the all the pages, so we need to recalculate the
|
# Next operations apply to the all the pages, so we need to recalculate the
|
||||||
|
|
Loading…
Reference in a new issue