mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 18:22:37 +02:00
Replace pdfinfo logic (get # pages) with PyMuPDF
This commit is contained in:
parent
327ab8791f
commit
317deadbe4
1 changed files with 10 additions and 25 deletions
|
@ -245,33 +245,16 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
self.percentage += 3
|
self.percentage += 3
|
||||||
|
|
||||||
# Obtain number of pages
|
# Obtain number of pages
|
||||||
self.update_progress("Calculating number of pages")
|
doc = fitz.open(pdf_filename)
|
||||||
stdout, _ = await self.run_command(
|
if doc.page_count > errors.MAX_PAGES:
|
||||||
["pdfinfo", pdf_filename],
|
|
||||||
error_message="PDF file is corrupted",
|
|
||||||
timeout_message=(
|
|
||||||
f"Extracting metadata from PDF timed out after {timeout} second"
|
|
||||||
),
|
|
||||||
timeout=timeout,
|
|
||||||
)
|
|
||||||
|
|
||||||
search = re.search(r"^Pages:\s*(\d+)\s*\n", stdout.decode(), re.MULTILINE)
|
|
||||||
if search is not None:
|
|
||||||
num_pages: int = int(search.group(1))
|
|
||||||
else:
|
|
||||||
raise errors.NoPageCountException()
|
|
||||||
|
|
||||||
if num_pages > errors.MAX_PAGES:
|
|
||||||
raise errors.MaxPagesException()
|
raise errors.MaxPagesException()
|
||||||
|
await self.write_page_count(doc.page_count)
|
||||||
await self.write_page_count(num_pages)
|
|
||||||
|
|
||||||
# Get a more precise timeout, based on the number of pages
|
# Get a more precise timeout, based on the number of pages
|
||||||
timeout = self.calculate_timeout(size, num_pages)
|
timeout = self.calculate_timeout(size, doc.page_count)
|
||||||
percentage_per_page = 45.0 / num_pages
|
percentage_per_page = 45.0 / doc.page_count
|
||||||
page_base = "/tmp/page"
|
page_base = "/tmp/page"
|
||||||
doc = fitz.open(pdf_filename)
|
for page in doc.pages():
|
||||||
for page in doc:
|
|
||||||
# TODO check if page.number is doc-controlled
|
# TODO check if page.number is doc-controlled
|
||||||
page_num = page.number + 1 # pages start in 1
|
page_num = page.number + 1 # pages start in 1
|
||||||
rgb_filename = f"{page_base}-{page_num}.rgb"
|
rgb_filename = f"{page_base}-{page_num}.rgb"
|
||||||
|
@ -279,7 +262,9 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
height_filename = f"{page_base}-{page_num}.height"
|
height_filename = f"{page_base}-{page_num}.height"
|
||||||
|
|
||||||
self.percentage += percentage_per_page
|
self.percentage += percentage_per_page
|
||||||
self.update_progress(f"Converting page {page_num}/{num_pages} to pixels")
|
self.update_progress(
|
||||||
|
f"Converting page {page_num}/{doc.page_count} to pixels"
|
||||||
|
)
|
||||||
pix = page.get_pixmap(dpi=150)
|
pix = page.get_pixmap(dpi=150)
|
||||||
rgb_buf = pix.samples_mv
|
rgb_buf = pix.samples_mv
|
||||||
await self.write_page_width(pix.width, width_filename)
|
await self.write_page_width(pix.width, width_filename)
|
||||||
|
@ -293,7 +278,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
)
|
)
|
||||||
|
|
||||||
# XXX: Sanity check to avoid situations like #560.
|
# XXX: Sanity check to avoid situations like #560.
|
||||||
if not running_on_qubes() and len(final_files) != 3 * num_pages:
|
if not running_on_qubes() and len(final_files) != 3 * doc.page_count:
|
||||||
raise errors.PageCountMismatch()
|
raise errors.PageCountMismatch()
|
||||||
|
|
||||||
# Move converted files into /tmp/dangerzone
|
# Move converted files into /tmp/dangerzone
|
||||||
|
|
Loading…
Reference in a new issue