Replace 'convert' with PyMuPDF for images

PyMuPDF can also convert images of the types we already support, so we
don't need GraphicsMagick's 'convert' ("gm convert").
This commit is contained in:
deeplow 2023-11-16 11:23:55 +00:00
parent a3a64882a3
commit e5dbe25abb
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -146,14 +146,14 @@ class DocumentToPixels(DangerzoneConverter):
"type": "libreoffice", "type": "libreoffice",
}, },
# .jpg # .jpg
"image/jpeg": {"type": "convert"}, "image/jpeg": {"type": None},
# .gif # .gif
"image/gif": {"type": "convert"}, "image/gif": {"type": None},
# .png # .png
"image/png": {"type": "convert"}, "image/png": {"type": None},
# .tif # .tif
"image/tiff": {"type": "convert"}, "image/tiff": {"type": None},
"image/x-tiff": {"type": "convert"}, "image/x-tiff": {"type": None},
} }
# Detect MIME type # Detect MIME type
@ -182,7 +182,7 @@ class DocumentToPixels(DangerzoneConverter):
# Convert input document to PDF # Convert input document to PDF
conversion = conversions[mime_type] conversion = conversions[mime_type]
if conversion["type"] is None: if conversion["type"] is None:
pdf_filename = "/tmp/input_file" doc = fitz.open("/tmp/input_file", filetype=mime_type)
elif conversion["type"] == "libreoffice": elif conversion["type"] == "libreoffice":
libreoffice_ext = conversion.get("libreoffice_ext", None) libreoffice_ext = conversion.get("libreoffice_ext", None)
# Disable conversion for HWP/HWPX on specific platforms. See: # Disable conversion for HWP/HWPX on specific platforms. See:
@ -220,24 +220,7 @@ class DocumentToPixels(DangerzoneConverter):
# https://github.com/freedomofpress/dangerzone/issues/494 # https://github.com/freedomofpress/dangerzone/issues/494
if not os.path.exists(pdf_filename): if not os.path.exists(pdf_filename):
raise errors.LibreofficeFailure() raise errors.LibreofficeFailure()
elif conversion["type"] == "convert": doc = fitz.open(pdf_filename)
self.update_progress("Converting to PDF using GraphicsMagick")
args = [
"gm",
"convert",
"/tmp/input_file",
"/tmp/input_file.pdf",
]
await self.run_command(
args,
error_message="Conversion to PDF with GraphicsMagick failed",
timeout_message=(
"Error converting document to PDF, GraphicsMagick timed out after"
f" {timeout} seconds"
),
timeout=timeout,
)
pdf_filename = "/tmp/input_file.pdf"
else: else:
raise errors.InvalidGMConversion( raise errors.InvalidGMConversion(
f"Invalid conversion type {conversion['type']} for MIME type {mime_type}" f"Invalid conversion type {conversion['type']} for MIME type {mime_type}"
@ -245,7 +228,6 @@ class DocumentToPixels(DangerzoneConverter):
self.percentage += 3 self.percentage += 3
# Obtain number of pages # Obtain number of pages
doc = fitz.open(pdf_filename)
if doc.page_count > errors.MAX_PAGES: if doc.page_count > errors.MAX_PAGES:
raise errors.MaxPagesException() raise errors.MaxPagesException()
await self.write_page_count(doc.page_count) await self.write_page_count(doc.page_count)