# Mirror of https://github.com/freedomofpress/dangerzone.git
# (synced 2025-04-29 10:12:38 +02:00).
# Executable Python file, 219 lines, 6.8 KiB.
#!/usr/bin/env python3
|
|
import sys
|
|
import subprocess
|
|
import glob
|
|
import shutil
|
|
import os
|
|
|
|
import magic
|
|
from PIL import Image
|
|
|
|
|
|
def print_flush(s):
    """Print *s* to standard output and flush the stream immediately."""
    print(s, flush=True)
|
|
|
|
|
|
def _run_or_exit(args, timeout_message, failure_prefix, timeout=60):
    """Run an external command and exit the script with status 1 if it fails.

    Args:
        args: Command and arguments, passed to ``subprocess.run`` as a list.
        timeout_message: Message printed when the command exceeds *timeout*.
        failure_prefix: Start of the message printed on a non-zero exit code.
        timeout: Maximum run time in seconds.
    """
    try:
        p = subprocess.run(args, timeout=timeout)
    except subprocess.TimeoutExpired:
        print_flush(timeout_message)
        sys.exit(1)

    if p.returncode != 0:
        # The child's output is inherited rather than captured, so p.stdout
        # is always None here; report the exit code instead.
        print_flush(f"{failure_prefix}: exit code {p.returncode}")
        sys.exit(1)


def main():
    """Convert /tmp/input_file into per-page raw RGB pixel files in /tmp.

    Steps:
      1. Detect the MIME type of the input and reject unsupported formats.
      2. Convert the input to PDF (LibreOffice for office documents,
         GraphicsMagick for images; PDFs are used as-is).
      3. Split the PDF into /tmp/page-N.pdf files with pdftk.
      4. For each page, render a PNG with pdftocairo, write its dimensions to
         /tmp/page-N.width and /tmp/page-N.height, convert the PNG to 8-bit
         RGB data in /tmp/page-N.rgb, and delete the PNG.

    Progress and error messages are printed to stdout. Any failure terminates
    the script with exit status 1.
    """
    input_filename = "/tmp/input_file"

    # Supported MIME types and how each one is turned into a PDF.
    conversions = {
        # .pdf
        "application/pdf": {"type": None},
        # .docx
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": {
            "type": "libreoffice",
            "libreoffice_output_filter": "writer_pdf_Export",
        },
        # .doc
        "application/msword": {
            "type": "libreoffice",
            "libreoffice_output_filter": "writer_pdf_Export",
        },
        # .docm
        "application/vnd.ms-word.document.macroEnabled.12": {
            "type": "libreoffice",
            "libreoffice_output_filter": "writer_pdf_Export",
        },
        # .xlsx
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": {
            "type": "libreoffice",
            "libreoffice_output_filter": "calc_pdf_Export",
        },
        # .xls
        "application/vnd.ms-excel": {
            "type": "libreoffice",
            "libreoffice_output_filter": "calc_pdf_Export",
        },
        # .pptx
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": {
            "type": "libreoffice",
            "libreoffice_output_filter": "impress_pdf_Export",
        },
        # .ppt
        "application/vnd.ms-powerpoint": {
            "type": "libreoffice",
            "libreoffice_output_filter": "impress_pdf_Export",
        },
        # .odt
        "application/vnd.oasis.opendocument.text": {
            "type": "libreoffice",
            "libreoffice_output_filter": "writer_pdf_Export",
        },
        # .odg
        "application/vnd.oasis.opendocument.graphics": {
            "type": "libreoffice",
            "libreoffice_output_filter": "impress_pdf_Export",
        },
        # .odp
        "application/vnd.oasis.opendocument.presentation": {
            "type": "libreoffice",
            "libreoffice_output_filter": "impress_pdf_Export",
        },
        # .ods
        "application/vnd.oasis.opendocument.spreadsheet": {
            "type": "libreoffice",
            "libreoffice_output_filter": "calc_pdf_Export",
        },
        # .jpg
        "image/jpeg": {"type": "convert"},
        # .gif
        "image/gif": {"type": "convert"},
        # .png
        "image/png": {"type": "convert"},
        # .tif
        "image/tiff": {"type": "convert"},
        "image/x-tiff": {"type": "convert"},
    }

    # Detect MIME type
    mime = magic.Magic(mime=True)
    mime_type = mime.from_file(input_filename)

    # Validate MIME type
    if mime_type not in conversions:
        print_flush("The document format is not supported")
        sys.exit(1)

    # Convert input document to PDF
    conversion = conversions[mime_type]
    if conversion["type"] is None:
        # Already a PDF: use the input file directly.
        pdf_filename = input_filename
    elif conversion["type"] == "libreoffice":
        print_flush("Converting to PDF using LibreOffice")
        _run_or_exit(
            [
                "libreoffice",
                "--headless",
                "--convert-to",
                f"pdf:{conversion['libreoffice_output_filter']}",
                "--outdir",
                "/tmp",
                input_filename,
            ],
            "Error converting document to PDF, LibreOffice timed out after 60 seconds",
            "Conversion to PDF failed",
        )
        pdf_filename = "/tmp/input_file.pdf"
    elif conversion["type"] == "convert":
        print_flush("Converting to PDF using GraphicsMagick")
        _run_or_exit(
            ["gm", "convert", input_filename, "/tmp/input_file.pdf"],
            "Error converting document to PDF, GraphicsMagick timed out after 60 seconds",
            "Conversion to PDF failed",
        )
        pdf_filename = "/tmp/input_file.pdf"
    else:
        # Defensive: only reachable if the table above gains an unknown type.
        print_flush("Invalid conversion type")
        sys.exit(1)

    # Separate PDF into pages
    print_flush("")
    print_flush("Separating document into pages")
    _run_or_exit(
        ["pdftk", pdf_filename, "burst", "output", "/tmp/page-%d.pdf"],
        "Error separating document into pages, pdftk timed out after 60 seconds",
        "Separating document into pages failed",
    )

    page_filenames = glob.glob("/tmp/page-*.pdf")
    print_flush(f"Document has {len(page_filenames)} pages")
    print_flush("")

    # Convert to RGB pixel data. Pages are addressed by number (1..N) rather
    # than by iterating the glob result, whose order is not guaranteed.
    for page in range(1, len(page_filenames) + 1):
        filename_base = f"/tmp/page-{page}"
        page_pdf = f"{filename_base}.pdf"
        png_filename = f"{filename_base}.png"
        rgb_filename = f"{filename_base}.rgb"
        width_filename = f"{filename_base}.width"
        height_filename = f"{filename_base}.height"

        print_flush(f"Converting page {page} to pixels")

        # Convert to png (pdftocairo appends ".png" to filename_base)
        _run_or_exit(
            ["pdftocairo", page_pdf, "-png", "-singlefile", filename_base],
            "Error converting from PDF to PNG, pdftocairo timed out after 60 seconds",
            "Conversion from PDF to PNG failed",
        )

        # Save the width and height; the context manager closes the image
        # file handle as soon as the size has been read.
        with Image.open(png_filename) as im:
            width, height = im.size
        with open(width_filename, "w") as f:
            f.write(str(width))
        with open(height_filename, "w") as f:
            f.write(str(height))

        # Convert to RGB pixels
        _run_or_exit(
            ["gm", "convert", png_filename, "-depth", "8", f"rgb:{rgb_filename}"],
            "Error converting from PNG to pixels, convert timed out after 60 seconds",
            "Conversion from PNG to RGB failed",
        )

        # Delete the png
        os.remove(png_filename)
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|