From a1c87a207aa3aa4216f1eb0f5b46199ca27d7606 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Mon, 27 Mar 2023 20:01:52 +0300 Subject: [PATCH] container: Allow converting more document formats Remove the association between MIME types and export filters, because LibreOffice is able to auto-detect them on its own. Instead, ask LibreOffice to simply convert the document to a .pdf. This association was cumbersome for yet another reason: there are MIME types that may be associated with more than one file type. That's why it's better to let LibreOffice decide the proper filter for the conversion. Our current understanding is that this change won't widen our attack surface for the following reasons: * The output filters for PDF documents are pretty specific, and we don't affect the input filters in any way. * The default behavior of LibreOffice on Alpine Linux is to disable macros. Closes #369 --- CHANGELOG.md | 1 + container/dangerzone.py | 35 ++++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9fc2d5..284fbf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ since 0.4.1, and this project adheres to [Semantic Versioning](https://semver.or - Development: Add dummy isolation provider for testing non-conversion-related issues in virtualized Windows and MacOS, where Docker can't run, due to the lack of nested virtualization ([issue #229](https://github.com/freedomofpress/dangerzone/issues/229)) +- Add support for more MIME types that were previously disregarded ([issue #377](https://github.com/freedomofpress/dangerzone/issues/377)) ### Changed diff --git a/container/dangerzone.py b/container/dangerzone.py index 4996c23..debf1d0 100644 --- a/container/dangerzone.py +++ b/container/dangerzone.py @@ -141,57 +141,66 @@ class DangerzoneConverter: # .docx "application/vnd.openxmlformats-officedocument.wordprocessingml.document": { "type": "libreoffice", - "libreoffice_output_filter": 
"writer_pdf_Export", }, # .doc "application/msword": { "type": "libreoffice", - "libreoffice_output_filter": "writer_pdf_Export", }, # .docm "application/vnd.ms-word.document.macroEnabled.12": { "type": "libreoffice", - "libreoffice_output_filter": "writer_pdf_Export", }, # .xlsx "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": { "type": "libreoffice", - "libreoffice_output_filter": "calc_pdf_Export", }, # .xls "application/vnd.ms-excel": { "type": "libreoffice", - "libreoffice_output_filter": "calc_pdf_Export", }, # .pptx "application/vnd.openxmlformats-officedocument.presentationml.presentation": { "type": "libreoffice", - "libreoffice_output_filter": "impress_pdf_Export", }, # .ppt "application/vnd.ms-powerpoint": { "type": "libreoffice", - "libreoffice_output_filter": "impress_pdf_Export", }, # .odt "application/vnd.oasis.opendocument.text": { "type": "libreoffice", - "libreoffice_output_filter": "writer_pdf_Export", }, # .odg "application/vnd.oasis.opendocument.graphics": { "type": "libreoffice", - "libreoffice_output_filter": "impress_pdf_Export", }, # .odp "application/vnd.oasis.opendocument.presentation": { "type": "libreoffice", - "libreoffice_output_filter": "impress_pdf_Export", }, - # .ops + # .ods "application/vnd.oasis.opendocument.spreadsheet": { "type": "libreoffice", - "libreoffice_output_filter": "calc_pdf_Export", + }, + # .ods / .ots + "application/vnd.oasis.opendocument.spreadsheet-template": { + "type": "libreoffice", + }, + # .odt / .ott + "application/vnd.oasis.opendocument.text-template": { + "type": "libreoffice", + }, + # At least .odt, .docx, .odg, .odp, .ods, and .pptx + "application/zip": { + "type": "libreoffice", + }, + # At least .doc, .docx, .odg, .odp, .odt, .pdf, .ppt, .pptx, .xls, and .xlsx + "application/octet-stream": { + "type": "libreoffice", + }, + # At least .doc, .ppt, and .xls + "application/x-ole-storage": { + "type": "libreoffice", }, # .jpg "image/jpeg": {"type": "convert"}, @@ -230,7 +239,7 @@ 
class DangerzoneConverter: "libreoffice", "--headless", "--convert-to", - f"pdf:{conversion['libreoffice_output_filter']}", + "pdf", "--outdir", "/tmp", "/tmp/input_file",