mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
Flatten DangerzoneConverter methods into functions
This commit is contained in:
parent
82fc69655e
commit
c78b1ea71b
1 changed file with 422 additions and 426 deletions
|
@ -24,12 +24,13 @@ import magic
|
|||
from PIL import Image
|
||||
|
||||
|
||||
class DangerzoneConverter:
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
def output(self, error: bool, text: str, percentage: float) -> None:
|
||||
print(json.dumps({"error": error, "text": text, "percentage": int(percentage)}))
|
||||
sys.stdout.flush()
|
||||
|
||||
def document_to_pixels(self) -> int:
|
||||
percentage = 0.0
|
||||
|
||||
def document_to_pixels() -> int:
|
||||
percentage: float = 0.0
|
||||
|
||||
conversions: Dict[str, Dict[str, Optional[str]]] = {
|
||||
# .pdf
|
||||
|
@ -106,7 +107,7 @@ class DangerzoneConverter:
|
|||
|
||||
# Validate MIME type
|
||||
if mime_type not in conversions:
|
||||
self.output(True, "The document format is not supported", percentage)
|
||||
output(True, "The document format is not supported", percentage)
|
||||
return 1
|
||||
|
||||
# Convert input document to PDF
|
||||
|
@ -114,7 +115,7 @@ class DangerzoneConverter:
|
|||
if conversion["type"] is None:
|
||||
pdf_filename = "/tmp/input_file"
|
||||
elif conversion["type"] == "libreoffice":
|
||||
self.output(False, "Converting to PDF using LibreOffice", percentage)
|
||||
output(False, "Converting to PDF using LibreOffice", percentage)
|
||||
args = [
|
||||
"libreoffice",
|
||||
"--headless",
|
||||
|
@ -132,7 +133,7 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting document to PDF, LibreOffice timed out after 60 seconds",
|
||||
percentage,
|
||||
|
@ -140,7 +141,7 @@ class DangerzoneConverter:
|
|||
return 1
|
||||
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Conversion to PDF with LibreOffice failed",
|
||||
percentage,
|
||||
|
@ -148,7 +149,7 @@ class DangerzoneConverter:
|
|||
return 1
|
||||
pdf_filename = "/tmp/input_file.pdf"
|
||||
elif conversion["type"] == "convert":
|
||||
self.output(False, "Converting to PDF using GraphicsMagick", percentage)
|
||||
output(False, "Converting to PDF using GraphicsMagick", percentage)
|
||||
args = [
|
||||
"gm",
|
||||
"convert",
|
||||
|
@ -163,14 +164,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting document to PDF, GraphicsMagick timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Conversion to PDF with GraphicsMagick failed",
|
||||
percentage,
|
||||
|
@ -178,7 +179,7 @@ class DangerzoneConverter:
|
|||
return 1
|
||||
pdf_filename = "/tmp/input_file.pdf"
|
||||
else:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Invalid conversion type",
|
||||
percentage,
|
||||
|
@ -188,7 +189,7 @@ class DangerzoneConverter:
|
|||
percentage += 3
|
||||
|
||||
# Separate PDF into pages
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
"Separating document into pages",
|
||||
percentage,
|
||||
|
@ -199,14 +200,14 @@ class DangerzoneConverter:
|
|||
args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error separating document into pages, pdfseparate timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Separating document into pages failed",
|
||||
percentage,
|
||||
|
@ -227,7 +228,7 @@ class DangerzoneConverter:
|
|||
height_filename = f"/tmp/page-{page}.height"
|
||||
filename_base = f"/tmp/page-{page}"
|
||||
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
f"Converting page {page}/{len(page_filenames)} to pixels",
|
||||
percentage,
|
||||
|
@ -242,14 +243,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting from PDF to PNG, pdftocairo timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Conversion from PDF to PNG failed",
|
||||
percentage,
|
||||
|
@ -278,14 +279,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting from PNG to pixels, convert timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Conversion from PNG to RGB failed",
|
||||
percentage,
|
||||
|
@ -297,7 +298,7 @@ class DangerzoneConverter:
|
|||
|
||||
percentage += percentage_per_page
|
||||
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
"Converted document to pixels",
|
||||
percentage,
|
||||
|
@ -313,7 +314,8 @@ class DangerzoneConverter:
|
|||
|
||||
return 0
|
||||
|
||||
def pixels_to_pdf(self) -> int:
|
||||
|
||||
def pixels_to_pdf() -> int:
|
||||
percentage: float = 50.0
|
||||
|
||||
num_pages = len(glob.glob("/dangerzone/page-*.rgb"))
|
||||
|
@ -334,9 +336,9 @@ class DangerzoneConverter:
|
|||
with open(height_filename) as f:
|
||||
height = f.read().strip()
|
||||
|
||||
if os.environ.get("OCR") == "1" and os.environ.get("OCR_LANGUAGE"):
|
||||
if os.environ.get("OCR") == "1":
|
||||
# OCR the document
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
f"Converting page {page}/{num_pages} from pixels to searchable PDF",
|
||||
percentage,
|
||||
|
@ -360,14 +362,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting pixels to PNG, convert timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Page {page}/{num_pages} conversion to PNG failed",
|
||||
percentage,
|
||||
|
@ -392,14 +394,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting PNG to searchable PDF, tesseract timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Page {page}/{num_pages} OCR failed",
|
||||
percentage,
|
||||
|
@ -408,7 +410,7 @@ class DangerzoneConverter:
|
|||
|
||||
else:
|
||||
# Don't OCR
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
f"Converting page {page}/{num_pages} from pixels to PDF",
|
||||
percentage,
|
||||
|
@ -432,14 +434,14 @@ class DangerzoneConverter:
|
|||
timeout=60,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error converting RGB to PDF, convert timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Page {page}/{num_pages} conversion to PDF failed",
|
||||
percentage,
|
||||
|
@ -449,7 +451,7 @@ class DangerzoneConverter:
|
|||
percentage += percentage_per_page
|
||||
|
||||
# Merge pages into a single PDF
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
f"Merging {num_pages} pages into a single PDF",
|
||||
percentage,
|
||||
|
@ -463,14 +465,14 @@ class DangerzoneConverter:
|
|||
args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Error merging pages into a single PDF, pdfunite timed out after 60 seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
"Merging pages into a single PDF failed",
|
||||
percentage,
|
||||
|
@ -480,7 +482,7 @@ class DangerzoneConverter:
|
|||
percentage += 2
|
||||
|
||||
# Compress
|
||||
self.output(
|
||||
output(
|
||||
False,
|
||||
f"Compressing PDF",
|
||||
percentage,
|
||||
|
@ -494,14 +496,14 @@ class DangerzoneConverter:
|
|||
timeout=compress_timeout,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Error compressing PDF, ps2pdf timed out after {compress_timeout} seconds",
|
||||
percentage,
|
||||
)
|
||||
return 1
|
||||
if p.returncode != 0:
|
||||
self.output(
|
||||
output(
|
||||
True,
|
||||
f"Compressing PDF failed",
|
||||
percentage,
|
||||
|
@ -509,7 +511,7 @@ class DangerzoneConverter:
|
|||
return 1
|
||||
|
||||
percentage = 100.0
|
||||
self.output(False, "Safe PDF created", percentage)
|
||||
output(False, "Safe PDF created", percentage)
|
||||
|
||||
# Move converted files into /safezone
|
||||
shutil.move("/tmp/safe-output.pdf", "/safezone")
|
||||
|
@ -517,23 +519,17 @@ class DangerzoneConverter:
|
|||
|
||||
return 0
|
||||
|
||||
def output(self, error: bool, text: str, percentage: float) -> None:
|
||||
print(json.dumps({"error": error, "text": text, "percentage": int(percentage)}))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
if len(sys.argv) != 2:
|
||||
print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]")
|
||||
return -1
|
||||
|
||||
converter = DangerzoneConverter()
|
||||
|
||||
if sys.argv[1] == "document-to-pixels":
|
||||
return converter.document_to_pixels()
|
||||
return document_to_pixels()
|
||||
|
||||
if sys.argv[1] == "pixels-to-pdf":
|
||||
return converter.pixels_to_pdf()
|
||||
return pixels_to_pdf()
|
||||
|
||||
return -1
|
||||
|
||||
|
|
Loading…
Reference in a new issue