Flatten DangerzoneConverter methods into functions

This commit is contained in:
Guthrie McAfee Armstrong 2022-06-05 08:50:11 -04:00 committed by deeplow
parent 82fc69655e
commit c78b1ea71b
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -24,12 +24,13 @@ import magic
from PIL import Image from PIL import Image
class DangerzoneConverter: def output(self, error: bool, text: str, percentage: float) -> None:
def __init__(self) -> None: print(json.dumps({"error": error, "text": text, "percentage": int(percentage)}))
pass sys.stdout.flush()
def document_to_pixels(self) -> int:
percentage = 0.0 def document_to_pixels() -> int:
percentage: float = 0.0
conversions: Dict[str, Dict[str, Optional[str]]] = { conversions: Dict[str, Dict[str, Optional[str]]] = {
# .pdf # .pdf
@ -106,7 +107,7 @@ class DangerzoneConverter:
# Validate MIME type # Validate MIME type
if mime_type not in conversions: if mime_type not in conversions:
self.output(True, "The document format is not supported", percentage) output(True, "The document format is not supported", percentage)
return 1 return 1
# Convert input document to PDF # Convert input document to PDF
@ -114,7 +115,7 @@ class DangerzoneConverter:
if conversion["type"] is None: if conversion["type"] is None:
pdf_filename = "/tmp/input_file" pdf_filename = "/tmp/input_file"
elif conversion["type"] == "libreoffice": elif conversion["type"] == "libreoffice":
self.output(False, "Converting to PDF using LibreOffice", percentage) output(False, "Converting to PDF using LibreOffice", percentage)
args = [ args = [
"libreoffice", "libreoffice",
"--headless", "--headless",
@ -132,7 +133,7 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting document to PDF, LibreOffice timed out after 60 seconds", "Error converting document to PDF, LibreOffice timed out after 60 seconds",
percentage, percentage,
@ -140,7 +141,7 @@ class DangerzoneConverter:
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
f"Conversion to PDF with LibreOffice failed", f"Conversion to PDF with LibreOffice failed",
percentage, percentage,
@ -148,7 +149,7 @@ class DangerzoneConverter:
return 1 return 1
pdf_filename = "/tmp/input_file.pdf" pdf_filename = "/tmp/input_file.pdf"
elif conversion["type"] == "convert": elif conversion["type"] == "convert":
self.output(False, "Converting to PDF using GraphicsMagick", percentage) output(False, "Converting to PDF using GraphicsMagick", percentage)
args = [ args = [
"gm", "gm",
"convert", "convert",
@ -163,14 +164,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting document to PDF, GraphicsMagick timed out after 60 seconds", "Error converting document to PDF, GraphicsMagick timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
"Conversion to PDF with GraphicsMagick failed", "Conversion to PDF with GraphicsMagick failed",
percentage, percentage,
@ -178,7 +179,7 @@ class DangerzoneConverter:
return 1 return 1
pdf_filename = "/tmp/input_file.pdf" pdf_filename = "/tmp/input_file.pdf"
else: else:
self.output( output(
True, True,
"Invalid conversion type", "Invalid conversion type",
percentage, percentage,
@ -188,7 +189,7 @@ class DangerzoneConverter:
percentage += 3 percentage += 3
# Separate PDF into pages # Separate PDF into pages
self.output( output(
False, False,
"Separating document into pages", "Separating document into pages",
percentage, percentage,
@ -199,14 +200,14 @@ class DangerzoneConverter:
args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60 args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error separating document into pages, pdfseparate timed out after 60 seconds", "Error separating document into pages, pdfseparate timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
"Separating document into pages failed", "Separating document into pages failed",
percentage, percentage,
@ -227,7 +228,7 @@ class DangerzoneConverter:
height_filename = f"/tmp/page-{page}.height" height_filename = f"/tmp/page-{page}.height"
filename_base = f"/tmp/page-{page}" filename_base = f"/tmp/page-{page}"
self.output( output(
False, False,
f"Converting page {page}/{len(page_filenames)} to pixels", f"Converting page {page}/{len(page_filenames)} to pixels",
percentage, percentage,
@ -242,14 +243,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting from PDF to PNG, pdftocairo timed out after 60 seconds", "Error converting from PDF to PNG, pdftocairo timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
"Conversion from PDF to PNG failed", "Conversion from PDF to PNG failed",
percentage, percentage,
@ -278,14 +279,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting from PNG to pixels, convert timed out after 60 seconds", "Error converting from PNG to pixels, convert timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
"Conversion from PNG to RGB failed", "Conversion from PNG to RGB failed",
percentage, percentage,
@ -297,7 +298,7 @@ class DangerzoneConverter:
percentage += percentage_per_page percentage += percentage_per_page
self.output( output(
False, False,
"Converted document to pixels", "Converted document to pixels",
percentage, percentage,
@ -313,7 +314,8 @@ class DangerzoneConverter:
return 0 return 0
def pixels_to_pdf(self) -> int:
def pixels_to_pdf() -> int:
percentage: float = 50.0 percentage: float = 50.0
num_pages = len(glob.glob("/dangerzone/page-*.rgb")) num_pages = len(glob.glob("/dangerzone/page-*.rgb"))
@ -334,9 +336,9 @@ class DangerzoneConverter:
with open(height_filename) as f: with open(height_filename) as f:
height = f.read().strip() height = f.read().strip()
if os.environ.get("OCR") == "1" and os.environ.get("OCR_LANGUAGE"): if os.environ.get("OCR") == "1":
# OCR the document # OCR the document
self.output( output(
False, False,
f"Converting page {page}/{num_pages} from pixels to searchable PDF", f"Converting page {page}/{num_pages} from pixels to searchable PDF",
percentage, percentage,
@ -360,14 +362,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting pixels to PNG, convert timed out after 60 seconds", "Error converting pixels to PNG, convert timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
f"Page {page}/{num_pages} conversion to PNG failed", f"Page {page}/{num_pages} conversion to PNG failed",
percentage, percentage,
@ -392,14 +394,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting PNG to searchable PDF, tesseract timed out after 60 seconds", "Error converting PNG to searchable PDF, tesseract timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
f"Page {page}/{num_pages} OCR failed", f"Page {page}/{num_pages} OCR failed",
percentage, percentage,
@ -408,7 +410,7 @@ class DangerzoneConverter:
else: else:
# Don't OCR # Don't OCR
self.output( output(
False, False,
f"Converting page {page}/{num_pages} from pixels to PDF", f"Converting page {page}/{num_pages} from pixels to PDF",
percentage, percentage,
@ -432,14 +434,14 @@ class DangerzoneConverter:
timeout=60, timeout=60,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error converting RGB to PDF, convert timed out after 60 seconds", "Error converting RGB to PDF, convert timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
f"Page {page}/{num_pages} conversion to PDF failed", f"Page {page}/{num_pages} conversion to PDF failed",
percentage, percentage,
@ -449,7 +451,7 @@ class DangerzoneConverter:
percentage += percentage_per_page percentage += percentage_per_page
# Merge pages into a single PDF # Merge pages into a single PDF
self.output( output(
False, False,
f"Merging {num_pages} pages into a single PDF", f"Merging {num_pages} pages into a single PDF",
percentage, percentage,
@ -463,14 +465,14 @@ class DangerzoneConverter:
args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60 args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
"Error merging pages into a single PDF, pdfunite timed out after 60 seconds", "Error merging pages into a single PDF, pdfunite timed out after 60 seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
"Merging pages into a single PDF failed", "Merging pages into a single PDF failed",
percentage, percentage,
@ -480,7 +482,7 @@ class DangerzoneConverter:
percentage += 2 percentage += 2
# Compress # Compress
self.output( output(
False, False,
f"Compressing PDF", f"Compressing PDF",
percentage, percentage,
@ -494,14 +496,14 @@ class DangerzoneConverter:
timeout=compress_timeout, timeout=compress_timeout,
) )
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.output( output(
True, True,
f"Error compressing PDF, ps2pdf timed out after {compress_timeout} seconds", f"Error compressing PDF, ps2pdf timed out after {compress_timeout} seconds",
percentage, percentage,
) )
return 1 return 1
if p.returncode != 0: if p.returncode != 0:
self.output( output(
True, True,
f"Compressing PDF failed", f"Compressing PDF failed",
percentage, percentage,
@ -509,7 +511,7 @@ class DangerzoneConverter:
return 1 return 1
percentage = 100.0 percentage = 100.0
self.output(False, "Safe PDF created", percentage) output(False, "Safe PDF created", percentage)
# Move converted files into /safezone # Move converted files into /safezone
shutil.move("/tmp/safe-output.pdf", "/safezone") shutil.move("/tmp/safe-output.pdf", "/safezone")
@ -517,23 +519,17 @@ class DangerzoneConverter:
return 0 return 0
def output(self, error: bool, text: str, percentage: float) -> None:
print(json.dumps({"error": error, "text": text, "percentage": int(percentage)}))
sys.stdout.flush()
def main() -> int: def main() -> int:
if len(sys.argv) != 2: if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]") print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]")
return -1 return -1
converter = DangerzoneConverter()
if sys.argv[1] == "document-to-pixels": if sys.argv[1] == "document-to-pixels":
return converter.document_to_pixels() return document_to_pixels()
if sys.argv[1] == "pixels-to-pdf": if sys.argv[1] == "pixels-to-pdf":
return converter.pixels_to_pdf() return pixels_to_pdf()
return -1 return -1