Wrap dangerzone.py back into a class to keep track of percentage

This commit is contained in:
Guthrie McAfee Armstrong 2022-06-05 18:23:10 -04:00 committed by deeplow
parent eaa08c9c3d
commit 17939cb70c
No known key found for this signature in database
GPG key ID: 577982871529A52A

View file

@ -18,10 +18,7 @@ import os
import shutil import shutil
import subprocess import subprocess
import sys import sys
<<<<<<< HEAD
from typing import Dict, Optional from typing import Dict, Optional
=======
>>>>>>> d990cfb (refactor dangerzone.py, raise exceptions instead of returning int)
import magic import magic
from PIL import Image from PIL import Image
@ -55,13 +52,12 @@ def run_command(
raise TimeoutError(timeout_message) from e raise TimeoutError(timeout_message) from e
def output(self, error: bool, text: str, percentage: float) -> None: class ConversionJob(object):
print(json.dumps({"error": error, "text": text, "percentage": int(percentage)})) def __init__(self) -> None:
sys.stdout.flush() self.percentage: float = 0.0 # TODO Optional[float], but this default value will be overwritten immediately
def document_to_pixels(self) -> None:
def document_to_pixels() -> None: self.percentage: float = 0.0
percentage: float = 0.0
conversions: Dict[str, Dict[str, Optional[str]]] = { conversions: Dict[str, Dict[str, Optional[str]]] = {
# .pdf # .pdf
@ -145,7 +141,7 @@ def document_to_pixels() -> None:
if conversion["type"] is None: if conversion["type"] is None:
pdf_filename = "/tmp/input_file" pdf_filename = "/tmp/input_file"
elif conversion["type"] == "libreoffice": elif conversion["type"] == "libreoffice":
output(False, "Converting to PDF using LibreOffice", percentage) self.update_progress("Converting to PDF using LibreOffice")
args = [ args = [
"libreoffice", "libreoffice",
"--headless", "--headless",
@ -162,7 +158,7 @@ def document_to_pixels() -> None:
) )
pdf_filename = "/tmp/input_file.pdf" pdf_filename = "/tmp/input_file.pdf"
elif conversion["type"] == "convert": elif conversion["type"] == "convert":
output(False, "Converting to PDF using GraphicsMagick", percentage) self.update_progress("Converting to PDF using GraphicsMagick")
args = [ args = [
"gm", "gm",
"convert", "convert",
@ -179,14 +175,10 @@ def document_to_pixels() -> None:
raise ValueError( raise ValueError(
f"Invalid conversion type {conversion['type']} for MIME type {mime_type}" f"Invalid conversion type {conversion['type']} for MIME type {mime_type}"
) )
percentage += 3 self.percentage += 3
# Separate PDF into pages # Separate PDF into pages
output( self.update_progress("Separating document into pages"),
False,
"Separating document into pages",
percentage,
)
args = ["pdftk", pdf_filename, "burst", "output", "/tmp/page-%d.pdf"] args = ["pdftk", pdf_filename, "burst", "output", "/tmp/page-%d.pdf"]
run_command( run_command(
args, args,
@ -196,7 +188,7 @@ def document_to_pixels() -> None:
page_filenames = glob.glob("/tmp/page-*.pdf") page_filenames = glob.glob("/tmp/page-*.pdf")
percentage += 2 self.percentage += 2
# Convert to RGB pixel data # Convert to RGB pixel data
percentage_per_page = 45.0 / len(page_filenames) percentage_per_page = 45.0 / len(page_filenames)
@ -208,10 +200,8 @@ def document_to_pixels() -> None:
height_filename = f"/tmp/page-{page}.height" height_filename = f"/tmp/page-{page}.height"
filename_base = f"/tmp/page-{page}" filename_base = f"/tmp/page-{page}"
output( self.update_progress(
False, f"Converting page {page}/{len(page_filenames)} to pixels"
f"Converting page {page}/{len(page_filenames)} to pixels",
percentage,
) )
# Convert to png # Convert to png
@ -245,15 +235,11 @@ def document_to_pixels() -> None:
# Delete the png # Delete the png
os.remove(png_filename) os.remove(png_filename)
percentage += percentage_per_page self.percentage += percentage_per_page
# END OF FOR LOOP # END OF FOR LOOP
output( self.update_progress("Converted document to pixels")
False,
"Converted document to pixels",
percentage,
)
# Move converted files into /dangerzone # Move converted files into /dangerzone
for filename in ( for filename in (
@ -263,9 +249,8 @@ def document_to_pixels() -> None:
): ):
shutil.move(filename, "/dangerzone") shutil.move(filename, "/dangerzone")
def pixels_to_pdf(self) -> None:
def pixels_to_pdf() -> None: self.percentage = 50.0
percentage = 50.0
num_pages = len(glob.glob("/dangerzone/page-*.rgb")) num_pages = len(glob.glob("/dangerzone/page-*.rgb"))
@ -286,10 +271,8 @@ def pixels_to_pdf() -> None:
height = f.read().strip() height = f.read().strip()
if os.environ.get("OCR") == "1": # OCR the document if os.environ.get("OCR") == "1": # OCR the document
output( self.update_progress(
False, f"Converting page {page}/{num_pages} from pixels to searchable PDF"
f"Converting page {page}/{num_pages} from pixels to searchable PDF",
percentage,
) )
run_command( run_command(
[ [
@ -321,10 +304,8 @@ def pixels_to_pdf() -> None:
) )
else: # Don't OCR else: # Don't OCR
output( self.update_progress(
False, f"Converting page {page}/{num_pages} from pixels to PDF"
f"Converting page {page}/{num_pages} from pixels to PDF",
percentage,
) )
run_command( run_command(
[ [
@ -341,16 +322,12 @@ def pixels_to_pdf() -> None:
timeout_message=f"Error converting RGB to PDF, convert timed out after {DEFAULT_TIMEOUT} seconds", timeout_message=f"Error converting RGB to PDF, convert timed out after {DEFAULT_TIMEOUT} seconds",
) )
percentage += percentage_per_page self.percentage += percentage_per_page
# END OF FOR LOOP # END OF FOR LOOP
# Merge pages into a single PDF # Merge pages into a single PDF
output( self.update_progress(f"Merging {num_pages} pages into a single PDF")
False,
f"Merging {num_pages} pages into a single PDF",
percentage,
)
args = ["pdfunite"] args = ["pdfunite"]
for page in range(1, num_pages + 1): for page in range(1, num_pages + 1):
args.append(f"/tmp/page-{page}.pdf") args.append(f"/tmp/page-{page}.pdf")
@ -361,14 +338,10 @@ def pixels_to_pdf() -> None:
timeout_message=f"Error merging pages into a single PDF, pdfunite timed out after {DEFAULT_TIMEOUT} seconds", timeout_message=f"Error merging pages into a single PDF, pdfunite timed out after {DEFAULT_TIMEOUT} seconds",
) )
percentage += 2 self.percentage += 2
# Compress # Compress
output( self.update_progress(f"Compressing PDF")
False,
f"Compressing PDF",
percentage,
)
compress_timeout = num_pages * 3 compress_timeout = num_pages * 3
run_command( run_command(
["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"], ["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"],
@ -377,34 +350,46 @@ def pixels_to_pdf() -> None:
timeout=compress_timeout, timeout=compress_timeout,
) )
percentage = 100.0 self.percentage = 100.0
output(False, "Safe PDF created", percentage) self.update_progress("Safe PDF created")
# Move converted files into /safezone # Move converted files into /safezone
shutil.move("/tmp/safe-output.pdf", "/safezone") shutil.move("/tmp/safe-output.pdf", "/safezone")
shutil.move("/tmp/safe-output-compressed.pdf", "/safezone") shutil.move("/tmp/safe-output-compressed.pdf", "/safezone")
def update_progress(self, text, *, error: bool = False):
    """Emit one progress record as a single JSON line on stdout.

    The record has three keys: ``error`` (the keyword-only flag passed in),
    ``text`` (the message), and ``percentage`` (the job's current
    ``self.percentage`` truncated to an int).

    Args:
        text: Message to report.
        error: Keyword-only; set to True when ``text`` describes a failure.
    """
    record = {
        "error": error,
        "text": text,
        "percentage": int(self.percentage),
    }
    line = json.dumps(record)
    sys.stdout.write(f"{line}\n")
    # Flush right away so the record is not held back in the stdout buffer.
    sys.stdout.flush()
def main() -> int: def main() -> int:
if len(sys.argv) != 2: if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]") print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]")
return -1 return -1
job = ConversionJob()
if sys.argv[1] == "document-to-pixels": if sys.argv[1] == "document-to-pixels":
try: try:
document_to_pixels() job.document_to_pixels()
except: except (RuntimeError, TimeoutError) as e:
job.update_progress(str(e), error=True)
return 1 return 1
else: else:
return 0 return 0 # Success!
if sys.argv[1] == "pixels-to-pdf": elif sys.argv[1] == "pixels-to-pdf":
try: try:
pixels_to_pdf() job.pixels_to_pdf()
except: except (RuntimeError, TimeoutError) as e:
job.update_progress(str(e), error=True)
return 1 return 1
else: else:
return 0 return 0 # Success!
return -1 return -1