mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
212 lines
6 KiB
Python
212 lines
6 KiB
Python
import platform
|
|
import subprocess
|
|
import pipes
|
|
import shutil
|
|
import os
|
|
import tempfile
|
|
import appdirs
|
|
|
|
# Container runtime used on this platform: podman on Linux, docker everywhere
# else (Windows, Darwin, and unknown platforms; dangerzone-vm eventually).
container_tech = "podman" if platform.system() == "Linux" else "docker"
|
|
|
|
# startupinfo handed to every subprocess.Popen call in this module. It stays
# None except on Windows, where a STARTUPINFO with STARTF_USESHOWWINDOW set is
# used (presumably to keep console windows from popping up -- confirm).
startupinfo = None
if platform.system() == "Windows":
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW


# Image name of the dangerzone container
container_name = "dangerzone.rocks/dangerzone"
|
|
|
|
|
|
def exec(args, stdout_callback=None):
    """Run a command, echoing it first, and return its exit code.

    The command line is printed (shell-quoted, prefixed with "> ") before the
    process is started. stderr is merged into stdout, and the stream is read
    in line-buffered text mode.

    NOTE: this function shadows the ``exec`` builtin inside this module; the
    name is kept because callers depend on it.

    Args:
        args: The command and its arguments, as a list of strings.
        stdout_callback: Optional callable invoked with each line of output
            (including its trailing newline) as the process produces it.
            When omitted, the output is read and discarded.

    Returns:
        The exit code of the process.
    """
    # Imported locally so this block does not depend on the file's import
    # list. shlex.quote is the documented equivalent of the undocumented
    # pipes.quote alias; the pipes module itself was removed in Python 3.13.
    import shlex

    print("> " + " ".join(shlex.quote(arg) for arg in args))

    with subprocess.Popen(
        args,
        stdin=None,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        universal_newlines=True,
        startupinfo=startupinfo,
    ) as p:
        if stdout_callback:
            for line in p.stdout:
                stdout_callback(line)

        # Drain whatever output is left and wait for the process to exit.
        # This must happen before the `with` block closes p.stdout.
        p.communicate()

    return p.returncode
|
|
|
|
|
|
def exec_container(args, stdout_callback=None):
    """Run a container-runtime command and return its exit code.

    The runtime executable ("podman" when ``container_tech`` is "podman",
    otherwise "docker") is looked up on PATH and prepended to ``args``; the
    resulting command is handed to ``exec`` together with ``stdout_callback``.

    NOTE: shutil.which() returns None when the executable is not found, and
    that value is passed through unchanged as the first element of the
    command.

    Args:
        args: List of arguments to pass to the container runtime.
        stdout_callback: Optional per-line output callback, forwarded to
            ``exec``.

    Returns:
        Whatever ``exec`` returns for the assembled command.
    """
    runtime_name = "podman" if container_tech == "podman" else "docker"
    runtime_path = shutil.which(runtime_name)
    return exec([runtime_path] + args, stdout_callback)
|
|
|
|
|
|
def convert(input_filename, output_filename, ocr_lang, stdout_callback):
    """Convert a document into a safe PDF using two container runs.

    The first run ("document-to-pixels") mounts ``input_filename`` at
    ``/tmp/input_file`` and a scratch directory at ``/dangerzone``. The second
    run ("pixels-to-pdf") mounts that same scratch directory plus a second one
    at ``/safezone``; on success ``safe-output-compressed.pdf`` is moved from
    there to ``output_filename``. Both runs are started with
    ``--network none``.

    The scratch directories live in a temporary directory under the
    dangerzone user config dir, which is removed when this function returns
    or raises.

    Args:
        input_filename: Path of the document to convert.
        output_filename: Path where the resulting PDF is written. An existing
            file at this path is removed, but only after both runs succeeded.
        ocr_lang: Value passed to the second run as ``OCR_LANGUAGE``. A truthy
            value sets ``OCR=1``, a falsy one sets ``OCR=0``.
        stdout_callback: Per-line output callback forwarded to
            ``exec_container`` for both runs.

    Returns:
        True if both runs exited with status 0 and the output file was moved
        into place; False if either run exited with a non-zero status.
    """
    ocr = "1" if ocr_lang else "0"

    dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
    os.makedirs(dz_tmp, exist_ok=True)

    if container_tech == "docker":
        platform_args = ["--platform", "linux/amd64"]
    else:
        platform_args = []

    # Arguments shared by both container invocations
    run_args = ["run", "--network", "none"] + platform_args

    # The context manager guarantees the scratch space is removed on every
    # exit path, including exceptions raised by the container calls or the
    # final move.
    with tempfile.TemporaryDirectory(dir=dz_tmp) as tmpdir:
        pixel_dir = os.path.join(tmpdir, "pixels")
        safe_dir = os.path.join(tmpdir, "safe")
        os.makedirs(pixel_dir, exist_ok=True)
        os.makedirs(safe_dir, exist_ok=True)

        # Convert document to pixels
        args = run_args + [
            "-v",
            f"{input_filename}:/tmp/input_file",
            "-v",
            f"{pixel_dir}:/dangerzone",
            container_name,
            "/usr/bin/python3",
            "/usr/local/bin/dangerzone.py",
            "document-to-pixels",
        ]
        if exec_container(args, stdout_callback) != 0:
            print("document-to-pixels failed")
            return False

        # TODO: validate convert to pixels output

        # Convert pixels to safe PDF
        # NOTE: a falsy ocr_lang is still interpolated verbatim into
        # OCR_LANGUAGE (e.g. "None"); presumably the container ignores it
        # when OCR=0 -- confirm.
        args = run_args + [
            "-v",
            f"{pixel_dir}:/dangerzone",
            "-v",
            f"{safe_dir}:/safezone",
            "-e",
            f"OCR={ocr}",
            "-e",
            f"OCR_LANGUAGE={ocr_lang}",
            container_name,
            "/usr/bin/python3",
            "/usr/local/bin/dangerzone.py",
            "pixels-to-pdf",
        ]
        if exec_container(args, stdout_callback) != 0:
            print("pixels-to-pdf failed")
            return False

        # Move the final file to the right place
        if os.path.exists(output_filename):
            os.remove(output_filename)

        container_output_filename = os.path.join(
            safe_dir, "safe-output-compressed.pdf"
        )
        shutil.move(container_output_filename, output_filename)

    # We did it
    return True
|
|
|
|
|
|
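# From global_common: the function below is commented out and is not called in
# this file. convert() still has a TODO to validate the document-to-pixels output.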
|
|
|
|
# def validate_convert_to_pixel_output(self, common, output):
|
|
# """
|
|
# Take the output from the convert to pixels tasks and validate it. Returns
|
|
# a tuple like: (success (boolean), error_message (str))
|
|
# """
|
|
# max_image_width = 10000
|
|
# max_image_height = 10000
|
|
|
|
# # Did we hit an error?
|
|
# for line in output.split("\n"):
|
|
# if (
|
|
# "failed:" in line
|
|
# or "The document format is not supported" in line
|
|
# or "Error" in line
|
|
# ):
|
|
# return False, output
|
|
|
|
# # How many pages was that?
|
|
# num_pages = None
|
|
# for line in output.split("\n"):
|
|
# if line.startswith("Document has "):
|
|
# num_pages = line.split(" ")[2]
|
|
# break
|
|
# if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
|
|
# return False, "Invalid number of pages returned"
|
|
# num_pages = int(num_pages)
|
|
|
|
# # Make sure we have the files we expect
|
|
# expected_filenames = []
|
|
# for i in range(1, num_pages + 1):
|
|
# expected_filenames += [
|
|
# f"page-{i}.rgb",
|
|
# f"page-{i}.width",
|
|
# f"page-{i}.height",
|
|
# ]
|
|
# expected_filenames.sort()
|
|
# actual_filenames = os.listdir(common.pixel_dir.name)
|
|
# actual_filenames.sort()
|
|
|
|
# if expected_filenames != actual_filenames:
|
|
# return (
|
|
# False,
|
|
# f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
|
|
# )
|
|
|
|
# # Make sure the files are the correct sizes
|
|
# for i in range(1, num_pages + 1):
|
|
# with open(f"{common.pixel_dir.name}/page-{i}.width") as f:
|
|
# w_str = f.read().strip()
|
|
# with open(f"{common.pixel_dir.name}/page-{i}.height") as f:
|
|
# h_str = f.read().strip()
|
|
# w = int(w_str)
|
|
# h = int(h_str)
|
|
# if (
|
|
# not w_str.isdigit()
|
|
# or not h_str.isdigit()
|
|
# or w <= 0
|
|
# or w > max_image_width
|
|
# or h <= 0
|
|
# or h > max_image_height
|
|
# ):
|
|
# return False, f"Page {i} has invalid geometry"
|
|
|
|
# # Make sure the RGB file is the correct size
|
|
# if os.path.getsize(f"{common.pixel_dir.name}/page-{i}.rgb") != w * h * 3:
|
|
# return False, f"Page {i} has an invalid RGB file size"
|
|
|
|
# return True, True
|