mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Move the ConvertToPixels task validation into global_common so the CLI and GUI can share it
This commit is contained in:
parent
8aaf7ebcf1
commit
73d412501c
3 changed files with 107 additions and 69 deletions
|
@ -1,6 +1,7 @@
|
||||||
import click
|
import click
|
||||||
|
|
||||||
from .global_common import GlobalCommon
|
from .global_common import GlobalCommon
|
||||||
|
from .common import Common
|
||||||
|
|
||||||
|
|
||||||
def exec_container(global_common, args):
|
def exec_container(global_common, args):
|
||||||
|
@ -35,6 +36,7 @@ def exec_container(global_common, args):
|
||||||
@click.argument("filename", required=True)
|
@click.argument("filename", required=True)
|
||||||
def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filename):
|
def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filename):
|
||||||
global_common = GlobalCommon()
|
global_common = GlobalCommon()
|
||||||
|
common = Common()
|
||||||
|
|
||||||
# Make sure custom container exists
|
# Make sure custom container exists
|
||||||
if custom_container:
|
if custom_container:
|
||||||
|
@ -56,8 +58,34 @@ def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filenam
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Pull the latest image
|
||||||
if not skip_update:
|
if not skip_update:
|
||||||
click.echo("Pulling container image (this might take a few minutes)")
|
click.echo("Pulling container image (this might take a few minutes)")
|
||||||
returncode, _, _ = exec_container(global_common, ["pull"])
|
returncode, _, _ = exec_container(global_common, ["pull"])
|
||||||
if returncode != 0:
|
if returncode != 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Document to pixels
|
||||||
|
click.echo("Converting document to pixels")
|
||||||
|
returncode, output, _ = exec_container(
|
||||||
|
global_common,
|
||||||
|
[
|
||||||
|
"documenttopixels",
|
||||||
|
"--document-filename",
|
||||||
|
common.document_filename,
|
||||||
|
"--pixel-dir",
|
||||||
|
common.pixel_dir.name,
|
||||||
|
"--container-name",
|
||||||
|
global_common.get_container_name(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
if returncode != 0:
|
||||||
|
return
|
||||||
|
|
||||||
|
success, error_message = global_common.validate_convert_to_pixel_output(
|
||||||
|
common, output
|
||||||
|
)
|
||||||
|
if not success:
|
||||||
|
click.echo(error_message)
|
||||||
|
return
|
||||||
|
|
|
@ -276,6 +276,10 @@ class GlobalCommon(object):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def container_exists(self, container_name):
|
def container_exists(self, container_name):
|
||||||
|
"""
|
||||||
|
Check if container_name is a valid container. Returns a tuple like:
|
||||||
|
(success (boolean), error_message (str))
|
||||||
|
"""
|
||||||
# Do we have this container?
|
# Do we have this container?
|
||||||
with self.exec_dangerzone_container(
|
with self.exec_dangerzone_container(
|
||||||
["ls", "--container-name", container_name]
|
["ls", "--container-name", container_name]
|
||||||
|
@ -297,3 +301,72 @@ class GlobalCommon(object):
|
||||||
return False, f"Container '{container_name}' not found"
|
return False, f"Container '{container_name}' not found"
|
||||||
|
|
||||||
return True, True
|
return True, True
|
||||||
|
|
||||||
|
def validate_convert_to_pixel_output(self, common, output):
    """
    Take the output from the convert to pixels task and validate it, together
    with the files the task left in the pixel directory.

    `common` must expose `pixel_dir.name` (the directory holding the
    page-N.rgb / page-N.width / page-N.height files) and `output` is the text
    captured from the container.

    Returns a tuple like: (success (boolean), error_message (str)). On success
    the second element is True rather than a message, so callers should only
    read it when success is False.
    """
    max_image_width = 10000
    max_image_height = 10000

    def parse_positive_int(text):
        # Return `text` as an int greater than zero, or None when it is not
        # one. The container's output is untrusted, so never let int() raise.
        if not text or not text.isdigit():
            return None
        try:
            value = int(text)
        except ValueError:
            # str.isdigit() accepts characters (e.g. superscript digits)
            # that int() refuses to parse
            return None
        return value if value > 0 else None

    def read_dimension(path):
        # Read a page-N.width / page-N.height file; None means "not a valid
        # positive integer"
        try:
            with open(path) as f:
                return parse_positive_int(f.read().strip())
        except UnicodeDecodeError:
            # Binary junk in a dimension file is invalid geometry, not a crash
            return None

    lines = output.split("\n")

    # Did we hit an error?
    error_markers = ("failed:", "The document format is not supported", "Error")
    if any(marker in line for line in lines for marker in error_markers):
        return False, output

    # How many pages was that? Only the first "Document has N ..." line counts
    num_pages = None
    for line in lines:
        if line.startswith("Document has "):
            num_pages = parse_positive_int(line.split(" ")[2])
            break
    if num_pages is None:
        return False, "Invalid number of pages returned"

    # Make sure we have the files we expect, and nothing else
    pixel_dir = common.pixel_dir.name
    expected_filenames = sorted(
        f"page-{i}.{suffix}"
        for i in range(1, num_pages + 1)
        for suffix in ("rgb", "width", "height")
    )
    actual_filenames = sorted(os.listdir(pixel_dir))

    if expected_filenames != actual_filenames:
        return (
            False,
            f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
        )

    # Make sure the files are the correct sizes
    for i in range(1, num_pages + 1):
        w = read_dimension(os.path.join(pixel_dir, f"page-{i}.width"))
        h = read_dimension(os.path.join(pixel_dir, f"page-{i}.height"))
        if w is None or h is None or w > max_image_width or h > max_image_height:
            return False, f"Page {i} has invalid geometry"

        # Make sure the RGB file is the correct size (3 bytes per pixel)
        if os.path.getsize(os.path.join(pixel_dir, f"page-{i}.rgb")) != w * h * 3:
            return False, f"Page {i} has an invalid RGB file size"

    return True, True
|
||||||
|
|
|
@ -65,10 +65,6 @@ class ConvertToPixels(TaskBase):
|
||||||
self.global_common = global_common
|
self.global_common = global_common
|
||||||
self.common = common
|
self.common = common
|
||||||
|
|
||||||
self.max_image_width = 10000
|
|
||||||
self.max_image_height = 10000
|
|
||||||
self.max_image_size = self.max_image_width * self.max_image_height * 3
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.update_label.emit("Converting document to pixels")
|
self.update_label.emit("Converting document to pixels")
|
||||||
args = [
|
args = [
|
||||||
|
@ -80,76 +76,17 @@ class ConvertToPixels(TaskBase):
|
||||||
"--container-name",
|
"--container-name",
|
||||||
self.global_common.get_container_name(),
|
self.global_common.get_container_name(),
|
||||||
]
|
]
|
||||||
returncode, output, stderr = self.exec_container(args)
|
returncode, output, _ = self.exec_container(args)
|
||||||
|
|
||||||
if returncode != 0:
|
if returncode != 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Did we hit an error?
|
success, error_message = self.global_common.validate_convert_to_pixel_output(
|
||||||
for line in output.split("\n"):
|
self.common, output
|
||||||
if (
|
)
|
||||||
"failed:" in line
|
if not success:
|
||||||
or "The document format is not supported" in line
|
self.task_failed.emit(error_message)
|
||||||
or "Error" in line
|
|
||||||
):
|
|
||||||
self.task_failed.emit(output)
|
|
||||||
return
|
|
||||||
|
|
||||||
# How many pages was that?
|
|
||||||
num_pages = None
|
|
||||||
for line in output.split("\n"):
|
|
||||||
if line.startswith("Document has "):
|
|
||||||
num_pages = line.split(" ")[2]
|
|
||||||
break
|
|
||||||
if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
|
|
||||||
self.task_failed.emit("Invalid number of pages returned")
|
|
||||||
return
|
return
|
||||||
num_pages = int(num_pages)
|
|
||||||
|
|
||||||
# Make sure we have the files we expect
|
|
||||||
expected_filenames = []
|
|
||||||
for i in range(1, num_pages + 1):
|
|
||||||
expected_filenames += [
|
|
||||||
f"page-{i}.rgb",
|
|
||||||
f"page-{i}.width",
|
|
||||||
f"page-{i}.height",
|
|
||||||
]
|
|
||||||
expected_filenames.sort()
|
|
||||||
actual_filenames = os.listdir(self.common.pixel_dir.name)
|
|
||||||
actual_filenames.sort()
|
|
||||||
|
|
||||||
if expected_filenames != actual_filenames:
|
|
||||||
self.task_failed.emit(
|
|
||||||
f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Make sure the files are the correct sizes
|
|
||||||
for i in range(1, num_pages + 1):
|
|
||||||
with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f:
|
|
||||||
w_str = f.read().strip()
|
|
||||||
with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f:
|
|
||||||
h_str = f.read().strip()
|
|
||||||
w = int(w_str)
|
|
||||||
h = int(h_str)
|
|
||||||
if (
|
|
||||||
not w_str.isdigit()
|
|
||||||
or not h_str.isdigit()
|
|
||||||
or w <= 0
|
|
||||||
or w > self.max_image_width
|
|
||||||
or h <= 0
|
|
||||||
or h > self.max_image_height
|
|
||||||
):
|
|
||||||
self.task_failed.emit(f"Page {i} has invalid geometry")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Make sure the RGB file is the correct size
|
|
||||||
if (
|
|
||||||
os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb")
|
|
||||||
!= w * h * 3
|
|
||||||
):
|
|
||||||
self.task_failed.emit(f"Page {i} has an invalid RGB file size")
|
|
||||||
return
|
|
||||||
|
|
||||||
self.task_finished.emit()
|
self.task_finished.emit()
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue