Move the ConvertToPixels task validation into global_common so the CLI and GUI can share it

This commit is contained in:
Micah Lee 2021-06-09 16:32:06 -07:00
parent 8aaf7ebcf1
commit 73d412501c
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
3 changed files with 107 additions and 69 deletions

View file

@ -1,6 +1,7 @@
import click import click
from .global_common import GlobalCommon from .global_common import GlobalCommon
from .common import Common
def exec_container(global_common, args): def exec_container(global_common, args):
@ -35,6 +36,7 @@ def exec_container(global_common, args):
@click.argument("filename", required=True) @click.argument("filename", required=True)
def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filename): def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filename):
global_common = GlobalCommon() global_common = GlobalCommon()
common = Common()
# Make sure custom container exists # Make sure custom container exists
if custom_container: if custom_container:
@ -56,8 +58,34 @@ def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filenam
) )
return return
# Pull the latest image
if not skip_update: if not skip_update:
click.echo("Pulling container image (this might take a few minutes)") click.echo("Pulling container image (this might take a few minutes)")
returncode, _, _ = exec_container(global_common, ["pull"]) returncode, _, _ = exec_container(global_common, ["pull"])
if returncode != 0: if returncode != 0:
return return
# Document to pixels
click.echo("Converting document to pixels")
returncode, output, _ = exec_container(
global_common,
[
"documenttopixels",
"--document-filename",
common.document_filename,
"--pixel-dir",
common.pixel_dir.name,
"--container-name",
global_common.get_container_name(),
],
)
if returncode != 0:
return
success, error_message = global_common.validate_convert_to_pixel_output(
common, output
)
if not success:
click.echo(error_message)
return

View file

@ -276,6 +276,10 @@ class GlobalCommon(object):
return None return None
def container_exists(self, container_name): def container_exists(self, container_name):
"""
Check if container_name is a valid container. Returns a tuple like:
(success (boolean), error_message (str))
"""
# Do we have this container? # Do we have this container?
with self.exec_dangerzone_container( with self.exec_dangerzone_container(
["ls", "--container-name", container_name] ["ls", "--container-name", container_name]
@ -297,3 +301,72 @@ class GlobalCommon(object):
return False, f"Container '{container_name}' not found" return False, f"Container '{container_name}' not found"
return True, True return True, True
def validate_convert_to_pixel_output(self, common, output):
    """
    Take the output from the convert to pixels tasks and validate it. Returns
    a tuple like: (success (boolean), error_message (str))

    ``output`` is the text printed by the conversion step and
    ``common.pixel_dir.name`` is the directory that must contain exactly
    ``page-N.rgb``, ``page-N.width`` and ``page-N.height`` for every page
    reported in ``output``.

    On success the second element of the tuple is ``True`` rather than a
    message, so callers should only read it when ``success`` is ``False``.
    Malformed page counts or geometry files are reported as validation
    failures instead of raising ``ValueError``/``UnicodeDecodeError``.
    """
    max_image_width = 10000
    max_image_height = 10000

    def parse_positive_int(text, maximum=None):
        """Return ``text`` as an int in ``1..maximum``, or None if invalid."""
        if not text or not text.isdigit():
            return None
        try:
            # isdigit() accepts characters such as "²" that int() rejects,
            # so the conversion itself still has to be guarded.
            value = int(text)
        except ValueError:
            return None
        if value <= 0:
            return None
        if maximum is not None and value > maximum:
            return None
        return value

    lines = output.split("\n")

    # Did we hit an error?
    for line in lines:
        if (
            "failed:" in line
            or "The document format is not supported" in line
            or "Error" in line
        ):
            return False, output

    # How many pages was that?
    num_pages = None
    for line in lines:
        if line.startswith("Document has "):
            num_pages = parse_positive_int(line.split(" ")[2])
            break
    if num_pages is None:
        return False, "Invalid number of pages returned"

    # Make sure we have the files we expect
    pixel_dir = common.pixel_dir.name
    expected_filenames = sorted(
        f"page-{i}.{suffix}"
        for i in range(1, num_pages + 1)
        for suffix in ("rgb", "width", "height")
    )
    actual_filenames = sorted(os.listdir(pixel_dir))
    if expected_filenames != actual_filenames:
        return (
            False,
            f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
        )

    # Make sure the files are the correct sizes
    for i in range(1, num_pages + 1):
        page_prefix = os.path.join(pixel_dir, f"page-{i}")

        # Read as ASCII with replacement so undecodable bytes end up failing
        # the digit check below instead of raising while decoding.
        with open(f"{page_prefix}.width", encoding="ascii", errors="replace") as f:
            w_str = f.read().strip()
        with open(f"{page_prefix}.height", encoding="ascii", errors="replace") as f:
            h_str = f.read().strip()

        # Validate the text before converting it; converting first would
        # raise on non-numeric content rather than report bad geometry.
        w = parse_positive_int(w_str, max_image_width)
        h = parse_positive_int(h_str, max_image_height)
        if w is None or h is None:
            return False, f"Page {i} has invalid geometry"

        # Make sure the RGB file is the correct size (3 bytes per pixel)
        if os.path.getsize(f"{page_prefix}.rgb") != w * h * 3:
            return False, f"Page {i} has an invalid RGB file size"

    return True, True

View file

@ -65,10 +65,6 @@ class ConvertToPixels(TaskBase):
self.global_common = global_common self.global_common = global_common
self.common = common self.common = common
self.max_image_width = 10000
self.max_image_height = 10000
self.max_image_size = self.max_image_width * self.max_image_height * 3
def run(self): def run(self):
self.update_label.emit("Converting document to pixels") self.update_label.emit("Converting document to pixels")
args = [ args = [
@ -80,76 +76,17 @@ class ConvertToPixels(TaskBase):
"--container-name", "--container-name",
self.global_common.get_container_name(), self.global_common.get_container_name(),
] ]
returncode, output, stderr = self.exec_container(args) returncode, output, _ = self.exec_container(args)
if returncode != 0: if returncode != 0:
return return
# Did we hit an error? success, error_message = self.global_common.validate_convert_to_pixel_output(
for line in output.split("\n"): self.common, output
if ( )
"failed:" in line if not success:
or "The document format is not supported" in line self.task_failed.emit(error_message)
or "Error" in line
):
self.task_failed.emit(output)
return
# How many pages was that?
num_pages = None
for line in output.split("\n"):
if line.startswith("Document has "):
num_pages = line.split(" ")[2]
break
if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
self.task_failed.emit("Invalid number of pages returned")
return return
num_pages = int(num_pages)
# Make sure we have the files we expect
expected_filenames = []
for i in range(1, num_pages + 1):
expected_filenames += [
f"page-{i}.rgb",
f"page-{i}.width",
f"page-{i}.height",
]
expected_filenames.sort()
actual_filenames = os.listdir(self.common.pixel_dir.name)
actual_filenames.sort()
if expected_filenames != actual_filenames:
self.task_failed.emit(
f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
)
return
# Make sure the files are the correct sizes
for i in range(1, num_pages + 1):
with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f:
w_str = f.read().strip()
with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f:
h_str = f.read().strip()
w = int(w_str)
h = int(h_str)
if (
not w_str.isdigit()
or not h_str.isdigit()
or w <= 0
or w > self.max_image_width
or h <= 0
or h > self.max_image_height
):
self.task_failed.emit(f"Page {i} has invalid geometry")
return
# Make sure the RGB file is the correct size
if (
os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb")
!= w * h * 3
):
self.task_failed.emit(f"Page {i} has an invalid RGB file size")
return
self.task_finished.emit() self.task_finished.emit()