diff --git a/.gitignore b/.gitignore
index 6620ae7..12b8da9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,7 +133,8 @@ dmypy.json
 deb_dist
 .DS_Store
 install/windows/Dangerzone.wxs
-test_docs/sample-safe.pdf
+**/*-safe.pdf
+test_docs/out/
 share/container.tar
 share/container.tar.gz
 share/image-id.txt
diff --git a/dangerzone/cli.py b/dangerzone/cli.py
index cd8faa8..b026790 100644
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@@ -19,43 +19,31 @@ def print_header(s):
 @click.option("--output-filename", help="Default is filename ending with -safe.pdf")
 @click.option("--ocr-lang", help="Language to OCR, defaults to none")
 @click.argument("filename", required=True)
-def cli_main(output_filename, ocr_lang, filename):
+def cli_main(output_filename: str, ocr_lang: str, filename: str):
     colorama.init(autoreset=True)
     common = Common()
     dzutil.display_banner()
 
     # Validate filename
-    valid = True
     try:
-        with open(os.path.abspath(filename), "rb") as f:
+        with open(os.path.abspath(filename), "rb"):
             pass
-    except:
-        valid = False
-
-    if not valid:
-        click.echo("Invalid filename")
-        return
-
-    common.input_filename = os.path.abspath(filename)
+    except FileNotFoundError:  # re-raised unchanged so the command exits non-zero
+        raise
+    else:
+        common.input_filename = os.path.abspath(filename)
 
     # Validate safe PDF output filename
     if output_filename:
-        valid = True
-        if not output_filename.endswith(".pdf"):
-            click.echo("Safe PDF filename must end in '.pdf'")
-            return
-
+        if not output_filename.endswith((".pdf", ".PDF")):
+            raise RuntimeError("Safe PDF filename must end in '.pdf'")
         try:
-            with open(os.path.abspath(output_filename), "wb") as f:
+            with open(os.path.abspath(output_filename), "wb"):
                 pass
-        except:
-            valid = False
-
-        if not valid:
-            click.echo("Safe PDF filename is not writable")
-            return
-
-        common.output_filename = os.path.abspath(output_filename)
+        except IOError:
+            raise IOError("Safe PDF filename is not writable")
+        else:
+            common.output_filename = os.path.abspath(output_filename)
 
     else:
         common.output_filename = (
@@ -64,11 +52,8 @@ def cli_main(output_filename, ocr_lang, filename):
         try:
             with open(common.output_filename, "wb") as f:
                 pass
-        except:
-            click.echo(
-                f"Output filename {common.output_filename} is not writable, use --output-filename"
-            )
-            return
+        except IOError as e:
+            raise IOError(f"Output filename {common.output_filename} is not writable, use --output-filename") from e
 
     # Validate OCR language
     if ocr_lang:
@@ -78,10 +63,10 @@ def cli_main(output_filename, ocr_lang, filename):
                 valid = True
                 break
         if not valid:
-            click.echo("Invalid OCR language code. Valid language codes:")
+            click.echo("Invalid OCR language code. Valid language codes:", err=True)
             for lang in dzutil.OCR_LANGUAGES:
-                click.echo(f"{dzutil.OCR_LANGUAGES[lang]}: {lang}")
-            return
+                click.echo(f"{dzutil.OCR_LANGUAGES[lang]}: {lang}", err=True)
+            raise SystemExit(1)  # the site-provided exit() helper is missing under -S / frozen builds
 
     # Ensure container is installed
     container.install_container()
diff --git a/dangerzone/tests/__init__.py b/dangerzone/tests/__init__.py
new file mode 100644
index 0000000..fba0404
--- /dev/null
+++ b/dangerzone/tests/__init__.py
@@ -0,0 +1,4 @@
+import unittest
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/dangerzone/tests/test_cli.py b/dangerzone/tests/test_cli.py
new file mode 100644
index 0000000..0689698
--- /dev/null
+++ b/dangerzone/tests/test_cli.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+import os.path
+import sys
+from pathlib import Path
+from unittest import TestCase
+
+from click.testing import CliRunner, Result
+
+from dangerzone.cli import cli_main
+
+
+# TODO --output-filename
+# TODO --output-filename with spaces
+# TODO explore any symlink edge cases
+# TODO simulate ctrl-c, ctrl-d, SIGINT/SIGKILL/SIGTERM... (man 7 signal), etc?
+# TODO validate output PDFs https://github.com/pdfminer/pdfminer.six
+# TODO trigger "Invalid json returned from container"
+# TODO trigger "pdf-to-pixels failed"
+# TODO simulate container runtime missing
+# TODO simulate container connection error
+# TODO simulate container connection loss
+# TODO pass invalid filenames
+# FIXME "/" path separator is platform-dependent, use pathlib instead
+
+
+class CliTestCase(TestCase):
+    SAMPLE_DIRECTORY = "test_docs"
+    BASIC_SAMPLE = f"{SAMPLE_DIRECTORY}/sample.pdf"
+    SAFE_SUFFIX = "-safe.pdf"
+
+    def setUp(self):
+        sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        sys.dangerzone_dev = True
+        self.runner = CliRunner()
+        # TODO Use pathlib or similar for safer file handling here
+        samples_dir = Path(self.SAMPLE_DIRECTORY)
+        self.samples: list[Path | str] = [
+            p
+            for p in samples_dir.rglob("*")
+            if p.is_file() and not p.name.endswith(self.SAFE_SUFFIX)
+        ]
+        if len(self.samples) < 10:
+            raise RuntimeWarning(f"Only {len(self.samples)} samples found.")
+
+    def invoke_runner(self, *args, **kwargs) -> Result:
+        return self.runner.invoke(cli_main, *args, **kwargs)
+
+
+class CliBasicTestCase(CliTestCase):
+    def test_no_args(self):
+        """``$ dangerzone-cli``"""
+        result = self.invoke_runner()
+        self.assertNotEqual(result.exit_code, 0)
+
+    def test_help(self):
+        """``$ dangerzone-cli --help``"""
+        result = self.invoke_runner("--help")
+        self.assertEqual(result.exit_code, 0)
+
+    def test_version(self):
+        """``$ dangerzone-cli --version``"""
+        # Note: fails for now, "--version" is not yet implemented.
+        result = self.invoke_runner("--version")
+        self.assertEqual(result.exit_code, 0)
+
+
+class CliConversionTestCase(CliTestCase):
+    def test_invalid_lang(self):
+        result = self.invoke_runner(f"{self.BASIC_SAMPLE} --ocr-lang piglatin")
+        self.assertNotEqual(result.exit_code, 0)
+
+    def test_samples(self):
+        for sample in self.samples:
+            with self.subTest(f"Convert {sample}"):
+                result = self.invoke_runner([str(sample)])  # argv list: no shell-style splitting of odd filenames
+                self.assertEqual(result.exit_code, 0)
+
+    def test_output_filename(self):
+        result = self.invoke_runner(f"{self.BASIC_SAMPLE} --output-filename {self.SAMPLE_DIRECTORY}/out/my-output.pdf")
+        self.assertEqual(result.exit_code, 0)
+
+    def test_output_filename_new_dir(self):
+        result = self.invoke_runner(f"{self.BASIC_SAMPLE} --output-filename fake-directory/my-output.pdf")
+        self.assertEqual(result.exit_code, 0)
+
+    def test_sample_not_found(self):
+        with self.subTest():
+            result = self.invoke_runner("fake-directory/fake-file.pdf")
+            self.assertNotEqual(result.exit_code, 0)  # a missing input file must not report success
+
+    def test_lang_mismatch(self):
+        """Try to OCR sample.pdf (Lorem ipsum) as traditional Chinese characters."""
+        # TODO how should we handle these cases?
+        with self.assertWarns(RuntimeWarning):
+            self.invoke_runner(f"{self.BASIC_SAMPLE} --ocr-lang chi_tra")
+
+    def test_lang_eng(self):
+        # Rewrite this case if samples in other languages or scripts are added.
+        result = self.invoke_runner(f'"{self.BASIC_SAMPLE}" --ocr-lang eng')
+        self.assertEqual(result.exit_code, 0)
+
+    def test_bulk(self):
+        """
+        Try to convert all sample documents in one run.
+        Fails for now, since bulk conversion is not yet implemented.
+        """
+        # FIXME Once bulk conversion is implemented, return here to expand and quote self.samples correctly.
+        result = self.invoke_runner(self.samples)
+        self.assertEqual(result.exit_code, 0)
+
+    def test_bulk_input_one_name(self):
+        """
+        Try to convert all sample documents in one run and supplies --output-filename This should fail.
+        """
+        # FIXME Once bulk conversion is implemented, return here to expand and quote self.samples correctly.
+        result = self.invoke_runner(self.samples + ["--output-filename", "sample-safe.pdf"])  # more samples than names
+        self.assertNotEqual(result.exit_code, 0)
+
+    def test_bulk_ocr_eng(self):
+        """
+        Try to convert all sample documents in one run and with English OCR.
+        Fails for now, since bulk conversion is not yet implemented.
+        """
+        # FIXME Once bulk conversion is implemented, return here to expand and quote self.samples correctly.
+        result = self.invoke_runner(self.samples + ["--ocr-lang", "eng"])  # option and value are separate argv items
+        self.assertEqual(result.exit_code, 0)
diff --git a/dangerzone/tests/test_util.py b/dangerzone/tests/test_util.py
new file mode 100644
index 0000000..6f85626
--- /dev/null
+++ b/dangerzone/tests/test_util.py
@@ -0,0 +1,80 @@
+import io
+import os
+import platform
+import subprocess
+import unittest
+from unittest import mock
+from io import StringIO
+from pathlib import Path
+import sys
+from unittest import TestCase
+
+from strip_ansi import strip_ansi  # type: ignore
+
+import dangerzone.util as dzutil
+
+
+class TestUtil(TestCase):
+
+    VERSION_FILE_NAME = "version.txt"
+
+    def setUp(self):
+        sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+        sys.dangerzone_dev = True
+
+    def test_dev_mode(self):
+        self.assertTrue(dzutil.dev_mode())
+
+    def test__dev_root_path(self):
+        current_dir = Path().resolve()
+        root_path = dzutil._dev_root_path()
+        self.assertTrue(
+            current_dir.samefile(root_path),
+            msg=f"{current_dir} is not the same file as {root_path}",
+        )
+
+    def test_get_resource_path(self):
+        share_dir = Path("share").resolve()
+        resource_path = Path(dzutil.get_resource_path(self.VERSION_FILE_NAME)).parent
+        self.assertTrue(
+            share_dir.samefile(resource_path),
+            msg=f"{share_dir} is not the same file as {resource_path}",
+        )
+
+    @unittest.skipUnless(platform.system() == "Windows", "STARTUPINFO is for Windows")
+    def test_get_subprocess_startupinfo(self):
+        startupinfo = dzutil.get_subprocess_startupinfo()
+        self.assertIsInstance(startupinfo, subprocess.STARTUPINFO)
+
+    def test__get_version(self):
+        version = dzutil._get_version()
+        semver_pattern = (
+            r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)("
+            r"?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.["
+            r"0-9a-zA-Z-]+)*))?$"
+        )
+        self.assertRegex(
+            version,
+            semver_pattern,
+            f"{version} is not a semantic version, see https://semver.org.",
+        )
+
+    @unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
+    def test_display_banner(self, mock_stdout: StringIO):
+        dzutil.display_banner()  # call the test subject
+        banner = mock_stdout.getvalue()
+        plain_lines = [strip_ansi(line) for line in banner.splitlines()]
+        with self.subTest("banner top border"):
+            self.assertEqual("╭──────────────────────────╮", plain_lines[0])
+        with self.subTest("banner bottom border"):
+            self.assertEqual("╰──────────────────────────╯", plain_lines[14])
+        with self.subTest("banner consistent dimensions"):
+            width = len(plain_lines[0])
+            for line in plain_lines:
+                self.assertEqual(len(line), width)
+
+    @unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
+    def test_display_banner_dims(self, mock_stdout: StringIO):
+        dzutil.display_banner()  # call the test subject
+        banner = mock_stdout.getvalue()
+        banner_lines = banner.splitlines()
diff --git a/dangerzone/util.py b/dangerzone/util.py
index 215ba12..a2ab333 100644
--- a/dangerzone/util.py
+++ b/dangerzone/util.py
@@ -8,10 +8,10 @@ import shutil
 import subprocess
 import sys
 import appdirs
+from colorama import Back, Fore, Style  # type: ignore
 
 # If a general-purpose function or constant doesn't depend on anything else in the dangerzone package,
 # then it belongs here.
-from colorama import Back, Fore, Style  # type: ignore
 
 SYSTEM = platform.system()
 
diff --git a/pyproject.toml b/pyproject.toml
index be017b8..96f9124 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ typing-extensions = "*"
 types-Pillow = "*"
 types-appdirs = "*"
 pyxdg = "*"
+mypy = "*"
 
 [tool.poetry.scripts]
 dangerzone = 'dangerzone:main'
diff --git a/test_docs/edge_cases/sample with spaces.pdf b/test_docs/edge_cases/sample with spaces.pdf
new file mode 100644
index 0000000..111284d
Binary files /dev/null and b/test_docs/edge_cases/sample with spaces.pdf differ
diff --git a/test_docs/edge_cases/Оригинал.png b/test_docs/edge_cases/Оригинал.png
new file mode 100644
index 0000000..ff04dcc
Binary files /dev/null and b/test_docs/edge_cases/Оригинал.png differ
diff --git a/test_docs/edge_cases/“Curly Quotes”_ and (O)the'r Mes$y Characters_.pdf b/test_docs/edge_cases/“Curly Quotes”_ and (O)the'r Mes$y Characters_.pdf
new file mode 100644
index 0000000..111284d
Binary files /dev/null and b/test_docs/edge_cases/“Curly Quotes”_ and (O)the'r Mes$y Characters_.pdf differ