dangerzone/tests/__init__.py
deeplow b73ce5bf6a
Add large test logic and documentation
Adds a large pool of document that can and should be used prior to a
release to understand effects of the new release over a real-world
scenario.

Documents are stored in an external git LFS repo under
`tests/test_docs_large` and currently it's about 11K documents gathered
from multiple PDF readers and office suite's test sets.

Documentation on how to run the tests is under
`docs/developer/TESTING.md`
2023-08-22 16:11:31 +01:00

94 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
from pathlib import Path
from typing import Callable, List
import pytest
from dangerzone.document import SAFE_EXTENSION
sys.dangerzone_dev = True # type: ignore[attr-defined]
SAMPLE_DIRECTORY = "test_docs"
BASIC_SAMPLE_PDF = "sample-pdf.pdf"
BASIC_SAMPLE_DOC = "sample-doc.doc"
SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external"
test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY)
test_docs = [
p
for p in test_docs_dir.rglob("*")
if p.is_file()
and not (p.name.endswith(SAFE_EXTENSION) or p.name.startswith("sample_bad"))
]
# Pytest parameter decorators
for_each_doc = pytest.mark.parametrize(
"doc", test_docs, ids=[str(doc.name) for doc in test_docs]
)
@pytest.fixture
def sample_pdf() -> str:
return str(test_docs_dir.joinpath(BASIC_SAMPLE_PDF))
# External Docs - base64 docs encoded for externally sourced documents
# XXX to reduce the chance of accidentally opening them
test_docs_external_dir = Path(__file__).parent.joinpath(SAMPLE_EXTERNAL_DIRECTORY)
@pytest.fixture
def sample_doc() -> str:
return str(test_docs_dir.joinpath(BASIC_SAMPLE_DOC))
def get_docs_external(pattern: str = "*") -> List[Path]:
if not pattern.endswith("*"):
pattern = f"{pattern}.b64"
return [
p
for p in test_docs_external_dir.rglob(pattern)
if p.is_file() and not (p.name.endswith(SAFE_EXTENSION))
]
# Pytest parameter decorators
def for_each_external_doc(glob_pattern: str = "*") -> Callable:
test_docs_external = get_docs_external(glob_pattern)
return pytest.mark.parametrize(
"doc",
test_docs_external,
ids=[str(doc.name).rstrip(".b64") for doc in test_docs_external],
)
class TestBase:
sample_doc = str(test_docs_dir.joinpath(BASIC_SAMPLE_PDF))
@pytest.fixture
def unreadable_pdf(tmp_path: Path) -> str:
file_path = tmp_path / "document.pdf"
file_path.touch(mode=0o000)
return str(file_path)
@pytest.fixture
def uncommon_text() -> str:
"""Craft a string with Unicode characters that are considered not common.
Create a string that contains the following uncommon characters:
* ANSI escape sequences: \033[31;1;4m and \033[0m
* A Unicode character that resembles an English character: greek "X" (U+03A7)
* A Unicode control character that is not part of ASCII: zero-width joiner
(U+200D)
* An emoji: Cross Mark (U+274C)
"""
return "\033[31;1;4m BaD TeΧt \u200d\033[0m"
@pytest.fixture
def sanitized_text() -> str:
"""Return a sanitized version of the uncommon_text."""
return "_[31;1;4m BaD Te_t _ _ _[0m"