Fixup: use numpy to do the comparison

This commit is contained in:
Alexis Métaireau 2025-03-10 15:44:58 +01:00
parent bbebbeb054
commit 3e5fcc2e4a
No known key found for this signature in database
GPG key ID: C65C7A89A8FFC56E

View file

@@ -5,13 +5,16 @@ import copy
import os
import platform
import shutil
import subprocess
import sys
import tempfile
import time
import traceback
from pathlib import Path
from typing import Optional, Sequence
import fitz
import numpy as np
import pytest
from click.testing import CliRunner, Result
from pytest_mock import MockerFixture
@@ -204,13 +207,44 @@ class TestCliConversion(TestCliBasic):
converted = fitz.open(destination)
ref = fitz.open(reference)
assert len(converted) == len(ref), "different number of pages"
errors = []
if len(converted) != len(ref):
errors.append("different number of pages")
diffs = doc.parent / "diffs"
diffs.mkdir(parents=True, exist_ok=True)
for page, ref_page in zip(converted, ref):
page.get_pixmap(dpi=150)
ref_page.get_pixmap(dpi=150)
assert page.get_pixmap().tobytes() == ref_page.get_pixmap().tobytes(), (
f"different page content for page {page.number}"
curr_pixmap = page.get_pixmap(dpi=150)
ref_pixmap = ref_page.get_pixmap(dpi=150)
if curr_pixmap.tobytes() != ref_pixmap.tobytes():
errors.append(f"page {page.number} differs")
t0 = time.perf_counter()
arr_ref = np.frombuffer(ref_pixmap.samples, dtype=np.uint8).reshape(
ref_pixmap.height, ref_pixmap.width, ref_pixmap.n
)
arr_curr = np.frombuffer(curr_pixmap.samples, dtype=np.uint8).reshape(
curr_pixmap.height, curr_pixmap.width, curr_pixmap.n
)
# Find differences (any channel differs)
diff = (arr_ref != arr_curr).any(axis=2)
# Get coordinates of differences
diff_coords = np.where(diff)
# Mark differences in red
for y, x in zip(diff_coords[0], diff_coords[1]):
# Note: PyMuPDF's set_pixel takes (x, y) not (y, x)
ref_pixmap.set_pixel(int(x), int(y), (255, 0, 0)) # Red
t1 = time.perf_counter()
print(f"diff took {t1 - t0} seconds")
ref_pixmap.save(diffs / f"{destination.stem}_{page.number}.jpeg")
if len(errors) > 0:
raise AssertionError(
f"The resulting document differs from the reference. See {str(diffs)} for a visual diff."
)
def test_output_filename(self, sample_pdf: str) -> None: