Make PyMuPDF always log to stderr

PyMuPDF logs to stdout by default, which is problematic because we use
the stdout of the conversion process to read the pixel stream of a
document.

Make PyMuPDF always log to stderr, by setting the following environment
variables: PYMUPDF_MESSAGE and PYMUPDF_LOG.

Fixes #877
This commit is contained in:
Alex Pyrgiotis 2024-07-25 12:41:23 +03:00
parent 08f03b4bb4
commit 3f86e7b465
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
2 changed files with 28 additions and 8 deletions

View file

@ -3,6 +3,17 @@ import os
import sys import sys
from typing import Dict, Optional from typing import Dict, Optional
# XXX: PyMuPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
# stderr, but it's based on environment variables. These envvars are consulted at import
# time [3], so we have to set them here, before we import `fitz`.
#
# [1] https://github.com/freedomofpress/dangerzone/issues/877
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
os.environ["PYMUPDF_LOG"] = "fd:2"
import fitz import fitz
import magic import magic

View file

@ -16,6 +16,16 @@ from typing import Optional
from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes
# XXX: PyMuPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
# stderr, but it's based on environment variables. These envvars are consulted at import
# time [3], so we have to set them here, before we import `fitz`.
#
# [1] https://github.com/freedomofpress/dangerzone/issues/877
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
os.environ["PYMUPDF_LOG"] = "fd:2"
class PixelsToPDF(DangerzoneConverter): class PixelsToPDF(DangerzoneConverter):
async def convert( async def convert(
@ -50,7 +60,6 @@ class PixelsToPDF(DangerzoneConverter):
# The first few operations happen on a per-page basis. # The first few operations happen on a per-page basis.
page_size = len(untrusted_rgb_data) page_size = len(untrusted_rgb_data)
total_size += page_size total_size += page_size
with contextlib.redirect_stdout(io.StringIO()):
pixmap = fitz.Pixmap( pixmap = fitz.Pixmap(
fitz.Colorspace(fitz.CS_RGB), fitz.Colorspace(fitz.CS_RGB),
width, width,