mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Make PyMuPDF always log to stderr
PyMuPDF logs to stdout by default, which is problematic because we use the stdout of the conversion process to read the pixel stream of a document. Make PyMuPDF always log to stderr by setting the following environment variables: PYMUPDF_MESSAGE and PYMUPDF_LOG. Fixes #877
This commit is contained in:
parent
08f03b4bb4
commit
3f86e7b465
2 changed files with 28 additions and 8 deletions
|
@ -3,6 +3,17 @@ import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
# XXX: PyMuPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
|
||||||
|
# stderr, but it's based on environment variables. These envvars are consulted at import
|
||||||
|
# time [3], so we have to set them here, before we import `fitz`.
|
||||||
|
#
|
||||||
|
# [1] https://github.com/freedomofpress/dangerzone/issues/877
|
||||||
|
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
|
||||||
|
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
|
||||||
|
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
|
||||||
|
os.environ["PYMUPDF_LOG"] = "fd:2"
|
||||||
|
|
||||||
|
|
||||||
import fitz
|
import fitz
|
||||||
import magic
|
import magic
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,16 @@ from typing import Optional
|
||||||
|
|
||||||
from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes
|
from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes
|
||||||
|
|
||||||
|
# XXX: PyMuPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
|
||||||
|
# stderr, but it's based on environment variables. These envvars are consulted at import
|
||||||
|
# time [3], so we have to set them here, before we import `fitz`.
|
||||||
|
#
|
||||||
|
# [1] https://github.com/freedomofpress/dangerzone/issues/877
|
||||||
|
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
|
||||||
|
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
|
||||||
|
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
|
||||||
|
os.environ["PYMUPDF_LOG"] = "fd:2"
|
||||||
|
|
||||||
|
|
||||||
class PixelsToPDF(DangerzoneConverter):
|
class PixelsToPDF(DangerzoneConverter):
|
||||||
async def convert(
|
async def convert(
|
||||||
|
@ -50,14 +60,13 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
# The first few operations happen on a per-page basis.
|
# The first few operations happen on a per-page basis.
|
||||||
page_size = len(untrusted_rgb_data)
|
page_size = len(untrusted_rgb_data)
|
||||||
total_size += page_size
|
total_size += page_size
|
||||||
with contextlib.redirect_stdout(io.StringIO()):
|
pixmap = fitz.Pixmap(
|
||||||
pixmap = fitz.Pixmap(
|
fitz.Colorspace(fitz.CS_RGB),
|
||||||
fitz.Colorspace(fitz.CS_RGB),
|
width,
|
||||||
width,
|
height,
|
||||||
height,
|
untrusted_rgb_data,
|
||||||
untrusted_rgb_data,
|
False,
|
||||||
False,
|
)
|
||||||
)
|
|
||||||
pixmap.set_dpi(DEFAULT_DPI, DEFAULT_DPI)
|
pixmap.set_dpi(DEFAULT_DPI, DEFAULT_DPI)
|
||||||
if ocr_lang: # OCR the document
|
if ocr_lang: # OCR the document
|
||||||
self.update_progress(
|
self.update_progress(
|
||||||
|
|
Loading…
Reference in a new issue