Fix DPI mismatch between doc2pixels and pixels2pdf

The converted document was larger in dimensions than the original one due
to a mismatch in DPI settings. When converting documents to pixels we
were setting the DPI to 150 pixels per inch. Then when converting back
into a PDF we were using 70 DPI. This difference would result in an
overall larger document in dimensions (though not necessarily in file
size).

Fixes #626
This commit is contained in:
deeplow 2023-12-04 16:38:53 +00:00
parent e5dbe25abb
commit 576cbd3382
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 8 additions and 3 deletions

View file

@ -7,6 +7,9 @@ since 0.4.1, and this project adheres to [Semantic Versioning](https://semver.or
## Unreleased
### Fixed
- Fix mismatch in dimensions between the original document and the converted one ([issue #626](https://github.com/freedomofpress/dangerzone/issues/626)). This does not affect the quality of the final document.
### Changed
- Feature: Add support for HWP/HWPX files (Hancom Office) for macOS Apple Silicon devices ([issue #498](https://github.com/freedomofpress/dangerzone/issues/498), thanks to [@OctopusET](https://github.com/OctopusET))

View file

@ -15,6 +15,7 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
TIMEOUT_PER_PAGE: float = 30 # (seconds)
TIMEOUT_PER_MB: float = 30 # (seconds)
TIMEOUT_MIN: float = 60 # (seconds)
DEFAULT_DPI = 150 # Pixels per inch
def running_on_qubes() -> bool:

View file

@ -19,7 +19,7 @@ import fitz
import magic
from . import errors
from .common import DangerzoneConverter, running_on_qubes
from .common import DEFAULT_DPI, DangerzoneConverter, running_on_qubes
class DocumentToPixels(DangerzoneConverter):
@ -245,7 +245,7 @@ class DocumentToPixels(DangerzoneConverter):
self.update_progress(
f"Converting page {page_num}/{doc.page_count} to pixels"
)
pix = page.get_pixmap(dpi=150)
pix = page.get_pixmap(dpi=DEFAULT_DPI)
rgb_buf = pix.samples_mv
await self.write_page_width(pix.width, width_filename)
await self.write_page_height(pix.height, height_filename)

View file

@ -15,7 +15,7 @@ from typing import Optional
import fitz
from .common import DangerzoneConverter, get_tessdata_dir, running_on_qubes
from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes
class PixelsToPDF(DangerzoneConverter):
@ -52,6 +52,7 @@ class PixelsToPDF(DangerzoneConverter):
pixmap = fitz.Pixmap(
fitz.Colorspace(fitz.CS_RGB), width, height, untrusted_rgb_data, False
)
pixmap.set_dpi(DEFAULT_DPI, DEFAULT_DPI)
if ocr_lang: # OCR the document
self.update_progress(
f"Converting page {page_num}/{num_pages} from pixels to searchable PDF"