From 3a1b41f988f40323a8b78e0cdc33aad48439e12b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20M=C3=A9taireau?= Date: Thu, 31 Oct 2024 14:21:18 +0100 Subject: [PATCH 1/5] docs: Add a step to download tesseract data in the RELEASE notes --- RELEASE.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 89141cf..3248fa7 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -76,6 +76,7 @@ and newer platforms, we have to do the following: - [ ] Create a new development environment with Poetry. - [ ] Build the container image and ensure the development environment uses the new image. + - [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Run the Dangerzone tests. - [ ] Build and run the Dangerzone .exe - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). @@ -84,6 +85,7 @@ and newer platforms, we have to do the following: - [ ] Create a new development environment with Poetry. - [ ] Build the container image and ensure the development environment uses the new image. + - [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Run the Dangerzone tests. - [ ] Create and run an app bundle. - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). @@ -92,6 +94,7 @@ and newer platforms, we have to do the following: - [ ] Create a new development environment with Poetry. - [ ] Build the container image and ensure the development environment uses the new image. + - [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Run the Dangerzone tests. - [ ] Create and run an app bundle. - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). @@ -100,6 +103,7 @@ and newer platforms, we have to do the following: - [ ] Create a new development environment with Poetry. - [ ] Build the container image and ensure the development environment uses the new image. 
+ - [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Run the Dangerzone tests. - [ ] Create a .deb package and install it system-wide. - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). @@ -108,6 +112,7 @@ and newer platforms, we have to do the following: - [ ] Create a new development environment with Poetry. - [ ] Build the container image and ensure the development environment uses the new image. + - [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Run the Dangerzone tests. - [ ] Create an .rpm package and install it system-wide. - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). From 1f42d6825940f173f0c3a6d4d343938a05998eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20M=C3=A9taireau?= Date: Mon, 4 Nov 2024 12:53:33 +0100 Subject: [PATCH 2/5] Update QA script to support Fedora 41 --- dev_scripts/qa.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev_scripts/qa.py b/dev_scripts/qa.py index 8bc95b7..42fd940 100755 --- a/dev_scripts/qa.py +++ b/dev_scripts/qa.py @@ -1005,6 +1005,10 @@ class QAFedora(QALinux): ) +class QAFedora41(QAFedora): + VERSION = "41" + + class QAFedora40(QAFedora): VERSION = "40" From 10c52dbf8cc8516514cb801e1080abfbe57501ca Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Mon, 4 Nov 2024 15:48:53 +0200 Subject: [PATCH 3/5] WIP: Improve Windows QA --- dev_scripts/qa.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/dev_scripts/qa.py b/dev_scripts/qa.py index 42fd940..2413866 100755 --- a/dev_scripts/qa.py +++ b/dev_scripts/qa.py @@ -3,14 +3,20 @@ import abc import argparse import difflib +import json import logging import re import selectors import subprocess import sys +import urllib.request logger = logging.getLogger(__name__) +PYTHON_VERSION_STR = "3.12" +PYTHON_VERSION = [int(num) for num in PYTHON_VERSION_STR.split(".")] +EOL_PYTHON_URL = 
"https://endoflife.date/api/python.json" + CONTENT_QA = r"""## QA To ensure that new releases do not introduce regressions, and support existing @@ -802,6 +808,26 @@ class QAWindows(QABase): while msvcrt.kbhit(): msvcrt.getch() + @QABase.task(f"Install the latest version of Python {PYTHON_VERSION_STR}", ref=REF_BUILD) + def install_python(self): + cur_version = list(sys.version_info[:3]) + + logger.info("Getting latest Python release") + with urllib.request.urlopen(EOL_PYTHON_URL) as f: + resp = f.read() + releases = json.loads(resp) + for release in releases: + if release["cycle"] == PYTHON_VERSION_STR: + latest_version = [int(num) for num in release["latest"].split(".")] + if latest_version > cur_version: + self.prompt(f"You need to install the latest Python version ({release['latest']})") + elif latest_version == cur_version: + logger.info(f"Verified that the latest Python version ({release['latest']}) is installed") + return + + logger.error("Could not verify that the latest Python version is installed") + + @QABase.task("Install and Run Docker Desktop", ref=REF_BUILD) def install_docker(self): logger.info("Checking if Docker Desktop is installed and running") @@ -816,12 +842,16 @@ class QAWindows(QABase): ) def install_poetry(self): self.run("python", "-m", "pip", "install", "poetry") - self.run("poetry", "install") + self.run("poetry", "install", "--sync") @QABase.task("Build Dangerzone container image", ref=REF_BUILD, auto=True) def build_image(self): self.run("python", r".\install\common\build-image.py") + @QABase.task("Download Tesseract data", ref=REF_BUILD, auto=True) + def download_tessdata(self): + self.run("python", r".\install\common\download-tessdata.py") + @QABase.task("Run tests", ref="REF_BUILD", auto=True) def run_tests(self): # NOTE: Windows does not have Makefile by default. 
@@ -838,9 +868,11 @@ class QAWindows(QABase): return "windows" def start(self): + self.install_python() self.install_docker() self.install_poetry() self.build_image() + self.download_tessdata() self.run_tests() self.build_dangerzone_exe() From 0bb13f9d7b31d612e7b71e00e3a836ae491945aa Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Mon, 4 Nov 2024 15:52:35 +0200 Subject: [PATCH 4/5] Increase the size of the `dz` qube to 5GiB Increase the size of the `dz` qube in our build instructions. We increase it from 2GiB (the default) to 5GiB (suggested), to make room for the extra data that our build instructions download (e.g., the Tesseract data). --- BUILD.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/BUILD.md b/BUILD.md index c395f50..5845a65 100644 --- a/BUILD.md +++ b/BUILD.md @@ -260,11 +260,16 @@ The following instructions require typing commands in a terminal in dom0. ``` qvm-create --class AppVM --label red --template fedora-40-dz dz + qvm-volume resize dz:private $(numfmt --from=auto 5Gi) ``` > :bulb: Alternatively, you can use a different app qube for Dangerzone > development. In that case, replace `dz` with the qube of your choice in the > steps below. + > + > In the commands above, we also resize the private volume of the `dz` qube + > to 5GiB, since the Tesseract data that will be downloaded in the next steps + > take a bit of space. 4. 
Add an RPC policy (`/etc/qubes/policy.d/50-dangerzone.policy`) that will allow launching a disposable qube (`dz-dvm`) when Dangerzone converts a From ca3e634b49111b635fe84a49fa1657958cdebb21 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Mon, 4 Nov 2024 16:17:35 +0200 Subject: [PATCH 5/5] Update README description: pixel-to-PDF conversion happens outside the sandbox --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 137f0c4..b637a5b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Take potentially dangerous PDFs, office documents, or images and convert them to | ![Settings](./assets/screenshot1.png) | ![Converting](./assets/screenshot2.png) |--|--| -Dangerzone works like this: You give it a document that you don't know if you can trust (for example, an email attachment). Inside of a sandbox, Dangerzone converts the document to a PDF (if it isn't already one), and then converts the PDF into raw pixel data: a huge list of RGB color values for each page. Then, in a separate sandbox, Dangerzone takes this pixel data and converts it back into a PDF. +Dangerzone works like this: You give it a document that you don't know if you can trust (for example, an email attachment). Inside of a sandbox, Dangerzone converts the document to a PDF (if it isn't already one), and then converts the PDF into raw pixel data: a huge list of RGB color values for each page. Then, outside of the sandbox, Dangerzone takes this pixel data and converts it back into a PDF. _Read more about Dangerzone in the [official site](https://dangerzone.rocks/about/)._