Replace download-tessdata with inventory.py sync

This commit is contained in:
Alex Pyrgiotis 2025-05-07 15:32:58 +03:00
parent 05deb6c464
commit 57f59c77d3
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
6 changed files with 88 additions and 91 deletions

View file

@ -75,33 +75,8 @@ jobs:
name: container.tar name: container.tar
path: share/container.tar path: share/container.tar
download-tessdata:
name: Download and cache Tesseract data
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Cache Tessdata
id: cache-tessdata
uses: actions/cache@v4
with:
path: share/tessdata/
key: v1-tessdata-${{ hashFiles('./install/common/download-tessdata.py') }}
enableCrossOsArchive: true
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Download Tessdata
run: |-
if [ -f "share/tessdata" ]; then
echo "Already cached, skipping"
else
python3 ./install/common/download-tessdata.py
fi
windows: windows:
runs-on: windows-latest runs-on: windows-latest
needs:
- download-tessdata
env: env:
DUMMY_CONVERSION: 1 DUMMY_CONVERSION: 1
steps: steps:
@ -111,13 +86,18 @@ jobs:
python-version: "3.12" python-version: "3.12"
- run: pip install poetry - run: pip install poetry
- run: poetry install - run: poetry install
- name: Restore cached tessdata - name: Cache inventory
uses: actions/cache/restore@v4 uses: actions/cache@v4
with: with:
path: share/tessdata/ path: share/tessdata/
enableCrossOsArchive: true key: v1-inventory-windows-${{ hashFiles('./inventory.lock') }}
fail-on-cache-miss: true - name: Sync inventory
key: v1-tessdata-${{ hashFiles('./install/common/download-tessdata.py') }} run: |-
if [ -f "share/tessdata" ]; then
echo "Already cached, skipping"
else
poetry run python3 ./dev_scripts/inventory.py sync
fi
- name: Run CLI tests - name: Run CLI tests
run: poetry run make test run: poetry run make test
- name: Set up .NET CLI environment - name: Set up .NET CLI environment
@ -142,8 +122,6 @@ jobs:
macOS: macOS:
name: "macOS (${{ matrix.arch }})" name: "macOS (${{ matrix.arch }})"
runs-on: ${{ matrix.runner }} runs-on: ${{ matrix.runner }}
needs:
- download-tessdata
strategy: strategy:
matrix: matrix:
include: include:
@ -158,15 +136,20 @@ jobs:
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.12" python-version: "3.12"
- name: Restore cached tessdata
uses: actions/cache/restore@v4
with:
path: share/tessdata/
enableCrossOsArchive: true
fail-on-cache-miss: true
key: v1-tessdata-${{ hashFiles('./install/common/download-tessdata.py') }}
- run: pip install poetry - run: pip install poetry
- run: poetry install - run: poetry install
- name: Cache inventory
uses: actions/cache@v4
with:
path: share/tessdata/
key: v1-inventory-darwin-${{ matrix.arch }}-${{ hashFiles('./inventory.lock') }}
- name: Sync inventory
run: |-
if [ -f "share/tessdata" ]; then
echo "Already cached, skipping"
else
poetry run python3 ./dev_scripts/inventory.py sync
fi
- name: Run CLI tests - name: Run CLI tests
run: poetry run make test run: poetry run make test
- name: Build macOS app - name: Build macOS app
@ -206,7 +189,7 @@ jobs:
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.11"
- name: Login to GHCR - name: Login to GHCR
run: | run: |
@ -379,7 +362,6 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: needs:
- build-container-image - build-container-image
- download-tessdata
strategy: strategy:
matrix: matrix:
include: include:
@ -410,7 +392,27 @@ jobs:
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.11"
- name: Install inventory dependencies
run: |
sudo apt install pipx
pipx install poetry
poetry install
- name: Cache inventory
uses: actions/cache@v4
with:
path: share/tessdata/
key: v1-inventory-linux-${{ hashFiles('./inventory.lock') }}
- name: Sync inventory
run: |-
if [ -f "share/tessdata" ]; then
echo "Already cached, skipping"
else
poetry run python3 ./dev_scripts/inventory.py sync
fi
- name: Login to GHCR - name: Login to GHCR
run: | run: |
@ -436,14 +438,6 @@ jobs:
share/image-id.txt share/image-id.txt
fail-on-cache-miss: true fail-on-cache-miss: true
- name: Restore cached tessdata
uses: actions/cache/restore@v4
with:
path: share/tessdata/
enableCrossOsArchive: true
fail-on-cache-miss: true
key: v1-tessdata-${{ hashFiles('./install/common/download-tessdata.py') }}
- name: Setup xvfb (Linux) - name: Setup xvfb (Linux)
run: | run: |
sudo apt update sudo apt update
@ -473,7 +467,7 @@ jobs:
# file successfully. # file successfully.
xvfb-run -s '-ac' ./dev_scripts/env.py --distro ${{ matrix.distro }} --version ${{ matrix.version }} run --dev \ xvfb-run -s '-ac' ./dev_scripts/env.py --distro ${{ matrix.distro }} --version ${{ matrix.version }} run --dev \
bash -c 'cd dangerzone; poetry run make test' bash -c 'cd dangerzone; poetry run make test'
- name: Upload PDF diffs - name: Upload PDF diffs
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:

View file

@ -75,10 +75,10 @@ Build the latest container:
python3 ./install/common/build-image.py python3 ./install/common/build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 ./install/common/download-tessdata.py poetry run python3 ./dev_scripts/inventory.py sync
``` ```
Run from source tree: Run from source tree:
@ -137,10 +137,10 @@ Build the latest container:
python3 ./install/common/build-image.py python3 ./install/common/build-image.py
``` ```
Download the OCR language data: Download the OCR language data and other useful assets:
```sh ```sh
python3 ./install/common/download-tessdata.py poetry run python3 ./dev_scripts/inventory.py sync
``` ```
Run from source tree: Run from source tree:
@ -342,10 +342,10 @@ Build the dangerzone container image:
python3 ./install/common/build-image.py python3 ./install/common/build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 ./install/common/download-tessdata.py poetry run python3 ./dev_scripts/inventory.py sync
``` ```
Run from source tree: Run from source tree:
@ -409,10 +409,10 @@ Build the dangerzone container image:
python3 .\install\common\build-image.py python3 .\install\common\build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 .\install\common\download-tessdata.py poetry run python3 .\dev_scripts\inventory.py sync
``` ```
After that you can launch dangerzone during development with: After that you can launch dangerzone during development with:

10
QA.md
View file

@ -26,7 +26,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Build and run the Dangerzone .exe - [ ] Build and run the Dangerzone .exe
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -35,7 +35,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create and run an app bundle. - [ ] Create and run an app bundle.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -44,7 +44,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create and run an app bundle. - [ ] Create and run an app bundle.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -53,7 +53,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create a .deb package and install it system-wide. - [ ] Create a .deb package and install it system-wide.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -62,7 +62,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create an .rpm package and install it system-wide. - [ ] Create an .rpm package and install it system-wide.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).

View file

@ -145,11 +145,11 @@ Here is what you need to do:
poetry sync poetry sync
``` ```
- [ ] Build the container image and the OCR language data - [ ] Build the container image and download the necessary assets
```bash ```bash
poetry run ./install/common/build-image.py poetry run ./install/common/build-image.py
poetry run ./install/common/download-tessdata.py poetry run ./dev_scripts/inventory.py sync
# Copy the container image to the assets folder # Copy the container image to the assets folder
cp share/container.tar ~dz/release-assets/$VERSION/dangerzone-$VERSION-arm64.tar cp share/container.tar ~dz/release-assets/$VERSION/dangerzone-$VERSION-arm64.tar
@ -230,6 +230,7 @@ The Windows release is performed in a Windows 11 virtual machine (as opposed to
- [ ] Copy the container image into the VM - [ ] Copy the container image into the VM
> [!IMPORTANT] > [!IMPORTANT]
> Instead of running `python .\install\windows\build-image.py` in the VM, run the build image script on the host (making sure to build for `linux/amd64`). Copy `share/container.tar` and `share/image-id.txt` from the host into the `share` folder in the VM. > Instead of running `python .\install\windows\build-image.py` in the VM, run the build image script on the host (making sure to build for `linux/amd64`). Copy `share/container.tar` and `share/image-id.txt` from the host into the `share` folder in the VM.
- [ ] Download the necessary assets with `poetry run python .\dev_scripts\inventory.py sync`
- [ ] Run `poetry run .\install\windows\build-app.bat` - [ ] Run `poetry run .\install\windows\build-app.bat`
- [ ] When you're done you will have `dist\Dangerzone.msi` - [ ] When you're done you will have `dist\Dangerzone.msi`

View file

@ -45,7 +45,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Build and run the Dangerzone .exe - [ ] Build and run the Dangerzone .exe
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -54,7 +54,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create and run an app bundle. - [ ] Create and run an app bundle.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -63,7 +63,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create and run an app bundle. - [ ] Create and run an app bundle.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -72,7 +72,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create a .deb package and install it system-wide. - [ ] Create a .deb package and install it system-wide.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -81,7 +81,7 @@ poetry run ./dev_scripts/qa.py {distro}-{version}
- [ ] Create a new development environment with Poetry. - [ ] Create a new development environment with Poetry.
- [ ] Build the container image and ensure the development environment uses - [ ] Build the container image and ensure the development environment uses
the new image. the new image.
- [ ] Download the OCR language data using `./install/common/download-tessdata.py` - [ ] Download the necessary assets using `./dev_scripts/inventory.py sync`
- [ ] Run the Dangerzone tests. - [ ] Run the Dangerzone tests.
- [ ] Create an .rpm package and install it system-wide. - [ ] Create an .rpm package and install it system-wide.
- [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below). - [ ] Test some QA scenarios (see [Scenarios](#Scenarios) below).
@ -292,10 +292,10 @@ Build the latest container:
python3 ./install/common/build-image.py python3 ./install/common/build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 ./install/common/download-tessdata.py poetry run python3 ./dev_scripts/inventory.py sync
``` ```
Run from source tree: Run from source tree:
@ -355,10 +355,10 @@ Build the latest container:
python3 ./install/common/build-image.py python3 ./install/common/build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 ./install/common/download-tessdata.py poetry run python3 ./dev_scripts/inventory.py sync
``` ```
Run from source tree: Run from source tree:
@ -419,10 +419,10 @@ Build the dangerzone container image:
python3 .\install\common\build-image.py python3 .\install\common\build-image.py
``` ```
Download the OCR language data: Download the necessary assets:
```sh ```sh
python3 .\install\common\download-tessdata.py poetry run python3 .\dev_scripts\inventory.py sync
``` ```
After that you can launch dangerzone during development with: After that you can launch dangerzone during development with:
@ -758,9 +758,11 @@ class QABase(abc.ABC):
self.prompt("Does it pass?", choices=["y", "n"]) self.prompt("Does it pass?", choices=["y", "n"])
logger.info("Successfully completed QA scenarios") logger.info("Successfully completed QA scenarios")
@task("Download Tesseract data", auto=True) @task("Download the necessary assets", auto=True)
def download_tessdata(self): def sync_inventory(self):
self.run("python", str(Path("install", "common", "download-tessdata.py"))) self.run(
"poetry", "run", "python", str(Path("dev_scripts", "inventory.py")), "sync"
)
@classmethod @classmethod
@abc.abstractmethod @abc.abstractmethod
@ -862,7 +864,7 @@ class QAWindows(QABase):
self.install_docker() self.install_docker()
self.install_poetry() self.install_poetry()
self.build_image() self.build_image()
self.download_tessdata() self.sync_inventory()
self.run_tests() self.run_tests()
self.build_dangerzone_exe() self.build_dangerzone_exe()
@ -954,7 +956,7 @@ class QALinux(QABase):
def start(self): def start(self):
self.build_dev_image() self.build_dev_image()
self.build_container_image() self.build_container_image()
self.download_tessdata() self.sync_inventory()
self.run_tests() self.run_tests()
self.build_package() self.build_package()
self.build_qa_image() self.build_qa_image()

18
dodo.py
View file

@ -46,8 +46,8 @@ def list_language_data():
return targets return targets
TESSDATA_DEPS = ["install/common/download-tessdata.py", "share/ocr-languages.json"] INVENTORY_DEPS = ["dev_scripts/inventory.py", "inventory.lock"]
TESSDATA_TARGETS = list_language_data() INVENTORY_TARGETS = list_language_data()
IMAGE_DEPS = [ IMAGE_DEPS = [
"Dockerfile", "Dockerfile",
@ -67,7 +67,7 @@ PYTHON_DEPS = ["poetry.lock", "pyproject.toml"]
DMG_DEPS = [ DMG_DEPS = [
*list_files("install/macos"), *list_files("install/macos"),
*TESSDATA_TARGETS, *INVENTORY_TARGETS,
*IMAGE_TARGETS, *IMAGE_TARGETS,
*PYTHON_DEPS, *PYTHON_DEPS,
*SOURCE_DEPS, *SOURCE_DEPS,
@ -174,12 +174,12 @@ def task_init_release_dir():
} }
def task_download_tessdata(): def task_sync_inventory():
"""Download the Tesseract data using ./install/common/download-tessdata.py""" """Download the necessary assets using ./dev_scripts/inventory.py sync"""
return { return {
"actions": ["python install/common/download-tessdata.py"], "actions": ["poetry run python3 dev_scripts/inventory.py sync"],
"file_dep": TESSDATA_DEPS, "file_dep": INVENTORY_DEPS,
"targets": TESSDATA_TARGETS, "targets": INVENTORY_TARGETS,
"clean": True, "clean": True,
} }
@ -233,7 +233,7 @@ def task_macos_build_dmg():
"macos_check_system", "macos_check_system",
"init_release_dir", "init_release_dir",
"poetry_install", "poetry_install",
"download_tessdata", "sync_inventory",
], ],
"targets": [dmg_src, dmg_dst], "targets": [dmg_src, dmg_dst],
"clean": True, "clean": True,