From c37ff7322d65eb19659bdca838666a7e3a29db9b Mon Sep 17 00:00:00 2001
From: Alex Pyrgiotis
Date: Wed, 9 Oct 2024 21:54:42 +0300
Subject: [PATCH] FIXUP: Replace print statements with logging

Route the status messages of download-tessdata.py through a module-level
logger instead of print(), configure a timestamped log format at the
start of main(), and drop the `re` import.

---
Reviewer note: the tarfile extraction call (`t.extract(...)`) is kept as
a context line. Only the print() above it is replaced; removing the
extraction would leave nothing for the final rename to move. The `index`
post-image hash below was not regenerated after this correction.

 install/common/download-tessdata.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/install/common/download-tessdata.py b/install/common/download-tessdata.py
index f2da96e..c35d4b3 100644
--- a/install/common/download-tessdata.py
+++ b/install/common/download-tessdata.py
@@ -1,13 +1,15 @@
 import hashlib
 import io
 import json
+import logging
 import pathlib
-import re
 import subprocess
 import sys
 import tarfile
 import urllib.request
 
+logger = logging.getLogger(__name__)
+
 TESSDATA_RELEASES_URL = (
     "https://api.github.com/repos/tesseract-ocr/tessdata_fast/releases/latest"
 )
@@ -29,6 +31,12 @@ def git_root():
 
 
 def main():
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+
     share_dir = git_root() / "share"
     tessdata_dir = share_dir / "tessdata"
 
@@ -41,22 +49,21 @@ def main():
         expected_files = {f"{lang}.traineddata" for lang in langs_short}
         files = {f.name for f in tessdata_dir.iterdir()}
         if files == expected_files:
-            msg = "> Skipping tessdata download, language data already exists"
-            print(msg, file=sys.stderr)
+            logger.info("Skipping tessdata download, language data already exists")
             return
         else:
-            print(f"Found {tessdata_dir} but contents do not match", file=sys.stderr)
+            logger.info(f"Found {tessdata_dir} but contents do not match")
             return 1
 
     # Get latest release of Tesseract data.
-    print(f"> Getting latest tessdata release", file=sys.stderr)
+    logger.info("Getting latest tessdata release")
     with urllib.request.urlopen(TESSDATA_RELEASES_URL) as f:
         resp = f.read()
         releases = json.loads(resp)
         tag = releases["tag_name"]
 
     # Get latest release of Tesseract data.
-    print(f"> Downloading tessdata release {tag}", file=sys.stderr)
+    logger.info(f"Downloading tessdata release {tag}")
     archive_url = TESSDATA_ARCHIVE_URL.format(tessdata_version=tag)
     with urllib.request.urlopen(archive_url) as f:
         archive = f.read()
@@ -65,12 +72,12 @@ def main():
         raise RuntimeError(f"Checksum mismatch {digest} != {TESSDATA_CHECKSUM}")
 
     # Extract the languages models from the tessdata archive.
-    print(f"> Extracting tessdata archive into {tessdata_dir}", file=sys.stderr)
+    logger.info(f"Extracting tessdata archive into {tessdata_dir}")
     with tarfile.open(fileobj=io.BytesIO(archive)) as t:
         for lang in langs_short:
             member = f"tessdata_fast-{tag}/{lang}.traineddata"
-            print(f">> Extracting {member}")
+            logger.info(f"Extracting {member}")
             t.extract(member=member, path=share_dir, set_attrs=False)
 
     tessdata_dl_dir = share_dir / f"tessdata_fast-{tag}"
     tessdata_dl_dir.rename(tessdata_dir)