mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 18:22:37 +02:00
FIXUP: Replace print statements with logging
This commit is contained in:
parent
8db9261ccf
commit
c37ff7322d
1 changed file with 15 additions and 9 deletions
|
@ -1,13 +1,15 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tarfile
|
import tarfile
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
TESSDATA_RELEASES_URL = (
|
TESSDATA_RELEASES_URL = (
|
||||||
"https://api.github.com/repos/tesseract-ocr/tessdata_fast/releases/latest"
|
"https://api.github.com/repos/tesseract-ocr/tessdata_fast/releases/latest"
|
||||||
)
|
)
|
||||||
|
@ -29,6 +31,12 @@ def git_root():
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.DEBUG,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
share_dir = git_root() / "share"
|
share_dir = git_root() / "share"
|
||||||
tessdata_dir = share_dir / "tessdata"
|
tessdata_dir = share_dir / "tessdata"
|
||||||
|
|
||||||
|
@ -41,22 +49,21 @@ def main():
|
||||||
expected_files = {f"{lang}.traineddata" for lang in langs_short}
|
expected_files = {f"{lang}.traineddata" for lang in langs_short}
|
||||||
files = {f.name for f in tessdata_dir.iterdir()}
|
files = {f.name for f in tessdata_dir.iterdir()}
|
||||||
if files == expected_files:
|
if files == expected_files:
|
||||||
msg = "> Skipping tessdata download, language data already exists"
|
logger.info("Skipping tessdata download, language data already exists")
|
||||||
print(msg, file=sys.stderr)
|
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
print(f"Found {tessdata_dir} but contents do not match", file=sys.stderr)
|
logger.info(f"Found {tessdata_dir} but contents do not match")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Get latest release of Tesseract data.
|
# Get latest release of Tesseract data.
|
||||||
print(f"> Getting latest tessdata release", file=sys.stderr)
|
logger.info("Getting latest tessdata release")
|
||||||
with urllib.request.urlopen(TESSDATA_RELEASES_URL) as f:
|
with urllib.request.urlopen(TESSDATA_RELEASES_URL) as f:
|
||||||
resp = f.read()
|
resp = f.read()
|
||||||
releases = json.loads(resp)
|
releases = json.loads(resp)
|
||||||
tag = releases["tag_name"]
|
tag = releases["tag_name"]
|
||||||
|
|
||||||
# Download the archive of that Tesseract data release.
|
# Download the archive of that Tesseract data release.
|
||||||
print(f"> Downloading tessdata release {tag}", file=sys.stderr)
|
logger.info(f"Downloading tessdata release {tag}")
|
||||||
archive_url = TESSDATA_ARCHIVE_URL.format(tessdata_version=tag)
|
archive_url = TESSDATA_ARCHIVE_URL.format(tessdata_version=tag)
|
||||||
with urllib.request.urlopen(archive_url) as f:
|
with urllib.request.urlopen(archive_url) as f:
|
||||||
archive = f.read()
|
archive = f.read()
|
||||||
|
@ -65,12 +72,11 @@ def main():
|
||||||
raise RuntimeError(f"Checksum mismatch {digest} != {TESSDATA_CHECKSUM}")
|
raise RuntimeError(f"Checksum mismatch {digest} != {TESSDATA_CHECKSUM}")
|
||||||
|
|
||||||
# Extract the language models from the tessdata archive.
|
# Extract the language models from the tessdata archive.
|
||||||
print(f"> Extracting tessdata archive into {tessdata_dir}", file=sys.stderr)
|
logger.info(f"Extracting tessdata archive into {tessdata_dir}")
|
||||||
with tarfile.open(fileobj=io.BytesIO(archive)) as t:
|
with tarfile.open(fileobj=io.BytesIO(archive)) as t:
|
||||||
for lang in langs_short:
|
for lang in langs_short:
|
||||||
member = f"tessdata_fast-{tag}/{lang}.traineddata"
|
member = f"tessdata_fast-{tag}/{lang}.traineddata"
|
||||||
print(f">> Extracting {member}")
|
logger.info(f"Extracting {member}")
|
||||||
t.extract(member=member, path=share_dir, set_attrs=False)
|
|
||||||
|
|
||||||
tessdata_dl_dir = share_dir / f"tessdata_fast-{tag}"
|
tessdata_dl_dir = share_dir / f"tessdata_fast-{tag}"
|
||||||
tessdata_dl_dir.rename(tessdata_dir)
|
tessdata_dl_dir.rename(tessdata_dir)
|
||||||
|
|
Loading…
Reference in a new issue