FIXUP: Detect proper tessdata dir for Linux systems

This commit is contained in:
Alex Pyrgiotis 2024-10-09 20:21:31 +03:00
parent 6a5b6e4249
commit b3d8ddc086
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA

View file

@@ -42,9 +42,19 @@ def get_tessdata_dir() -> pathlib.Path:
# development builds, or in Windows/macOS platforms. # development builds, or in Windows/macOS platforms.
return pathlib.Path(get_resource_path("tessdata")) return pathlib.Path(get_resource_path("tessdata"))
# On Linux systems, grab the Tesseract data from any of the following
# locations. We found some of these locations through trial and error, whereas
# others are taken from the docs:
#
# [...] Possibilities are /usr/share/tesseract-ocr/tessdata or
# /usr/share/tessdata or /usr/share/tesseract-ocr/4.00/tessdata. [1]
#
# [1] https://tesseract-ocr.github.io/tessdoc/Installation.html
tessdata_dirs = [ tessdata_dirs = [
pathlib.Path("/usr/share/tessdata/"), # on debian pathlib.Path("/usr/share/tessdata/"), # on Debian
pathlib.Path("/usr/share/tesseract/tessdata/"), # on fedora pathlib.Path("/usr/share/tesseract/tessdata/"), # on Fedora
pathlib.Path("/usr/share/tesseract-ocr/tessdata/"), # ? (documented, but not encountered)
pathlib.Path("/usr/share/tesseract-ocr/4.00/tessdata/"), # on Ubuntu
] ]
for dir in tessdata_dirs: for dir in tessdata_dirs: