# Patch summary (descriptive text ahead of the first "diff --git" header):
#
# Dockerfile
#   * Changes the default value of the TESSDATA_CHECKSUM build argument.
#   * Points the release lookup (api.github.com .../releases/latest) and the
#     archive download at tesseract-ocr/tessdata_fast instead of
#     tesseract-ocr/tessdata, and renames the tarball prefix from "tessdata-"
#     to "tessdata_fast-" in the wget, sha256sum -c and tar steps.
#   * The rest of the RUN chain (copy *.traineddata into /usr/share/tessdata,
#     then remove the scratch directory) is unchanged context.
#
# share/ocr-languages.json
#   * Removes four entries: "Danish - Fraktur" (dan_frak),
#     "German - Fraktur" (deu_frak), "Slovak - Fraktur" (slk_frak) and
#     "Tagalog (new - Filipino)" (tgl).
#   * Presumably these models are not shipped by tessdata_fast - TODO confirm
#     against the upstream repository.
#
# NOTE(review): TESSDATA_VERSION is resolved from releases/latest at build
# time, while TESSDATA_CHECKSUM is a fixed ARG default. A new upstream release
# would download a different tarball and the sha256sum -c step would then fail
# until the checksum is updated (or passed via --build-arg). Confirm that this
# fail-closed behaviour is intended.
diff --git a/Dockerfile b/Dockerfile index 1ef701c..4c80d78 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM alpine:edge -ARG TESSDATA_CHECKSUM=990fffb9b7a9b52dc9a2d053a9ef6852ca2b72bd8dfb22988b0b990a700fd3c7 +ARG TESSDATA_CHECKSUM=d0e3bb6f3b4e75748680524a1d116f2bfb145618f8ceed55b279d15098a530f9 ARG H2ORESTART_CHECKSUM=5db816a1e57b510456633f55e693cb5ef3675ef8b35df4f31c90ab9d4c66071a # Install dependencies @@ -26,11 +26,11 @@ RUN apk --no-cache -U upgrade && \ # # Before we untar the models, we also check if the checksum is the expected one. RUN mkdir tessdata && cd tessdata \ - && TESSDATA_VERSION=$(wget -O- -nv https://api.github.com/repos/tesseract-ocr/tessdata/releases/latest \ + && TESSDATA_VERSION=$(wget -O- -nv https://api.github.com/repos/tesseract-ocr/tessdata_fast/releases/latest \ | sed -n 's/^.*"tag_name": "\([0-9.]\+\)".*$/\1/p') \ - && wget https://github.com/tesseract-ocr/tessdata/archive/$TESSDATA_VERSION/tessdata-$TESSDATA_VERSION.tar.gz \ - && echo "$TESSDATA_CHECKSUM tessdata-$TESSDATA_VERSION.tar.gz" | sha256sum -c \ - && tar -xzvf tessdata-$TESSDATA_VERSION.tar.gz -C . \ + && wget https://github.com/tesseract-ocr/tessdata_fast/archive/$TESSDATA_VERSION/tessdata_fast-$TESSDATA_VERSION.tar.gz \ + && echo "$TESSDATA_CHECKSUM tessdata_fast-$TESSDATA_VERSION.tar.gz" | sha256sum -c \ + && tar -xzvf tessdata_fast-$TESSDATA_VERSION.tar.gz -C . \ && find . -name '*.traineddata' -maxdepth 2 -exec cp {} /usr/share/tessdata \; \ && cd .. 
&& rm -r tessdata diff --git a/share/ocr-languages.json b/share/ocr-languages.json index 2ae54e6..3c8db5b 100644 --- a/share/ocr-languages.json +++ b/share/ocr-languages.json @@ -22,9 +22,7 @@ "Corsican": "cos", "Welsh": "cym", "Danish": "dan", - "Danish - Fraktur": "dan_frak", "German": "deu", - "German - Fraktur": "deu_frak", "Divehi": "div", "Dzongkha": "dzo", "Greek": "ell", @@ -97,7 +95,6 @@ "Sanskrit": "san", "Sinhala": "sin", "Slovakian": "slk", - "Slovak - Fraktur": "slk_frak", "Slovenian": "slv", "Sindhi": "snd", "Spanish": "spa", @@ -113,7 +110,6 @@ "Tatar": "tat", "Telugu": "tel", "Tajik": "tgk", - "Tagalog (new - Filipino)": "tgl", "Thai": "tha", "Tigrinya": "tir", "Tonga": "ton",