From 58332fdd6e9955adf449ea65be0d6ab464b211d8 Mon Sep 17 00:00:00 2001 From: deeplow Date: Tue, 28 Feb 2023 19:36:54 +0000 Subject: [PATCH] tesseract: add new languages and others Tagalog was replaced with Filipino [1] in newer tesseract versions, so it makes sense for us to use the new name and map it to the old "tgl" code (Tagalog) under the hood. Language names obtained from tesseract's man page [2]. [1]: https://github.com/tesseract-ocr/tesseract/commit/58f7a72f0065b6e1d2e601cc535c5fac0f14eeb3 [2]: https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc --- share/ocr-languages.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/share/ocr-languages.json b/share/ocr-languages.json index 5ab9187..0621117 100644 --- a/share/ocr-languages.json +++ b/share/ocr-languages.json @@ -24,6 +24,7 @@ "Frankish": "frk", "French, Middle (ca.1400-1600)": "frm", "Galician": "glg", + "Greek, Ancient, to 1453": "grc", "Hebrew": "heb", "Hindi": "hin", "Croatian": "hrv", @@ -50,14 +51,16 @@ "Russian": "rus", "Slovakian": "slk", "Spanish": "spa", - "Spanish": "spa_old", + "Spanish; Castilian - Old": "spa_old", "Albanian": "sqi", "Serbian": "srp", "Swahili": "swa", "Swedish": "swe", "Tamil": "tam", "Telugu": "tel", + "Filipino": "tgl", "Thai": "tha", "Turkish": "tur", - "Ukrainian": "ukr" -} \ No newline at end of file + "Ukrainian": "ukr", + "Vietnamese": "vie" +}