From 35e439f9e81b042ab4c960cee07850cf5a690217 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Thu, 18 May 2023 23:54:43 +0300 Subject: [PATCH] Restore the OCR languages Restore the OCR languages to the state they were in 66d3c401636e64b2139a565d3e303534b3c380a7, with some minor changes. We can now do so because we download all the trained models, not just the ones that Alpine Linux offers. --- share/ocr-languages.json | 72 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/share/ocr-languages.json b/share/ocr-languages.json index 0621117..2c7002f 100644 --- a/share/ocr-languages.json +++ b/share/ocr-languages.json @@ -1,66 +1,130 @@ { - "Afrikaans": "arf", + "Afrikaans": "afr", + "Amharic": "amh", "Arabic": "ara", "Assamese": "asm", "Azerbaijani": "aze", + "Azerbaijani (Cyrillic)": "aze_cyrl", "Belarusian": "bel", "Bengali": "ben", + "Tibetan Standard": "bod", + "Bosnian": "bos", + "Breton": "bre", "Bulgarian": "bul", "Catalan": "cat", + "Cebuano": "ceb", "Czech": "ces", "Chinese - Simplified": "chi_sim", + "Chinese - Simplified (vertical)": "chi_sim_vert", "Chinese - Traditional": "chi_tra", + "Chinese - Traditional (vertical)": "chi_tra_vert", "Cherokee": "chr", + "Corsican": "cos", + "Welsh": "cym", "Danish": "dan", + "Danish - Fraktur": "dan_frak", "German": "deu", + "German - Fraktur": "deu_frak", + "Divehi": "div", + "Dzongkha": "dzo", "Greek": "ell", "English": "eng", "English, Middle (1100-1500)": "enm", "Esperanto": "epo", "Estonian": "est", "Basque": "eus", + "Faroese": "fao", + "Persian": "fas", + "Filipino": "fil", "Finnish": "fin", "French": "fra", "Frankish": "frk", "French, Middle (ca.1400-1600)": "frm", + "Frisian (Western)": "fry", + "Gaelic (Scots)": "gla", + "Irish": "gle", "Galician": "glg", - "Greek, Ancient, to 1453": "grc", + "Greek, Ancient (to 1453)": "grc", + "Gujarati": "guj", + "Haitian": "hat", "Hebrew": "heb", "Hindi": "hin", "Croatian": "hrv", "Hungarian": "hun", + "Armenian": 
"hye", + "Inuktitut": "iku", "Indonesian": "ind", "Icelandic": "isl", "Italian": "ita", "Italian - Old": "ita_old", + "Javanese": "jav", "Japanese": "jpn", + "Japanese (vertical)": "jpn_vert", "Kannada": "kan", "Georgian": "kat", + "Old Georgian": "kat_old", + "Kazakh": "kaz", + "Khmer": "khm", + "Kyrgyz": "kir", + "Kurmanji (Kurdish - Latin Script)": "kmr", "Korean": "kor", + "Korean (vertical)": "kor_vert", + "Kurdish (Arabic)": "kur_ara", + "Lao": "lao", + "Latin": "lat", "Latvian": "lav", "Lithuanian": "lit", + "Luxembourgish": "ltz", "Malayalam": "mal", + "Marathi": "mar", "Macedonian": "mkd", "Maltese": "mlt", + "Mongolian": "mon", + "Maori": "mri", "Malay": "msa", + "Burmese": "mya", + "Nepali": "nep", "Dutch": "nld", "Norwegian": "nor", + "Occitan (post 1500)": "oci", + "Oriya": "ori", + "Punjabi": "pan", "Polish": "pol", "Portuguese": "por", + "Pashto": "pus", + "Quechua": "que", "Romanian": "ron", "Russian": "rus", + "Sanskrit": "san", + "Sinhala": "sin", "Slovakian": "slk", + "Slovak - Fraktur": "slk_frak", + "Slovenian": "slv", + "Sindhi": "snd", "Spanish": "spa", "Spanish; Castilian - Old": "spa_old", "Albanian": "sqi", "Serbian": "srp", + "Serbian (Latin)": "srp_latn", + "Sundanese": "sun", "Swahili": "swa", "Swedish": "swe", + "Syriac": "syr", "Tamil": "tam", + "Tatar": "tat", "Telugu": "tel", - "Filipino": "tgl", + "Tajik": "tgk", + "Tagalog (new - Filipino)": "tgl", "Thai": "tha", + "Tigrinya": "tir", + "Tonga": "ton", "Turkish": "tur", + "Uyghur": "uig", "Ukrainian": "ukr", - "Vietnamese": "vie" + "Urdu": "urd", + "Uzbek": "uzb", + "Uzbek (Cyrillic)": "uzb_cyrl", + "Vietnamese": "vie", + "Yiddish": "yid", + "Yoruba": "yor" }