From 66d3c401636e64b2139a565d3e303534b3c380a7 Mon Sep 17 00:00:00 2001 From: deeplow Date: Tue, 28 Feb 2023 19:19:09 +0000 Subject: [PATCH] Sort OCR languages by tesseract arg name Make it easier to compare the list of languages with the output of `tesseract --list-langs`. --- share/ocr-languages.json | 182 +++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/share/ocr-languages.json b/share/ocr-languages.json index be6e686..2730af7 100644 --- a/share/ocr-languages.json +++ b/share/ocr-languages.json @@ -1,162 +1,162 @@ { - "Afrikaans": "ar", - "Albanian": "sqi", - "Amharic": "amh", - "Arabic": "ara", "Arabic script": "Arabic", - "Armenian": "hye", "Armenian script": "Armenian", - "Assamese": "asm", - "Azerbaijani": "aze", - "Azerbaijani (Cyrillic)": "aze_cyrl", - "Basque": "eus", - "Belarusian": "bel", - "Bengali": "ben", "Bengali script": "Bengali", - "Bosnian": "bos", - "Breton": "bre", - "Bulgarian": "bul", - "Burmese": "mya", "Canadian Aboriginal script": "Canadian_Aboriginal", - "Catalan": "cat", - "Cebuano": "ceb", - "Cherokee": "chr", "Cherokee script": "Cherokee", - "Chinese - Simplified": "chi_sim", - "Chinese - Simplified (vertical)": "chi_sim_vert", - "Chinese - Traditional": "chi_tra", - "Chinese - Traditional (vertical)": "chi_tra_vert", - "Corsican": "cos", - "Croatian": "hrv", "Cyrillic script": "Cyrillic", - "Czech": "ces", - "Danish": "dan", "Devanagari script": "Devanagari", - "Divehi": "div", - "Dutch": "nld", - "Dzongkha": "dzo", - "English": "eng", - "English, Middle (1100-1500)": "enm", - "Esperanto": "epo", - "Estonian": "est", "Ethiopic script": "Ethiopic", - "Faroese": "fao", - "Filipino": "fil", - "Finnish": "fin", "Fraktur script": "Fraktur", - "Frankish": "frk", - "French": "fra", - "French, Middle (ca.1400-1600)": "frm", - "Frisian (Western)": "fry", - "Gaelic (Scots)": "gla", - "Galician": "glg", - "Georgian": "kat", "Georgian script": "Georgian", - "German": "deu", - "Greek": "ell",
"Greek script": "Greek", - "Gujarati": "guj", "Gujarati script": "Gujarati", "Gurmukhi script": "Gurmukhi", - "Hangul script": "Hangul", - "Hangul (vertical) script": "Hangul_vert", "Han - Simplified script": "HanS", "Han - Simplified (vertical) script": "HanS_vert", "Han - Traditional script": "HanT", "Han - Traditional (vertical) script": "HanT_vert", + "Hangul script": "Hangul", + "Hangul (vertical) script": "Hangul_vert", + "Hebrew script": "Hebrew", + "Japanese script": "Japanese", + "Japanese (vertical) script": "Japanese_vert", + "Kannada script": "Kannada", + "Khmer script": "Khmer", + "Lao script": "Lao", + "Latin script": "Latin", + "Malayalam script": "Malayalam", + "Myanmar script": "Myanmar", + "Oriya (Odia) script": "Oriya", + "Sinhala script": "Sinhala", + "Syriac script": "Syriac", + "Tamil script": "Tamil", + "Telugu script": "Telugu", + "Thaana script": "Thaana", + "Thai script": "Thai", + "Tibetan script": "Tibetan", + "Vietnamese script": "Vietnamese", + "Amharic": "amh", + "Afrikaans": "afr", + "Arabic": "ara", + "Assamese": "asm", + "Azerbaijani": "aze", + "Azerbaijani (Cyrillic)": "aze_cyrl", + "Belarusian": "bel", + "Bengali": "ben", + "Tibetan Standard": "bod", + "Bosnian": "bos", + "Breton": "bre", + "Bulgarian": "bul", + "Catalan": "cat", + "Cebuano": "ceb", + "Czech": "ces", + "Chinese - Simplified": "chi_sim", + "Chinese - Simplified (vertical)": "chi_sim_vert", + "Chinese - Traditional": "chi_tra", + "Chinese - Traditional (vertical)": "chi_tra_vert", + "Cherokee": "chr", + "Corsican": "cos", + "Welsh": "cym", + "Danish": "dan", + "German": "deu", + "Divehi": "div", + "Dzongkha": "dzo", + "Greek": "ell", + "English": "eng", + "English, Middle (1100-1500)": "enm", + "Esperanto": "epo", + "Estonian": "est", + "Basque": "eus", + "Faroese": "fao", + "Persian": "fas", + "Filipino": "fil", + "Finnish": "fin", + "French": "fra", + "Frankish": "frk", + "French, Middle (ca.1400-1600)": "frm", + "Frisian (Western)": "fry", + "Gaelic (Scots)":
"gla", + "Irish": "gle", + "Galician": "glg", + "Gujarati": "guj", "Hatian": "hat", "Hebrew": "heb", - "Hebrew script": "Hebrew", "Hindi": "hin", + "Croatian": "hrv", "Hungarian": "hun", - "Icelandic": "isl", - "Indonesian": "ind", + "Armenian": "hye", "Inuktitut": "iku", - "Irish": "gle", + "Indonesian": "ind", + "Icelandic": "isl", "Italian": "ita", "Italian - Old": "ita_old", - "Japanese": "jpn", - "Japanese script": "Japanese", - "Japanese (vertical)": "jpn_vert", - "Japanese (vertical) script": "Japanese_vert", "Javanese": "jav", + "Japanese": "jpn", + "Japanese (vertical)": "jpn_vert", "Kannada": "kan", - "Kannada script": "Kannada", + "Georgian": "kat", + "Old Georgian": "kat_old", "Kazakh": "kaz", "Khmer": "khm", - "Khmer script": "Khmer", + "Kyrgyz": "kir", "Korean": "kor", "Korean (vertical)": "kor_vert", "Kurdish (Arabic)": "kur_ara", - "Kyrgyz": "kir", "Lao": "lao", - "Lao script": "Lao", "Latin": "lat", - "Latin script": "Latin", "Latvian": "lav", "Lithuanian": "lit", "Luxembourgish": "ltz", - "Macedonian": "mkd", "Malayalam": "mal", - "Malayalam script": "Malayalam", - "Malay": "msa", - "Maltese": "mlt", - "Maori": "mri", "Marathi": "mar", + "Macedonian": "mkd", + "Maltese": "mlt", "Mongolian": "mon", - "Myanmar script": "Myanmar", + "Maori": "mri", + "Malay": "msa", + "Burmese": "mya", "Nepali": "nep", + "Dutch": "nld", "Norwegian": "nor", "Occitan (post 1500)": "oci", - "Old Georgian": "kat_old", - "Oriya (Odia) script": "Oriya", "Oriya": "ori", - "Pashto": "pus", - "Persian": "fas", + "script and orientation": "osd", + "Punjabi": "pan", "Polish": "pol", "Portuguese": "por", - "Punjabi": "pan", + "Pashto": "pus", "Quechua": "que", "Romanian": "ron", "Russian": "rus", "Sanskrit": "san", - "script and orientation": "osd", - "Serbian (Latin)": "srp_latn", - "Serbian": "srp", - "Sindhi": "snd", - "Sinhala script": "Sinhala", "Sinhala": "sin", "Slovakian": "slk", "Slovenian": "slv", - "Spanish, Castilian - Old": "spa_old", + "Sindhi": "snd", "Spanish":
"spa", + "Spanish, Castilian - Old": "spa_old", + "Albanian": "sqi", + "Serbian": "srp", + "Serbian (Latin)": "srp_latn", "Sundanese": "sun", "Swahili": "swa", "Swedish": "swe", - "Syriac script": "Syriac", "Syriac": "syr", - "Tajik": "tgk", - "Tamil script": "Tamil", "Tamil": "tam", "Tatar": "tat", - "Telugu script": "Telugu", "Telugu": "tel", - "Thaana script": "Thaana", - "Thai script": "Thai", + "Tajik": "tgk", "Thai": "tha", - "Tibetan script": "Tibetan", - "Tibetan Standard": "bod", "Tigrinya": "tir", "Tonga": "ton", "Turkish": "tur", + "Uyghur": "uig", "Ukrainian": "ukr", "Urdu": "urd", - "Uyghur": "uig", - "Uzbek (Cyrillic)": "uzb_cyrl", "Uzbek": "uzb", - "Vietnamese script": "Vietnamese", + "Uzbek (Cyrillic)": "uzb_cyrl", "Vietnamese": "vie", - "Welsh": "cym", "Yiddish": "yid", - "Yoruba": "yor" + "Yoruba": "yor" } \ No newline at end of file