From 48fd8ece40b061dc6667dcdcdc75a986fa30a983 Mon Sep 17 00:00:00 2001 From: Micah Lee Date: Thu, 9 Jan 2020 09:47:33 -0800 Subject: [PATCH] Alphabetize the OCR language dropdown --- dangerzone/common.py | 200 +++++++++++++++++++++---------------------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/dangerzone/common.py b/dangerzone/common.py index 3eb4a9b..208d150 100644 --- a/dangerzone/common.py +++ b/dangerzone/common.py @@ -43,165 +43,165 @@ class Common(object): # Languages supported by tesseract self.ocr_languages = { "Afrikaans": "ar", + "Albanian": "sqi", "Amharic": "amh", "Arabic": "ara", + "Arabic script": "Arabic", + "Armenian": "hye", + "Armenian script": "Armenian", "Assamese": "asm", "Azerbaijani": "aze", "Azerbaijani (Cyrillic)": "aze_cyrl", + "Basque": "eus", "Belarusian": "bel", "Bengali": "ben", - "Tibetan Standard": "bod", + "Bengali script": "Bengali", "Bosnian": "bos", "Breton": "bre", "Bulgarian": "bul", + "Burmese": "mya", + "Canadian Aboriginal script": "Canadian_Aboriginal", "Catalan": "cat", "Cebuano": "ceb", - "Czech": "ces", + "Cherokee": "chr", + "Cherokee script": "Cherokee", "Chinese - Simplified": "chi_sim", "Chinese - Simplified (vertical)": "chi_sim_vert", "Chinese - Traditional": "chi_tra", "Chinese - Traditional (vertical)": "chi_tra_vert", - "Cherokee": "chr", "Corsican": "cos", - "Welsh": "cym", + "Croatian": "hrv", + "Cyrillic script": "Cyrillic", + "Czech": "ces", "Danish": "dan", - "German": "deu", + "Devanagari script": "Devanagari", "Divehi": "div", + "Dutch": "nld", "Dzongkha": "dzo", - "Greek": "ell", "English": "eng", "English, Middle (1100-1500)": "enm", "Esperanto": "epo", "Estonian": "est", - "Basque": "eus", + "Ethiopic script": "Ethiopic", "Faroese": "fao", - "Persian": "fas", "Filipino": "fil", "Finnish": "fin", - "French": "fra", + "Fraktur script": "Fraktur", "Frankish": "frk", + "French": "fra", "French, Middle (ca.1400-1600)": "frm", "Frisian (Western)": "fry", "Gaelic (Scots)": "gla", - 
"Irish": "gle", "Galician": "glg", - "Gujarati": "guj", - "Hatian": "hat", - "Hebrew": "heb", - "Hindi": "hin", - "Croatian": "hrv", - "Hungarian": "hun", - "Armenian": "hye", - "Inuktitut": "iku", - "Indonesian": "ind", - "Icelandic": "isl", - "Italian": "ita", - "Italian - Old": "ita_old", - "Javanese": "jav", - "Japanese": "jpn", - "Japanese (vertical)": "jpn_vert", - "Kannada": "kan", "Georgian": "kat", - "Old Georgian": "kat_old", - "Kazakh": "kaz", - "Khmer": "khm", - "Kyrgyz": "kir", - "Korean": "kor", - "Korean (vertical)": "kor_vert", - "Kurdish (Arabic)": "kur_ara", - "Lao": "lao", - "Latin": "lat", - "Latvian": "lav", - "Lithuanian": "lit", - "Luxembourgish": "ltz", - "Malayalam": "mal", - "Marathi": "mar", - "Macedonian": "mkd", - "Maltese": "mlt", - "Mongolian": "mon", - "Maori": "mri", - "Malay": "msa", - "Burmese": "mya", - "Nepali": "nep", - "Dutch": "nld", - "Norwegian": "nor", - "Occitan (post 1500)": "oci", - "Oriya": "ori", - "script and orientation": "osd", - "Punjabi": "pan", - "Polish": "pol", - "Portuguese": "por", - "Pashto": "pus", - "Quechua": "que", - "Romanian": "ron", - "Russian": "rus", - "Sanskrit": "san", - "Sinhala": "sin", - "Slovakian": "slk", - "Slovenian": "slv", - "Sindhi": "snd", - "Spanish": "spa", - "Spanish, Castilian - Old": "spa_old", - "Albanian": "sqi", - "Serbian": "srp", - "Serbian (Latin)": "srp_latn", - "Sundanese": "sun", - "Swahili": "swa", - "Swedish": "swe", - "Syriac": "syr", - "Tamil": "tam", - "Tatar": "tat", - "Telugu": "tel", - "Tajik": "tgk", - "Thai": "tha", - "Tigrinya": "tir", - "Tonga": "ton", - "Turkish": "tur", - "Uyghur": "uig", - "Ukrainian": "ukr", - "Urdu": "urd", - "Uzbek": "uzb", - "Uzbek (Cyrillic)": "uzb_cyrl", - "Vietnamese": "vie", - "Yiddish": "yid", - "Yoruba": "yor", - "Arabic script": "Arabic", - "Armenian script": "Armenian", - "Bengali script": "Bengali", - "Canadian Aboriginal script": "Canadian_Aboriginal", - "Cherokee script": "Cherokee", - "Cyrillic script": "Cyrillic", - 
"Devanagari script": "Devanagari", - "Ethiopic script": "Ethiopic", - "Fraktur script": "Fraktur", "Georgian script": "Georgian", + "German": "deu", + "Greek": "ell", "Greek script": "Greek", + "Gujarati": "guj", "Gujarati script": "Gujarati", "Gurmukhi script": "Gurmukhi", + "Hangul script": "Hangul", + "Hangul (vertical) script": "Hangul_vert", "Han - Simplified script": "HanS", "Han - Simplified (vertical) script": "HanS_vert", "Han - Traditional script": "HanT", "Han - Traditional (vertical) script": "HanT_vert", - "Hangul script": "Hangul", - "Hangul (vertical) script": "Hangul_vert", + "Hatian": "hat", + "Hebrew": "heb", "Hebrew script": "Hebrew", + "Hindi": "hin", + "Hungarian": "hun", + "Icelandic": "isl", + "Indonesian": "ind", + "Inuktitut": "iku", + "Irish": "gle", + "Italian": "ita", + "Italian - Old": "ita_old", + "Japanese": "jpn", "Japanese script": "Japanese", + "Japanese (vertical)": "jpn_vert", "Japanese (vertical) script": "Japanese_vert", + "Javanese": "jav", + "Kannada": "kan", "Kannada script": "Kannada", + "Kazakh": "kaz", + "Khmer": "khm", "Khmer script": "Khmer", + "Korean": "kor", + "Korean (vertical)": "kor_vert", + "Kurdish (Arabic)": "kur_ara", + "Kyrgyz": "kir", + "Lao": "lao", "Lao script": "Lao", + "Latin": "lat", "Latin script": "Latin", + "Latvian": "lav", + "Lithuanian": "lit", + "Luxembourgish": "ltz", + "Macedonian": "mkd", + "Malayalam": "mal", "Malayalam script": "Malayalam", + "Malay": "msa", + "Maltese": "mlt", + "Maori": "mri", + "Marathi": "mar", + "Mongolian": "mon", "Myanmar script": "Myanmar", + "Nepali": "nep", + "Norwegian": "nor", + "Occitan (post 1500)": "oci", + "Old Georgian": "kat_old", "Oriya (Odia) script": "Oriya", + "Oriya": "ori", + "Pashto": "pus", + "Persian": "fas", + "Polish": "pol", + "Portuguese": "por", + "Punjabi": "pan", + "Quechua": "que", + "Romanian": "ron", + "Russian": "rus", + "Sanskrit": "san", + "script and orientation": "osd", + "Serbian (Latin)": "srp_latn", + "Serbian": "srp", + "Sindhi": 
"snd", "Sinhala script": "Sinhala", + "Sinhala": "sin", + "Slovakian": "slk", + "Slovenian": "slv", + "Spanish, Castilian - Old": "spa_old", + "Spanish": "spa", + "Sundanese": "sun", + "Swahili": "swa", + "Swedish": "swe", "Syriac script": "Syriac", + "Syriac": "syr", + "Tajik": "tgk", "Tamil script": "Tamil", + "Tamil": "tam", + "Tatar": "tat", "Telugu script": "Telugu", + "Telugu": "tel", "Thaana script": "Thaana", "Thai script": "Thai", + "Thai": "tha", "Tibetan script": "Tibetan", + "Tibetan Standard": "bod", + "Tigrinya": "tir", + "Tonga": "ton", + "Turkish": "tur", + "Ukrainian": "ukr", + "Urdu": "urd", + "Uyghur": "uig", + "Uzbek (Cyrillic)": "uzb_cyrl", + "Uzbek": "uzb", "Vietnamese script": "Vietnamese", + "Vietnamese": "vie", + "Welsh": "cym", + "Yiddish": "yid", + "Yoruba": "yor", } # Load settings