move ocr_languages from global_common to share/

ocr_languages can be treated as just a json file instead of being
in global_common. This way it is easier to maintain and makes
global_common cleaner.
This commit is contained in:
deeplow 2022-09-15 09:38:34 +01:00
parent c0f0e7bf6a
commit 2d6826afa9
No known key found for this signature in database
GPG key ID: 577982871529A52A
2 changed files with 164 additions and 162 deletions

View file

@ -34,168 +34,8 @@ class GlobalCommon(object):
self.container_name = "dangerzone.rocks/dangerzone" self.container_name = "dangerzone.rocks/dangerzone"
# Languages supported by tesseract # Languages supported by tesseract
self.ocr_languages = { with open(get_resource_path("ocr-languages.json"), "r") as f:
"Afrikaans": "ar", self.ocr_languages = json.load(f)
"Albanian": "sqi",
"Amharic": "amh",
"Arabic": "ara",
"Arabic script": "Arabic",
"Armenian": "hye",
"Armenian script": "Armenian",
"Assamese": "asm",
"Azerbaijani": "aze",
"Azerbaijani (Cyrillic)": "aze_cyrl",
"Basque": "eus",
"Belarusian": "bel",
"Bengali": "ben",
"Bengali script": "Bengali",
"Bosnian": "bos",
"Breton": "bre",
"Bulgarian": "bul",
"Burmese": "mya",
"Canadian Aboriginal script": "Canadian_Aboriginal",
"Catalan": "cat",
"Cebuano": "ceb",
"Cherokee": "chr",
"Cherokee script": "Cherokee",
"Chinese - Simplified": "chi_sim",
"Chinese - Simplified (vertical)": "chi_sim_vert",
"Chinese - Traditional": "chi_tra",
"Chinese - Traditional (vertical)": "chi_tra_vert",
"Corsican": "cos",
"Croatian": "hrv",
"Cyrillic script": "Cyrillic",
"Czech": "ces",
"Danish": "dan",
"Devanagari script": "Devanagari",
"Divehi": "div",
"Dutch": "nld",
"Dzongkha": "dzo",
"English": "eng",
"English, Middle (1100-1500)": "enm",
"Esperanto": "epo",
"Estonian": "est",
"Ethiopic script": "Ethiopic",
"Faroese": "fao",
"Filipino": "fil",
"Finnish": "fin",
"Fraktur script": "Fraktur",
"Frankish": "frk",
"French": "fra",
"French, Middle (ca.1400-1600)": "frm",
"Frisian (Western)": "fry",
"Gaelic (Scots)": "gla",
"Galician": "glg",
"Georgian": "kat",
"Georgian script": "Georgian",
"German": "deu",
"Greek": "ell",
"Greek script": "Greek",
"Gujarati": "guj",
"Gujarati script": "Gujarati",
"Gurmukhi script": "Gurmukhi",
"Hangul script": "Hangul",
"Hangul (vertical) script": "Hangul_vert",
"Han - Simplified script": "HanS",
"Han - Simplified (vertical) script": "HanS_vert",
"Han - Traditional script": "HanT",
"Han - Traditional (vertical) script": "HanT_vert",
"Hatian": "hat",
"Hebrew": "heb",
"Hebrew script": "Hebrew",
"Hindi": "hin",
"Hungarian": "hun",
"Icelandic": "isl",
"Indonesian": "ind",
"Inuktitut": "iku",
"Irish": "gle",
"Italian": "ita",
"Italian - Old": "ita_old",
"Japanese": "jpn",
"Japanese script": "Japanese",
"Japanese (vertical)": "jpn_vert",
"Japanese (vertical) script": "Japanese_vert",
"Javanese": "jav",
"Kannada": "kan",
"Kannada script": "Kannada",
"Kazakh": "kaz",
"Khmer": "khm",
"Khmer script": "Khmer",
"Korean": "kor",
"Korean (vertical)": "kor_vert",
"Kurdish (Arabic)": "kur_ara",
"Kyrgyz": "kir",
"Lao": "lao",
"Lao script": "Lao",
"Latin": "lat",
"Latin script": "Latin",
"Latvian": "lav",
"Lithuanian": "lit",
"Luxembourgish": "ltz",
"Macedonian": "mkd",
"Malayalam": "mal",
"Malayalam script": "Malayalam",
"Malay": "msa",
"Maltese": "mlt",
"Maori": "mri",
"Marathi": "mar",
"Mongolian": "mon",
"Myanmar script": "Myanmar",
"Nepali": "nep",
"Norwegian": "nor",
"Occitan (post 1500)": "oci",
"Old Georgian": "kat_old",
"Oriya (Odia) script": "Oriya",
"Oriya": "ori",
"Pashto": "pus",
"Persian": "fas",
"Polish": "pol",
"Portuguese": "por",
"Punjabi": "pan",
"Quechua": "que",
"Romanian": "ron",
"Russian": "rus",
"Sanskrit": "san",
"script and orientation": "osd",
"Serbian (Latin)": "srp_latn",
"Serbian": "srp",
"Sindhi": "snd",
"Sinhala script": "Sinhala",
"Sinhala": "sin",
"Slovakian": "slk",
"Slovenian": "slv",
"Spanish, Castilian - Old": "spa_old",
"Spanish": "spa",
"Sundanese": "sun",
"Swahili": "swa",
"Swedish": "swe",
"Syriac script": "Syriac",
"Syriac": "syr",
"Tajik": "tgk",
"Tamil script": "Tamil",
"Tamil": "tam",
"Tatar": "tat",
"Telugu script": "Telugu",
"Telugu": "tel",
"Thaana script": "Thaana",
"Thai script": "Thai",
"Thai": "tha",
"Tibetan script": "Tibetan",
"Tibetan Standard": "bod",
"Tigrinya": "tir",
"Tonga": "ton",
"Turkish": "tur",
"Ukrainian": "ukr",
"Urdu": "urd",
"Uyghur": "uig",
"Uzbek (Cyrillic)": "uzb_cyrl",
"Uzbek": "uzb",
"Vietnamese script": "Vietnamese",
"Vietnamese": "vie",
"Welsh": "cym",
"Yiddish": "yid",
"Yoruba": "yor",
}
# Load settings # Load settings
self.settings = Settings(self) self.settings = Settings(self)

162
share/ocr-languages.json Normal file
View file

@ -0,0 +1,162 @@
{
"Afrikaans": "ar",
"Albanian": "sqi",
"Amharic": "amh",
"Arabic": "ara",
"Arabic script": "Arabic",
"Armenian": "hye",
"Armenian script": "Armenian",
"Assamese": "asm",
"Azerbaijani": "aze",
"Azerbaijani (Cyrillic)": "aze_cyrl",
"Basque": "eus",
"Belarusian": "bel",
"Bengali": "ben",
"Bengali script": "Bengali",
"Bosnian": "bos",
"Breton": "bre",
"Bulgarian": "bul",
"Burmese": "mya",
"Canadian Aboriginal script": "Canadian_Aboriginal",
"Catalan": "cat",
"Cebuano": "ceb",
"Cherokee": "chr",
"Cherokee script": "Cherokee",
"Chinese - Simplified": "chi_sim",
"Chinese - Simplified (vertical)": "chi_sim_vert",
"Chinese - Traditional": "chi_tra",
"Chinese - Traditional (vertical)": "chi_tra_vert",
"Corsican": "cos",
"Croatian": "hrv",
"Cyrillic script": "Cyrillic",
"Czech": "ces",
"Danish": "dan",
"Devanagari script": "Devanagari",
"Divehi": "div",
"Dutch": "nld",
"Dzongkha": "dzo",
"English": "eng",
"English, Middle (1100-1500)": "enm",
"Esperanto": "epo",
"Estonian": "est",
"Ethiopic script": "Ethiopic",
"Faroese": "fao",
"Filipino": "fil",
"Finnish": "fin",
"Fraktur script": "Fraktur",
"Frankish": "frk",
"French": "fra",
"French, Middle (ca.1400-1600)": "frm",
"Frisian (Western)": "fry",
"Gaelic (Scots)": "gla",
"Galician": "glg",
"Georgian": "kat",
"Georgian script": "Georgian",
"German": "deu",
"Greek": "ell",
"Greek script": "Greek",
"Gujarati": "guj",
"Gujarati script": "Gujarati",
"Gurmukhi script": "Gurmukhi",
"Hangul script": "Hangul",
"Hangul (vertical) script": "Hangul_vert",
"Han - Simplified script": "HanS",
"Han - Simplified (vertical) script": "HanS_vert",
"Han - Traditional script": "HanT",
"Han - Traditional (vertical) script": "HanT_vert",
"Hatian": "hat",
"Hebrew": "heb",
"Hebrew script": "Hebrew",
"Hindi": "hin",
"Hungarian": "hun",
"Icelandic": "isl",
"Indonesian": "ind",
"Inuktitut": "iku",
"Irish": "gle",
"Italian": "ita",
"Italian - Old": "ita_old",
"Japanese": "jpn",
"Japanese script": "Japanese",
"Japanese (vertical)": "jpn_vert",
"Japanese (vertical) script": "Japanese_vert",
"Javanese": "jav",
"Kannada": "kan",
"Kannada script": "Kannada",
"Kazakh": "kaz",
"Khmer": "khm",
"Khmer script": "Khmer",
"Korean": "kor",
"Korean (vertical)": "kor_vert",
"Kurdish (Arabic)": "kur_ara",
"Kyrgyz": "kir",
"Lao": "lao",
"Lao script": "Lao",
"Latin": "lat",
"Latin script": "Latin",
"Latvian": "lav",
"Lithuanian": "lit",
"Luxembourgish": "ltz",
"Macedonian": "mkd",
"Malayalam": "mal",
"Malayalam script": "Malayalam",
"Malay": "msa",
"Maltese": "mlt",
"Maori": "mri",
"Marathi": "mar",
"Mongolian": "mon",
"Myanmar script": "Myanmar",
"Nepali": "nep",
"Norwegian": "nor",
"Occitan (post 1500)": "oci",
"Old Georgian": "kat_old",
"Oriya (Odia) script": "Oriya",
"Oriya": "ori",
"Pashto": "pus",
"Persian": "fas",
"Polish": "pol",
"Portuguese": "por",
"Punjabi": "pan",
"Quechua": "que",
"Romanian": "ron",
"Russian": "rus",
"Sanskrit": "san",
"script and orientation": "osd",
"Serbian (Latin)": "srp_latn",
"Serbian": "srp",
"Sindhi": "snd",
"Sinhala script": "Sinhala",
"Sinhala": "sin",
"Slovakian": "slk",
"Slovenian": "slv",
"Spanish, Castilian - Old": "spa_old",
"Spanish": "spa",
"Sundanese": "sun",
"Swahili": "swa",
"Swedish": "swe",
"Syriac script": "Syriac",
"Syriac": "syr",
"Tajik": "tgk",
"Tamil script": "Tamil",
"Tamil": "tam",
"Tatar": "tat",
"Telugu script": "Telugu",
"Telugu": "tel",
"Thaana script": "Thaana",
"Thai script": "Thai",
"Thai": "tha",
"Tibetan script": "Tibetan",
"Tibetan Standard": "bod",
"Tigrinya": "tir",
"Tonga": "ton",
"Turkish": "tur",
"Ukrainian": "ukr",
"Urdu": "urd",
"Uyghur": "uig",
"Uzbek (Cyrillic)": "uzb_cyrl",
"Uzbek": "uzb",
"Vietnamese script": "Vietnamese",
"Vietnamese": "vie",
"Welsh": "cym",
"Yiddish": "yid",
"Yoruba": "yor"
}