dangerzone/share/ocr-languages.json
deeplow 58332fdd6e
tesseract: add new languages and others
Tagalog was replaced with Filipino [1] in newer tesseract versions, so it
does make sense for us to use the new name and map it to the old "tgl"
code (Tagalog) under the hood.

Language names obtained from tesseract's man page [2].

[1]: 58f7a72f00
[2]: https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc
2023-03-16 14:23:30 +00:00

66 lines
1.5 KiB
JSON

{
"Afrikaans": "afr",
"Arabic": "ara",
"Assamese": "asm",
"Azerbaijani": "aze",
"Belarusian": "bel",
"Bengali": "ben",
"Bulgarian": "bul",
"Catalan": "cat",
"Czech": "ces",
"Chinese - Simplified": "chi_sim",
"Chinese - Traditional": "chi_tra",
"Cherokee": "chr",
"Danish": "dan",
"German": "deu",
"Greek": "ell",
"English": "eng",
"English, Middle (1100-1500)": "enm",
"Esperanto": "epo",
"Estonian": "est",
"Basque": "eus",
"Finnish": "fin",
"French": "fra",
"Frankish": "frk",
"French, Middle (ca.1400-1600)": "frm",
"Galician": "glg",
"Greek, Ancient, to 1453": "grc",
"Hebrew": "heb",
"Hindi": "hin",
"Croatian": "hrv",
"Hungarian": "hun",
"Indonesian": "ind",
"Icelandic": "isl",
"Italian": "ita",
"Italian - Old": "ita_old",
"Japanese": "jpn",
"Kannada": "kan",
"Georgian": "kat",
"Korean": "kor",
"Latvian": "lav",
"Lithuanian": "lit",
"Malayalam": "mal",
"Macedonian": "mkd",
"Maltese": "mlt",
"Malay": "msa",
"Dutch": "nld",
"Norwegian": "nor",
"Polish": "pol",
"Portuguese": "por",
"Romanian": "ron",
"Russian": "rus",
"Slovakian": "slk",
"Spanish": "spa",
"Spanish; Castilian - Old": "spa_old",
"Albanian": "sqi",
"Serbian": "srp",
"Swahili": "swa",
"Swedish": "swe",
"Tamil": "tam",
"Telugu": "tel",
"Filipino": "tgl",
"Thai": "tha",
"Turkish": "tur",
"Ukrainian": "ukr",
"Vietnamese": "vie"
}