-- Unicode-aware helpers from Scribunto's mw.ustring library.
local rsubn = mw.ustring.gsub
local U = mw.ustring.char

-- Table returned to callers of this module.
local export = {}

-- Combining marks, built from their code points:
-- U+064E fatha, U+0650 kasra, U+064F damma.
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
-- U+0651 shadda (tashdid) and U+0652 sukun (jazm / sukoon).
local tashdid, jazm = U(0x651), U(0x652)
-- U+0654 hamza above.
local highhmz = U(0x654)

-- Plain characters: the letter he and the zero-width non-joiner (U+200C).
local he = "ه"
local zwnj = U(0x200C)
-- Romanized consonant -> Perso-Arabic letter.
-- STOP! Filtering out incorrect characters is the job of fa-IPA; if a wrong
-- character shows up in the output, look at fa-IPA first rather than here.
local convert_consonants = {
	-- plain ASCII letters
	b = "ب", d = "د", f = "ف", g = "گ",
	h = he, j = "ج", k = "ک", l = "ل",
	m = "م", n = "ن", p = "پ", q = "ق",
	r = "ر", s = "س", t = "ت", w = "و",
	x = "خ", y = "ی", z = "ز",
	-- letters carrying a diacritic
	["č"] = "چ",
	["ğ"] = "غ",
	["š"] = "ش",
	["ž"] = "ژ",
	-- only for Hazaragi
	["ɖ"] = "د",
	["ʈ"] = "ت",
	-- the apostrophe becomes hamza on a yeh seat
	["'"] = "ئ",
}
-- Romanized vowel -> diacritic (short vowels) or letter (long vowels).
local convert_vowels = {
	-- short vowels become combining marks
	a = zabar,
	e = zer,
	o = pesh,
	-- long vowels become full letters
	["â"] = "ا",
	u = "و",
	i = "ی",
}
-- Character lists below are spliced into "[...]" pattern sets by export.tr.

-- Romanized vowels; including â causes issues, so it is left out.
local vowels = "aeoiu"

-- Romanized consonants (the apostrophe counts as one).
local consonants = "bptjčxdrzžsš'ğfqkglmnwvwhy"

-- Characters after which export.tr turns a hyphen into jazm rather than a
-- zero-width non-joiner; the jazm mark itself is part of the set.
-- NOTE(review): presumably "dc" means letters that do not join to the
-- following letter -- confirm.
local dc_consonants = "âdrwvuzž" .. jazm
--- Convert a romanized Persian string into vocalized Perso-Arabic script.
--
-- The text is rewritten by an ordered chain of pattern substitutions. Later
-- rules depend on what earlier ones leave behind, so the order must be kept.
-- Two internal markers are used and stripped again before returning:
--   "#"  marks word edges,
--   "_"  shields an ezafe from the hyphen rules and the word-final rules.
--
-- @param text  romanized input; "-" inside a word is treated as a boundary
--              that becomes jazm or a zero-width non-joiner
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return      the converted string
function export.tr(text, lang, sc)
	-- Reusable capture groups; each one is a single capture, so the %n
	-- references in the replacements below count them left to right.
	local cons = "([" .. consonants .. "])"
	local dc = "([" .. dc_consonants .. "])"
	local non_dc = "([^" .. dc_consonants .. "])"
	local vow = "([" .. vowels .. "])"
	local hyphen = "([-])"

	-- Mark word edges with "#": around the " | " separator, around every
	-- space, and at both ends of the text.
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "`", "")
	-- NOTE(review): if the second character here is a plain space, this rule
	-- can no longer match, because every space was just wrapped as "# #".
	-- The "[ " cleanup at the very end removes the leftover space instead.
	text = rsubn(text, ",".." ", ",")
	-- A comma closes one bracketed term and opens the next.
	text = rsubn(text, ",", "] ,[")
	text = rsubn(text, "%]", "#]#")
	text = rsubn(text, "%[", "#[#")
	-- remove unpronounced or incorrect letters
	text = rsubn(text, "[.]", "")
	text = rsubn(text, "v", "w")
	-- prevent ezafe from being processed
	-- The "#" is kept behind a "_" (as in the "-ye" rules below) so that the
	-- word-final "h" rule further down can never see this "e", even when two
	-- "#" follow it (end of text, or before the " | " separator).
	text = rsubn(text, cons .. hyphen .. "e#", "%1_e_#")
	text = rsubn(text, "([âu])([-])ye#", "%1_ye_#")
	text = rsubn(text, "([i])([-])ye#", "%1yye_#")
	text = rsubn(text, "([y])([-])ye#", "%1ye_#")
	text = rsubn(text, "iy", "ey")
	-- A doubled consonant becomes consonant + tashdid; any other pair of
	-- adjacent consonants gets jazm between them.
	text = rsubn(text, cons .. "%1", "%1" .. tashdid)
	text = rsubn(text, cons .. cons, "%1" .. jazm .. "%2")
	-- Word-initial â is written آ; the apostrophe picks its hamza seat from
	-- the preceding vowel, and apostrophe + â collapses to آ.
	text = rsubn(text, "#â", "#آ")
	text = rsubn(text, "o'", "oؤ")
	text = rsubn(text, "e'", "eئ")
	text = rsubn(text, "'â", "آ")
	-- a/e/o directly before a hyphen gets an "h" appended.
	text = rsubn(text, "([aeo])([-])", "%1h-")
	-- Hyphen handling: after a dc_consonants character the hyphen becomes
	-- jazm, otherwise a zero-width non-joiner. A following â is written آ,
	-- and any other following vowel gets an "â" inserted in front of it.
	text = rsubn(text, dc .. hyphen .. "â", "%1" .. jazm .. "آ")
	text = rsubn(text, non_dc .. hyphen .. "â", "%1" .. zwnj .. "آ")
	text = rsubn(text, dc .. hyphen .. vow, "%1" .. jazm .. "â%3")
	text = rsubn(text, non_dc .. hyphen .. vow, "%1" .. zwnj .. "â%3")
	text = rsubn(text, dc .. hyphen .. cons, "%1" .. jazm .. "%3")
	text = rsubn(text, non_dc .. hyphen .. cons, "%1" .. zwnj .. "%3")
	-- A word-initial vowel gets an "â" in front; word-final a/e/o gets "h".
	text = rsubn(text, "#" .. vow, "#â%1")
	text = rsubn(text, "([aeo])#", "%1h#")
	-- try to find ezafe markings
	-- An ezafe that the rules above turned into ZWNJ + "yeh"/"âeh" after a
	-- vowel + "h" is replaced by a hamza above on that "h".
	text = rsubn(text, "([aeo]h)(" .. zwnj .. "yeh)#", "%1" .. highhmz)
	text = rsubn(text, "([aeo]h)(" .. zwnj .. "âeh)#", "%1" .. highhmz)
	text = rsubn(text, "([âu])_ye_#", "%1ye#")
	-- Drop the remaining ezafe shields.
	text = rsubn(text, "%_", "")
	-- A word-final apostrophe is written as a standalone hamza.
	text = rsubn(text, "(['])#", "ء#")
	-- Map the remaining romanized characters one by one; characters without
	-- an entry in the lookup table are left unchanged.
	text = rsubn(text, ".", convert_consonants)
	text = rsubn(text, ".", convert_vowels)
	-- Strip leftover hyphens and the word-edge markers.
	text = rsubn(text, "[-]", "")
	text = rsubn(text, "#", "")
	text = rsubn(text, "%[".." ", "[") --this prevents weird spacing
	return text
end
return export