-- Table returned by this module; the sections below fill it in.
local data = {}
local U = require("Module:string/char")

-- Combining signs from the Kaithi block, in code-point order.
local candrabindu = U(0x11080)
local anusvAra = U(0x11081)
local visarga = U(0x11082)
local virAma = U(0x110B9)
local nuktA = U(0x110BA)

local acute = U(0x301) -- combining acute

-- NOTE(review): this literal looks like the Devanagari avagraha rather than a
-- Kaithi character -- confirm that this is intentional.
local avagraha = "ऽ"

-- Range of consonant letters, written so it can sit inside a pattern's [...] set.
-- NOTE(review): a range over multi-byte characters presumably relies on the
-- consumer using a Unicode-aware matcher -- confirm against the caller.
local consonants = "𑂍-𑂯"
-- Pattern fragment: one consonant letter, optionally followed by a nukta.
local consonant = string.format("[%s]%s?", consonants, nuktA)
-- Ordered list of {pattern, replacement} pairs for "bho".
-- The position of each rule matters (see the "must go before/after" notes
-- below): multi-letter sequences are listed before the single letters they
-- contain, and the virama rules come after every consonant rule.
-- NOTE(review): presumably the consuming module applies these in sequence with
-- a Unicode-aware gsub -- confirm against the caller.
data["bho"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "𑂊"},
	{"au", "𑂌"},
	-- Diaeresis forms map to the same independent vowels as the plain letters;
	-- they let "aï"/"aü" be typed without triggering the "ai"/"au" rules above.
	{"ä", "𑂃"},
	{"ö", "𑂋"},
	{"ï", "𑂅"},
	{"ü", "𑂇"},
	{"a", "𑂃"},
	{"ā", "𑂄"},
	{"i", "𑂅"},
	{"ī", "𑂆"},
	{"u", "𑂇"},
	{"ū", "𑂈"},
	{"e", "𑂉"},
	{"o", "𑂋"},
	-- {"ṝ", ""},
	-- {"ṛ", "𑂩𑂱"},
	-- {"r̥", "𑂩𑂱"},
	-- {"ḹ", ""},
	-- {"ḷ", ""},
	{"(𑂃)[%-/]([𑂅𑂇])", "%1%2"}, -- a-i, a-u for अइ, अउ; must follow rules for "ai", "au"
	-- Two-letter consonants must go before h.
	{"kh", "𑂎"},
	{"gh", "𑂐"},
	{"ch", "𑂓"},
	{"jh", "𑂕"},
	{"ṭh", "𑂘"},
	{"ḍh", "𑂛"},
	{"ɽh", "𑂜"},
	{"th", "𑂟"},
	{"dh", "𑂡"},
	{"ph", "𑂤"},
	{"bh", "𑂦"},
	{"h", "𑂯"},
	-- Other stops.
	{"k", "𑂍"},
	{"g", "𑂏"},
	{"c", "𑂒"},
	{"j", "𑂔"},
	{"ṭ", "𑂗"},
	{"ḍ", "𑂙"},
	{"ɽ", "𑂚"},
	{"t", "𑂞"},
	{"d", "𑂠"},
	{"p", "𑂣"},
	{"b", "𑂥"},
	-- Nasals.
	{"ṅ", "𑂑"},
	{"ñ", "𑂖"},
	{"ṇ", "𑂝"},
	{"n", "𑂢"},
	{"m", "𑂧"},
	-- Remaining consonants.
	{"y", "𑂨"},
	{"r", "𑂩"},
	{"l", "𑂪"},
	{"v", "𑂫"},
	{"ś", "𑂬"},
	{"ṣ", "𑂭"},
	{"s", "𑂮"},
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	{"~", candrabindu},
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also receives a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute that is still present at this point.
	{acute, ""},
}
-- Independent vowel letter -> the vowel sign that replaces it after a consonant.
local vowels = {
	["𑂅"] = U(0x110B1),
	["𑂆"] = U(0x110B2),
	["𑂇"] = U(0x110B3),
	["𑂈"] = U(0x110B4),
	["𑂉"] = U(0x110B5),
	["𑂊"] = U(0x110B6),
	["𑂋"] = U(0x110B7),
	["𑂌"] = U(0x110B8),
	["𑂄"] = U(0x110B0),
	-- ["𑂩𑂱"] = U(0x110C2),
	-- ["ॠ"] = "",
}
do
	local rules = data["bho"]
	local capturedConsonant = "(" .. consonant .. ")"

	-- Convert independent vowels to diacritics after consonants. Must go after all consonant conversions.
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {capturedConsonant .. letter, "%1" .. sign}
	end

	-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
	-- It deletes the independent letter 𑂃 when it directly follows a consonant.
	rules[#rules + 1] = {capturedConsonant .. "𑂃", "%1"}
end
-- [[w:Harvard-Kyoto]] to [[w:International Alphabet of Sanskrit Transliteration]]
-- Three substitution tables stored at indices 1, 2 and 3.
-- NOTE(review): presumably the consumer applies them in index order -- confirm.
data["bho-tr"] = {
	-- Index 1: single-character keys, plus "/" for the combining acute.
	{
		A = "ā",
		I = "ī",
		U = "ū",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		--["lRR"] = "ḹ",
		["/"] = acute,
	},
	-- Index 2.
	{
		_rh_ = "ɽh",
		-- ["lR"] = "ḷ",
		-- ["RR"] = "ṝ",
	},
	-- Index 3.
	{
		_r_ = "ɽ",
		R = "ṛ",
	},
}

return data