-- Modul:Brah-translit
-- Documentation for this module may be created at Modul:Brah-translit/doc
-- Module table; functions are attached to it below and it is returned at the
-- end of the file.
local export = {}
-- Short alias for mw.ustring.char, used below to build characters from their
-- code points (e.g. u(0x11075)) instead of writing them as literals.
local u = mw.ustring.char
-- Lookup table from a consonant letter to its romanization WITHOUT a vowel.
-- export.tr indexes it with the first capture of its main pattern and then
-- appends either 'a' or the value of the following vowel sign.
-- NOTE(review): the non-ASCII literals in this copy look mis-encoded
-- (several keys render identically); verify against the upstream module.
local consonants = {
--consonants
['๐']='k', ['๐']='kh', ['๐']='g', ['๐']='gh', ['๐']='แน
',
['๐']='c', ['๐']='ch', ['๐']='j', ['๐']='jh', ['๐']='รฑ',
['๐']='แนญ', ['๐']='แนญh', ['๐']='แธ', ['๐ ']='แธh', ['๐ก']='แน',
['๐ข']='t', ['๐ฃ']='th', ['๐ค']='d', ['๐ฅ']='dh', ['๐ฆ']='n',
['๐ง']='p', ['๐จ']='ph', ['๐ฉ']='b', ['๐ช']='bh', ['๐ซ']='m',
['๐ฌ']='y', ['๐ญ']='r', ['๐ฎ']='l', ['๐ฏ']='v', ['๐ด']='แธท',
['๐ฐ']='ล', ['๐ฑ']='แนฃ', ['๐ฒ']='s', ['๐ณ']='h',
-- Old Tamil
-- The first entry is keyed by code point U+11075, built with u() rather than
-- written as a literal.
[u(0x11075)] = 'แธท',
['๐ต']='แธป', ['๐ถ']='แน', ['๐ท'] ='แน',
}
-- Lookup table from a dependent sign following a consonant to the text that
-- replaces the consonant's inherent 'a'. export.tr indexes it with the second
-- capture of its main pattern, which may be one or two characters long.
-- Entries whose value is '' make the consonant come out with no vowel at all
-- (presumably virama-like signs -- TODO confirm against the upstream module).
-- NOTE(review): the non-ASCII literals in this copy look mis-encoded; verify
-- against the upstream module.
local diacritics = {
--matras
['๐ธ']='ฤ', ['๐บ']='i', ['๐ป']='ฤซ', ['๐ผ']='u', ['๐ฝ']='ลซ', ['๐พ']='แน', ['๐ฟ']='แน',
['๐']='lฬฅ', ['๐']='lฬฅฬ', ['๐']='e', ['๐']='ai', ['๐']='o', ['๐
']='au', ['๐']='',
--bhattiprolu aa
['๐น']='ฤ',
--Old Tamil
['๐ณ']='ฤ', ['๐ด']='ล', [u(0x11070)]='',
-- Old Tamil up to and including Unicode 13.0
-- These keys are two-character sequences; they are the only multi-character
-- keys in this table.
['๐๐']='ฤ', ['๐๐']='ล', -- Two character vowels!
}
-- Lookup table from a vowel letter to the form it takes when it needs a
-- dieresis. export.tr uses it for a vowel letter that directly follows a
-- consonant carrying its inherent 'a', and for a vowel letter that directly
-- follows another vowel letter.
local diatrema = {
['๐']='รฏ', ['๐']='รผ',
}
-- Per-character fallback table: independent vowel letters, chandrabindu,
-- anusvara, visarga, numerals and punctuation. export.tr applies it with
-- gsub(text, '.', tt) after the consonant pass, so every remaining single
-- character that has an entry here is replaced by its value.
-- NOTE(review): the non-ASCII literals in this copy look mis-encoded; verify
-- against the upstream module.
local tt = {
--vowels
['๐
']='a', ['๐']='ฤ', ['๐']='i', ['๐']='ฤซ', ['๐']='u', ['๐']='ลซ', ['๐']='แน', ['๐']='แน',
['๐']='lฬฅ', ['๐']='lฬฅฬ', ['๐']='e', ['๐']='ai', ['๐']='o', ['๐']='au',
['๐ฑ']='ฤ', ['๐ฒ']='ล', --Old Tamil
-- chandrabindu
['๐']='mฬ', --until a better method is found
-- anusvara
['๐']='แน', --until a better method is found
-- visarga
['๐']='แธฅ',
--numerals
['๐ฆ']='0', ['๐ง']='1', ['๐จ']='2', ['๐ฉ']='3', ['๐ช']='4', ['๐ซ']='5', ['๐ฌ']='6', ['๐ญ']='7', ['๐ฎ']='8', ['๐ฏ']='9',
--punctuation
-- Both danda forms are rendered as a single full stop.
['๐']='.', --danda
['๐']='.' --double danda
}
-- Transliterates `text` using the lookup tables defined above.
--
-- Parameters:
--   text  the string to transliterate.
--   lang  language code; 'inc-pra', 'sa', 'pi' and 'oty' each trigger an
--         extra language-specific step below, other values get none.
--   sc    script code; any value other than "Brah" makes the function
--         return nil without touching `text`.
--
-- Returns the transliterated string, or nil when sc is not "Brah".
--
-- The passes below run in a fixed order and each one consumes what the
-- previous one left behind, so they must not be reordered.
-- NOTE(review): the non-ASCII pattern literals in this copy look mis-encoded
-- and some are split across physical lines; verify against the upstream module.
function export.tr(text, lang, sc)
-- Only handle the Brahmi script code; everything else is declined with nil.
if sc ~= "Brah" then
return nil
end
if lang == "inc-pra" then -- Route contextually shortened Prakrit vowels through Old Tamil short vowels
-- Each substitution rewrites only its first capture and re-emits captures
-- 2-4 unchanged (replacement ends in %2%3%4).
text = mw.ustring.gsub(text, '(๐)([๐
-๐ณ]?)(๐)([๐
-๐ณ]?)', '๐ณ%2%3%4')
text = mw.ustring.gsub(text, '(๐)([๐
-๐ณ]?)(๐)([๐
-๐ณ]?)', '๐ฑ%2%3%4')
text = mw.ustring.gsub(text, '(๐)([๐
-๐ณ]?)(๐)([๐
-๐ณ]?)', '๐ด%2%3%4')
text = mw.ustring.gsub(text, '(๐)([๐
-๐ณ]?)(๐)([๐
-๐ณ]?)', '๐ฒ%2%3%4')
end
-- Main consonant pass. The pattern has three captures:
--   c  one consonant (a character range plus U+11075),
--   d  an optional sign from the bracketed set, optionally followed by one
--      more fixed sign -- so d is zero, one or two characters long,
--   e  an optional vowel letter from a two-member set.
text = mw.ustring.gsub(
text,
'([๐-๐ท'..u(0x11075)..'])'..
'([๐ธ๐บ๐บ๐ป๐ผ๐ฝ๐พ๐ฟ๐๐๐๐๐๐
๐๐น๐ณ๐ด'..u(0x11070)..']?๐?)'..
'([๐๐]?)',
-- NOTE(review): diacritics[d] is nil for any d that is not a key of the
-- table (d may be two characters, and only two such pairs are listed), which
-- would raise on concatenation -- confirm every reachable d is covered.
function(c, d, e)
-- No sign but a vowel letter follows: inherent 'a', then the vowel letter in
-- its dieresis form.
if d == "" and e ~= "" then
return consonants[c] .. 'a' .. diatrema[e]
-- Sign and vowel letter: sign value, then the vowel letter's plain tt value.
elseif e ~= "" then
return consonants[c] .. diacritics[d] .. tt[e]
-- Bare consonant: supply the inherent 'a'.
elseif d == "" then
return consonants[c] .. 'a'
-- Consonant plus sign only.
else
return consonants[c] .. diacritics[d]
end
end)
-- Adjacent vowel letters needing dieresis
text = mw.ustring.gsub(text, '([๐
])([๐๐])', function(a, b) return tt[a]..diatrema[b] end)
-- Map every remaining character through tt; characters with no entry in the
-- table are left as they are (gsub keeps the match when the lookup is nil).
text = mw.ustring.gsub(text, '.', tt)
-- For 'sa' and 'pi' only: if the result contains the sequence tested here,
-- replace it and normalize the whole string to NFC afterwards.
if (lang == 'sa' or lang == 'pi') and mw.ustring.match(text, 'lฬฅ') then
text = mw.ustring.gsub(text, 'lฬฅ', 'แธท')
text = mw.ustring.toNFC(text)
end
-- Old Tamil uses macron v. plain for 'e' and 'o'.
-- The exchange is done in a single gsub pass with a lookup table, so a
-- character that has just been swapped is not swapped back.
if (lang == 'oty') then
text = mw.ustring.gsub(text, '.', {e='ฤ', o='ล', ['ฤ']='e', ['ล']='o'})
end
return text
end
-- Hand the module table (with export.tr attached) to whoever loads this module.
return export