-- Modul:Brah-translit
--
-- Documentation for this module may be created at Modul:Brah-translit/doc

-- Table returned by this module; the public `tr` function is attached to it.
local export = {}
-- Shorthand for mw.ustring.char, which turns code points into a UTF-8 string.
local u = mw.ustring.char

-- Brahmi consonant letters mapped to their romanisation WITHOUT the inherent
-- vowel (export.tr appends 'a' or the vowel sign's value itself).
-- Keys are written as code points through `u` rather than as literal Brahmi
-- characters, so the table cannot be damaged by tools that mis-decode
-- astral-plane UTF-8 text.
local consonants = {
--consonants (U+11013 .. U+11034)
	[u(0x11013)]='k', [u(0x11014)]='kh', [u(0x11015)]='g', [u(0x11016)]='gh', [u(0x11017)]='ṅ',
	[u(0x11018)]='c', [u(0x11019)]='ch', [u(0x1101A)]='j', [u(0x1101B)]='jh', [u(0x1101C)]='ñ',
	[u(0x1101D)]='ṭ', [u(0x1101E)]='ṭh', [u(0x1101F)]='ḍ', [u(0x11020)]='ḍh', [u(0x11021)]='ṇ',
	[u(0x11022)]='t', [u(0x11023)]='th', [u(0x11024)]='d', [u(0x11025)]='dh', [u(0x11026)]='n',
	[u(0x11027)]='p', [u(0x11028)]='ph', [u(0x11029)]='b', [u(0x1102A)]='bh', [u(0x1102B)]='m',
	[u(0x1102C)]='y', [u(0x1102D)]='r', [u(0x1102E)]='l', [u(0x1102F)]='v', [u(0x11034)]='ḷ',
	[u(0x11030)]='ś', [u(0x11031)]='ṣ', [u(0x11032)]='s', [u(0x11033)]='h',
-- Old Tamil (U+11075, U+11035 .. U+11037)
	[u(0x11075)] = 'ḷ',
	[u(0x11035)]='ḻ', [u(0x11036)]='ṟ', [u(0x11037)]='ṉ',
}

-- Dependent signs that may follow a consonant, mapped to the text that
-- replaces the inherent 'a'. A virama maps to the empty string, i.e. the
-- consonant is written with no vowel at all.
-- Keys are code points (see `u`) so they survive encoding round trips;
-- combining marks in the values are spelled out by code point for the same
-- reason (U+0325 ring below, U+0304 macron).
local diacritics = {
--matras (U+11038, U+1103A .. U+11045)
	[u(0x11038)]='ā', [u(0x1103A)]='i', [u(0x1103B)]='ī', [u(0x1103C)]='u', [u(0x1103D)]='ū', [u(0x1103E)]='ṛ', [u(0x1103F)]='ṝ',
	[u(0x11040)]='l'..u(0x0325), [u(0x11041)]='l'..u(0x0325, 0x0304), [u(0x11042)]='e', [u(0x11043)]='ai', [u(0x11044)]='o', [u(0x11045)]='au',
	--virama (U+11046)
	[u(0x11046)]='',

	--bhattiprolu aa (U+11039)
	[u(0x11039)]='ā',

	--Old Tamil short e / short o signs and Old Tamil virama
	[u(0x11073)]='ĕ', [u(0x11074)]='ŏ', [u(0x11070)]='',

	-- Old Tamil up to and including Unicode 13.0:
	-- vowel sign e / o followed by virama. Two character vowels!
	[u(0x11042, 0x11046)]='ĕ', [u(0x11044, 0x11046)]='ŏ',
}

-- Independent vowel letters i (U+11007) and u (U+11009) as they are rendered
-- when they directly follow an 'a': with a diaeresis, to show that the two
-- vowels do not merge into 'ai' / 'au'.
local diatrema = {
	[u(0x11007)]='ï', [u(0x11009)]='ü',
}

-- Stand-alone characters (independent vowels, modifiers, digits, punctuation)
-- mapped one-to-one to their romanisation. export.tr applies this table to
-- every character that is left after consonant syllables have been handled.
-- Keys are code points (see `u`); combining marks in the values are spelled
-- out by code point as well (U+0325 ring below, U+0304 macron,
-- U+0310 candrabindu).
local tt = {

--vowels (U+11005 .. U+11012)
	[u(0x11005)]='a', [u(0x11006)]='ā', [u(0x11007)]='i', [u(0x11008)]='ī', [u(0x11009)]='u', [u(0x1100A)]='ū', [u(0x1100B)]='ṛ', [u(0x1100C)]='ṝ',
	[u(0x1100D)]='l'..u(0x0325), [u(0x1100E)]='l'..u(0x0325, 0x0304), [u(0x1100F)]='e', [u(0x11010)]='ai', [u(0x11011)]='o', [u(0x11012)]='au',
	[u(0x11071)]='ĕ', [u(0x11072)]='ŏ', --Old Tamil

	-- chandrabindu (U+11000)
	[u(0x11000)]='m'..u(0x0310), --until a better method is found
	-- anusvara (U+11001)
	[u(0x11001)]='ṃ', --until a better method is found
	-- visarga (U+11002)
	[u(0x11002)]='ḥ',
	--numerals (U+11066 .. U+1106F)
	[u(0x11066)]='0', [u(0x11067)]='1', [u(0x11068)]='2', [u(0x11069)]='3', [u(0x1106A)]='4', [u(0x1106B)]='5', [u(0x1106C)]='6', [u(0x1106D)]='7', [u(0x1106E)]='8', [u(0x1106F)]='9',
	--punctuation
	[u(0x11047)]='.', --danda
	[u(0x11048)]='.' --double danda
}

-- Pattern pieces for export.tr, built from code points so that they do not
-- depend on how this file's non-ASCII literals are encoded.
local brah_virama = u(0x11046)

-- Optional single letter in U+11005 .. U+11033 (independent vowels and consonants).
local brah_optional_letter = '([' .. u(0x11005) .. '-' .. u(0x11033) .. ']?)'

-- { long vowel, Old Tamil short counterpart } pairs used for Prakrit, in the
-- order they are applied.
local brah_prakrit_short = {
	{ u(0x11042), u(0x11073) }, -- vowel sign e  -> short e sign
	{ u(0x1100F), u(0x11071) }, -- letter e      -> short e letter
	{ u(0x11044), u(0x11074) }, -- vowel sign o  -> short o sign
	{ u(0x11011), u(0x11072) }, -- letter o      -> short o letter
}

-- consonant, optional vowel sign and/or virama, optional following letter i/u
local brah_syllable =
	'([' .. u(0x11013) .. '-' .. u(0x11037) .. u(0x11075) .. '])' ..
	'([' .. u(0x11038) .. '-' .. u(0x11046) .. u(0x11073, 0x11074, 0x11070) .. ']?' .. brah_virama .. '?)' ..
	'([' .. u(0x11007, 0x11009) .. ']?)'

-- letter a directly followed by letter i or u
local brah_hiatus = '(' .. u(0x11005) .. ')([' .. u(0x11007, 0x11009) .. '])'

-- 'l' + combining ring below, the default rendering of vocalic l
local brah_vocalic_l = 'l' .. u(0x0325)

-- Old Tamil swaps the marking: plain e/o become long, breve e/o become plain.
local brah_old_tamil_vowels = {e='ē', o='ō', ['ĕ']='e', ['ŏ']='o'}

-- Romanise the sign sequence captured after a consonant.
-- Falls back to a character-by-character lookup when the sequence as a whole
-- is not a key of `diacritics` (e.g. a vowel sign followed by a virama, or a
-- doubled virama), instead of failing on a nil concatenation.
local function brah_sign_value(signs)
	local value = diacritics[signs]
	if value ~= nil then
		return value
	end
	-- Parentheses drop gsub's second return value (the match count).
	return (mw.ustring.gsub(signs, '.', diacritics))
end

-- Transliterate Brahmi text to Latin script.
--   text: the string to transliterate.
--   lang: language code; "inc-pra", "sa", "pi" and "oty" get special handling.
--   sc:   script code; anything other than "Brah" makes the function return nil.
-- Returns the transliterated string, or nil when sc is not "Brah".
function export.tr(text, lang, sc)
	if sc ~= "Brah" then
		return nil
	end

	if lang == "inc-pra" then -- Route contextually shortened Prakrit vowels through Old Tamil short vowels
		for _, pair in ipairs(brah_prakrit_short) do
			text = mw.ustring.gsub(
				text,
				'(' .. pair[1] .. ')' .. brah_optional_letter .. '(' .. brah_virama .. ')' .. brah_optional_letter,
				pair[2] .. '%2%3%4')
		end
	end

	text = mw.ustring.gsub(
		text,
		brah_syllable,
		function(c, d, e)
			local base = consonants[c]
			if base == nil then
				return nil -- unknown consonant: leave the match untouched
			end

			-- No sign at all means the inherent vowel.
			local vowel = 'a'
			if d ~= "" then
				vowel = brah_sign_value(d)
			end

			if e == "" then
				return base .. vowel
			end

			-- A following letter i/u takes a dieresis only after the inherent 'a'.
			local trailing
			if d == "" then
				trailing = diatrema[e]
			else
				trailing = tt[e]
			end
			if trailing == nil then
				return nil
			end
			return base .. vowel .. trailing
		end)

-- Adjacent vowel letters needing dieresis
	text = mw.ustring.gsub(text, brah_hiatus, function(a, b)
		local first, second = tt[a], diatrema[b]
		if first ~= nil and second ~= nil then
			return first .. second
		end
		return nil
	end)

	text = mw.ustring.gsub(text, '.', tt)
	-- Sanskrit and Pali write vocalic l with a dot below; NFC then merges any
	-- following macron into a precomposed letter where one exists.
	if (lang == 'sa' or lang == 'pi') and mw.ustring.match(text, brah_vocalic_l) then
		text = mw.ustring.gsub(text, brah_vocalic_l, 'ḷ')
		text = mw.ustring.toNFC(text)
	end
-- Old Tamil uses macron v. plain for 'e' and 'o'.
	if (lang == 'oty') then
		text = mw.ustring.gsub(text, '.', brah_old_tamil_vowels)
	end

	return text
end
 
-- Hand the module table (with `tr`) back to whoever require()s this module.
return export