-- Modul:typing-aids/testcases

local tests = require('Module:UnitTests')
local m_typing = require('Module:typing-aids')
local get_by_code = require('Module:languages').getByCode

-- Unicode NFD normalisation; check_output applies it to both the actual and
-- the expected string before they are compared.
local decompose = mw.ustring.toNFD

-- Language objects returned by get_by_code, keyed by language code.
local langs = {}
-- Tagging functions built by tag_gen, keyed by language code.
local tag_funcs = {}

-- Returns a function that wraps a string in a <span> whose class is a script
-- code and whose lang attribute is langCode.
--
-- The wrapper is built once per language code and then reused, so the script
-- is detected only from the first test_text seen for that language: one
-- script per language is assumed.
local function tag_gen(test_text, langCode)
	local cached = tag_funcs[langCode]
	if cached then
		return cached
	end

	-- Look the language object up once and remember it for later calls.
	local lang_obj = langs[langCode]
	if not lang_obj then
		lang_obj = get_by_code(langCode) or error('The language code ' .. langCode .. ' is invalid.')
		langs[langCode] = lang_obj
	end

	local scCode = lang_obj:findBestScript(test_text):getCode() or
		error('No script could be found for the text ' .. test_text .. ' and the language code ' .. langCode .. '.')

	-- Build the constant parts of the markup outside the closure.
	local open_tag = '<span class="' .. scCode .. '" lang="' .. langCode .. '">'
	local close_tag = '</span>'
	local function wrap(text)
		return open_tag .. text .. close_tag
	end

	tag_funcs[langCode] = wrap
	return wrap
end

-- Option tables passed to self:equals, memoised per language code.
local options_cache = {}

-- Feeds `code` to m_typing.replace and compares the NFD-normalised result
-- with the NFD-normalised `expected` through self:equals.
--
-- code:            input text handed to replace.
-- expected:        output the replacement should produce.
-- lang:            optional language code; when given it is passed to replace
--                  as the first positional argument, before `code`.
-- transliteration: when truthy, no display option is created for `lang`.
-- sc:              optional value forwarded to replace as its `sc` field.
function tests:check_output(code, expected, lang, transliteration, sc)
	local args
	if lang then
		args = { lang, code, sc = sc }
	else
		args = { code, sc = sc }
	end

	local actual = decompose(m_typing.replace(args))
	local wanted = decompose(expected)

	-- Create the display option lazily, the first time a language is seen
	-- outside transliteration mode.
	local options = options_cache[lang]
	if lang and not transliteration and not options then
		options = { display = tag_gen(actual, lang) }
		options_cache[lang] = options
	end

	self:equals(code, actual, wanted, options)
end

-- Runs every entry of `examples` through check_output.
--
-- If `lang` ends in "-tr", or an entry does not hold exactly three items,
-- entry[1] is the input and entry[2] the expected output. Otherwise (three
-- items, no "-tr") entry[1] and, when it differs, entry[2] are each checked
-- against entry[3]. `sc` is forwarded unchanged.
function tests:do_tests(examples, lang, sc)
	local is_translit = false
	if lang ~= nil then
		is_translit = lang:find("%-tr$") ~= nil
	end

	for _, entry in ipairs(examples) do
		local first, second = entry[1], entry[2]
		if is_translit or #entry ~= 3 then
			self:check_output(first, second, lang, is_translit, sc)
		else
			local target = entry[3]
			self:check_output(first, target, lang, nil, sc)
			if second ~= first then
				self:check_output(second, target, lang, nil, sc)
			end
		end
	end
end

-- Shortcut → expected output pairs that are run without any language code.
function tests:test_all()
	local cases = {
		{ "*dye_'ws", "*dyḗws" },
		{ "*n0mr0to's", "*n̥mr̥tós" },
		{ "*tk'e'yti", "*tḱéyti" },
		{ "*h1es-", "*h₁es-" },
		{ "*t_ep-e'h1(ye)-ti", "*tₔp-éh₁(ye)-ti" },
		{ "*h1e'k'wos", "*h₁éḱwos" },
		{ "*bhebho'ydhe", "*bʰebʰóydʰe" },
		{ "*dh3to's", "*dh₃tós" },
		{ "*t'a_ko^`", "*þākǫ̂" },
		{ "*T'eudo_balt'az", "*Þeudōbalþaz" },
		{ "*bo_kijo_`", "*bōkijǭ" },
		{ "*tat^t^o_", "*taťťō" },
		{ "*d^o_'yyon", "*ďṓyyon" },
	}

	self:do_tests(cases)
end

-- Three-column examples shared by test_Avestan and test_Avestan_tr.
-- With language code "ae", do_tests checks columns 1 and 2 each against
-- column 3; with "ae-tr" it checks column 1 against column 2.
local ae_examples = {
	{ "ap", "ap", "𐬀𐬞" },
	{ "xs.^uuas^", "xṣ̌uuaš", "𐬑𐬴𐬎𐬎𐬀𐬱" },
	{ "v@hrka_na", "vəhrkāna", "𐬬𐬆𐬵𐬭𐬐𐬁𐬥𐬀" },
	{ "nae_za", "naēza", "𐬥𐬀𐬉𐬰𐬀" },
	{ "zaaO", "zā̊", "𐬰𐬃"},
	{ "hizwaO", "hizuuå", "𐬵𐬌𐬰𐬎𐬎𐬂"},
}

-- Runs the shared Avestan examples with language code "ae".
function tests:test_Avestan()
	local lang_code = "ae"
	self:do_tests(ae_examples, lang_code)
end

-- Runs the shared Avestan examples with language code "ae-tr"; the "-tr"
-- suffix puts do_tests into transliteration mode.
function tests:test_Avestan_tr()
	local lang_code = "ae-tr"
	self:do_tests(ae_examples, lang_code)
end

-- Input → expected output pairs run with language code "akk".
function tests:test_Akkadian()
	local cases = {
		{ "ša", "𒊭" },
		-- { "transliteration", "result" },
	}

	self:do_tests(cases, "akk")
end

-- Three-column examples shared by test_Armenian and test_Armenian_tr.
-- With language code "hy", do_tests checks columns 1 and 2 each against
-- column 3; with "hy-tr" it checks column 1 against column 2.
local hy_examples = {
	{ "azgaynac`um", "azgaynacʿum", "ազգայնացում" },
	{ "terew", "terew", "տերև" },
	{ "burz^uazia", "buržuazia", "բուրժուազիա" },
	{ "kol_mnaki", "kołmnaki", "կողմնակի" },
}

-- Runs the shared Armenian examples with language code "hy".
function tests:test_Armenian()
	local lang_code = "hy"
	self:do_tests(hy_examples, lang_code)
end

-- Runs the shared Armenian examples with language code "hy-tr"; the "-tr"
-- suffix puts do_tests into transliteration mode.
function tests:test_Armenian_tr()
	local lang_code = "hy-tr"
	self:do_tests(hy_examples, lang_code)
end

-- Shortcut → expected output pairs run with language code "ar".
function tests:test_Arabic()
	local cases = {
		{ "al-Huruuf al-qamariyyat'", "الْحُرُوف الْقَمَرِيَّة" },
		{ "al-Huruuf al-xamsiyyat'", "الْحُرُوف الشَّمْسِيَّة" },
		{ "ealifu WlwaSli", "أَلِفُ ٱلْوَصْلِ" },
		{ "maae", "مَاء" },
		{ "muemin", "مُؤْمِن" },
		{ "eiDaafat'", "إِضَافَة" },
		{ "eaab", "آب" },
		{ "qureaan", "قُرْآن" },
		{ "qiTTat'", "قِطَّة" },
		{ "faEEaal", "فَعَّال" },
		{ "xayeu", "شَيْءُ" },
		{ "xayeaN", "شَيْءً" },
		{ "daaeimaN", "دَائِمًا" },
		{ "mabduueat'", "مَبْدُوءَة" },
		{ "mabduu'at'", "مَبْدُوءَة" },
		{ "badaaeiyyuN", "بَدَائِيٌّ" },
		{ "badaaeat'", "بَدَاءَة" },
		{ "maktuub", "مَكْتُوب" },
		{ "taHriir", "تَحْرِير" },
		{ "EuZmaaa", "عُظْمَى" },
		{ "ean0", "أَنْ" },
		{ "law0", "لَوْ" },
		{ "xay'aN", "شَيْءً" },
		{ "ta7riir", "تَحْرِير" },
		{ "3axarat'", "عَشَرَة" },
	}

	self:do_tests(cases, "ar")
end

-- Shortcut → expected output pairs run with language code "fa".
function tests:test_Persian()
	local cases = {
		{ "brAdr", "برادر" },
	}

	self:do_tests(cases, "fa")
end

-- Shortcut → expected output pairs run with language code "ine-pro".
function tests:test_PIE()
	local cases = {
		{ "*dye_'ws", "*dyḗws" },
		{ "*n0mr0to's", "*n̥mr̥tós" },
		{ "*tk'e'yti", "*tḱéyti" },
		{ "*h1es-", "*h₁es-" },
		{ "*t_ep-e'h1(ye)-ti", "*tₔp-éh₁(ye)-ti" },
		{ "*h1e'k'wos", "*h₁éḱwos" },
		{ "*bhebho'ydhe", "*bʰebʰóydʰe" },
		{ "*dh3to's", "*dh₃tós" },
		{ "*dhewg'h-", "*dʰewǵʰ-" },
	}

	self:do_tests(cases, "ine-pro")
end

-- Shortcut → expected output pairs run with language code "gem-pro".
function tests:test_Germanic()
	local cases = {
		{ "*t'a_ko^`", "*þākǫ̂" },
		{ "*T'eudo_balt'az", "*Þeudōbalþaz" },
		{ "*bo_kijo_`", "*bōkijǭ" },
	}

	self:do_tests(cases, "gem-pro")
end

-- Input → expected output pairs run with language code "got".
function tests:test_Gothic()
	local cases = {
		{ "ƕaiwa", "𐍈𐌰𐌹𐍅𐌰" },
		{ "anþar", "𐌰𐌽𐌸𐌰𐍂" },
		{ "fidwōr", "𐍆𐌹𐌳𐍅𐍉𐍂" },
		{ "fidwor", "𐍆𐌹𐌳𐍅𐍉𐍂" },
		{ "mikils", "𐌼𐌹𐌺𐌹𐌻𐍃" },
		{ "hēr", "𐌷𐌴𐍂" },
		{ "her", "𐌷𐌴𐍂" },
		{ "vac", "𐍈𐌰𐌸" },
		-- Template for a new entry:
		-- { "", "" },
	}

	self:do_tests(cases, "got")
end

-- Shortcut → expected output pairs run with language code "grk-pro".
function tests:test_Hellenic()
	local cases = {
		{ "*tat^t^o_", "*taťťō" },
		{ "*d^o_'yyon", "*ďṓyyon" },
		{ "*gw@n'n'o_", "*gʷəňňō" },
		{ "*gw@n^n^o_", "*gʷəňňō" },
		{ "*kwhe_r", "*kʷʰēr" },
		{ "*khwe_r", "*kʷʰēr" },
	}

	self:do_tests(cases, "grk-pro")
end

-- Shortcut → expected output pairs run with language code "grc".
function tests:test_Greek()
	local cases = {
		{ "a__i", "ᾱͅ" },
		{ "a)lhqh/s", "ἀληθής" },
		{ "a)lhqhs*", "ἀληθησ" },
		{ "a)lhqhs-", "ἀληθησ-" },
		{ "a^)nh/r", "ᾰ̓νήρ" },
		{ "Phlhi+a/dhs", "Πηληϊάδης" },
		{ "Phlhi^+a^/dhs", "Πηληῐ̈ᾰ́δης" },
		{ "Πηληϊ^ά^δης", "Πηληῐ̈ᾰ́δης" },
		{ "e)a_/n", "ἐᾱ́ν" },
		{ "ἐά_ν", "ἐᾱ́ν" },
		{ "pa=sa^", "πᾶσᾰ" },
		{ "u_(mei=s", "ῡ̔μεῖς" },
		{ "a/)^ner", "ᾰ̓́νερ" },
		{ "a/^)ner", "ᾰ̓́νερ" },
		{ "a)/^ner", "ᾰ̓́νερ" },
		{ "a)^/ner", "ᾰ̓́νερ" },
		{ "dai+/frwn", "δαΐφρων" },
		{ "dai/+frwn", "δαΐφρων" },
	}

	self:do_tests(cases, "grc")
end

-- Input → expected output pairs run with language code "hit".
function tests:test_Hittite()
	local cases = {
		{ "a-ku", "𒀀𒆪" },
		{ "an-tu-wa-ah-ha-as", "𒀭𒌅𒉿𒄴𒄩𒀸" },
		{ "an-tu-wa-aḫ-ḫa-aš", "𒀭𒌅𒉿𒄴𒄩𒀸" },
		{ "<sup>DINGIR</sup>IŠKUR", "𒀭𒅎" }, -- Akkadian actually?
	}

	self:do_tests(cases, "hit")
end

-- Input → expected output pairs run with language code "kn".
function tests:test_Kannada()
	local cases = {
		{ "yaMtra", "ಯಂತ್ರ" },
		{ "sadāśiva", "ಸದಾಶಿವ" },
		{ "muṣṭi", "ಮುಷ್ಟಿ" },
		{ "dhairya", "ಧೈರ್ಯ" },
		{ "ELu", "ಏಳು" },
		{ "iMguzETiyA", "ಇಂಗುಶೇಟಿಯಾ" },
		{ "upayOga", "ಉಪಯೋಗ" },
	}

	self:do_tests(cases, "kn")
end

-- Three-column examples shared by test_Sanskrit and test_Sanskrit_tr.
-- With language code "sa", do_tests checks columns 1 and 2 each against
-- column 3; with "sa-tr" it checks column 1 against column 2.
local sa_examples = {
	{ "saMskRta/", "saṃskṛtá", "संस्कृत" },
	{ "kSatri/ya", "kṣatríya", "क्षत्रिय" },
	{ "rAja suprabuddha", "rāja suprabuddha", "राज सुप्रबुद्ध"},
	{ "zAkyamuni", "śākyamuni", "शाक्यमुनि"},
	{ "siMha", "siṃha", "सिंह"},
	{ "nAman", "nāman", "नामन्"},
	{ "anA/", "anā́", "अना" },
	{ "ayuSmAn", "ayuṣmān", "अयुष्मान्"},
	{ "ghatsyati", "ghatsyati", "घत्स्यति"},
	{ "tApa-i", "tāpa-i", "तापइ" },
	{ "tApaï", "tāpaï", "तापइ" },
}

-- Runs the shared Sanskrit examples with language code "sa".
function tests:test_Sanskrit()
	local lang_code = "sa"
	self:do_tests(sa_examples, lang_code)
end

-- Runs the shared Sanskrit examples with language code "sa-tr"; the "-tr"
-- suffix puts do_tests into transliteration mode.
function tests:test_Sanskrit_tr()
	local lang_code = "sa-tr"
	self:do_tests(sa_examples, lang_code)
end

-- Shortcut → expected output pairs run with language code "mai".
function tests:test_Maithili()
	local cases = {
		{ "maithilI", "𑒧𑒻𑒟𑒱𑒪𑒲" },
		{ "ghO_r_A", "𑒒𑒼𑒛𑓃𑒰" },
		{ "ga_rh_a", "𑒑𑒜𑓃" },
		{ "mokAma", "𑒧𑒽𑒏𑒰𑒧" },
		{ "pa~cakhaNDI", "𑒣𑒿𑒔𑒐𑒝𑓂𑒛𑒲" },
		{ "heraba", "𑒯𑒺𑒩𑒥" },
	}

	self:do_tests(cases, "mai")
end

-- Shortcut → expected output pairs run with language code "mwr".
function tests:test_Marwari()
	local cases = {
		{ "mahAjanI", "𑅬𑅱𑅛𑅧𑅑" },
		{ "mukAMm", "𑅬𑅒𑅕𑅧𑅬" },
		{ "AvalA", "𑅐𑅯𑅮" },
		{ "AgarA", "𑅐𑅗𑅭" },
		{ "upama", "𑅒𑅨𑅬" },
		{ "iMdaura", "𑅑𑅧𑅥𑅒𑅭" },
	}

	self:do_tests(cases, "mwr")
end

-- Input → expected output pairs run with language code "peo".
function tests:test_Old_Persian()
	local cases = {
		{ "aitiiy", "𐎠𐎡𐎫𐎡𐎹" },
		{ "raucah", "𐎼𐎢𐎨𐏃" },
		{ "ham", "𐏃𐎶" },
		{ "jiva", "𐎪𐎺" },
		{ "daraniyakara", "𐎭𐎼𐎴𐎹𐎣𐎼" },
		{ "daragama", "𐎭𐎼𐎥𐎶" },
	}

	self:do_tests(cases, "peo")
end

-- Input → expected output pairs run with language code "xpr"; the script
-- code "Mani" is forwarded to replace as its `sc` field.
function tests:test_Parthian()
	local cases = {
		{ "tšynd", "𐫤𐫢𐫏𐫗𐫅" },
		{ "xʾrtʾg", "𐫟𐫀𐫡𐫤𐫀𐫃" },
		{ "hʾmhyrz", "𐫍𐫀𐫖𐫍𐫏𐫡𐫉" },
		{ "ʿšnwhr", "𐫙𐫢𐫗𐫇𐫍𐫡" },
		{ "hʾwsʾr", "𐫍𐫀𐫇𐫘𐫀𐫡" },
	}

	self:do_tests(cases, "xpr", "Mani")
end

-- Input → expected output pairs run with language code "ja".
function tests:test_Japanese()
	local cases = {
		{ "iro ha nihoheto", "いろ は にほへと" },
		{ "uwyi no okuyama", "うゐ の おくやま" },
		{ "FAMIRI-MA-TO", "ファミリーマート" },
		{ "altu", "あっ" },
		{ "hi/mi/tu", "ひ・み・つ" },
		{ "han'i", "はんい" },
		{ "hanni", "はんい" },
		{ "konnyou", "こんよう" },
		{ "mannnaka", "まんなか" },
		{ "attiike", "あっちいけ" },
		{ "acchiike", "あっちいけ" },
		{ "upnusi", "うpぬし" },
	}

	self:do_tests(cases, "ja")
end

-- Input → expected output pairs run with language code "cu".
function tests:test_Old_Church_Slavonic()
	local cases = {
		{ "ljudije", "людиѥ" },
		{ "azuh", "азъ" },
		{ "buky", "боукꙑ" },
		{ "mŭčati", "мъчати" },
		{ "Iosija", "Иосиꙗ" },
	}

	self:do_tests(cases, "cu")
end


-- Three-column examples shared by test_Old_Marathi and test_Old_Marathi_tr.
-- With language code "omr", do_tests checks columns 1 and 2 each against
-- column 3; with "omr-tr" it checks column 1 against column 2.
local omr_examples = {
	{ "kuhA", "kuhā", "𑘎𑘳𑘮𑘰" },
	{ "nibara", "nibara", "𑘡𑘲𑘤𑘨" },
	{ "nIbara", "nībara", "𑘡𑘲𑘤𑘨" },
	{ "Ai", "āi", "𑘁𑘃" },
	{ "AI", "āī", "𑘁𑘃" },
	{ "suta", "suta", "𑘭𑘳𑘝" },
	{ "sUta", "suta", "𑘭𑘳𑘝" },
	{ "uta", "uta", "𑘄𑘝" },
	{ "Uta", "uta", "𑘄𑘝" },
	{ "na-i", "na-i", "𑘡𑘃" },
	{ "naï", "naï", "𑘡𑘃" },
	{ "a-ila", "a-ila", "𑘀𑘃𑘩" },
	{ "aïla", "aïla", "𑘀𑘃𑘩" },
	{ "bhavai", "bhavai", "𑘥𑘪𑘺" },
	{ "cauka", "cauka", "𑘓𑘼𑘎" },
	{ "ca-utha", "ca-utha", "𑘓𑘄𑘞" },
	{ "caütha", "caütha", "𑘓𑘄𑘞" },
	{ "a-ukSa", "a-ukṣa", "𑘀𑘄𑘎𑘿𑘬" },
	{ "aükSa", "aükṣa", "𑘀𑘄𑘎𑘿𑘬" },
	{ "AThoLI", "āṭhoḷī", "𑘁𑘙𑘻𑘯𑘲" },
	{ "raMbhA", "raṃbhā", "𑘨𑘽𑘥𑘰" },
	{ "hRdA", "hṛdā", "𑘮𑘵𑘟𑘰" },
	{ "Rkha", "ṛkha", "𑘆𑘏" },
	{ "SaDa", "ṣaḍa", "𑘬𑘚" },
	{ "kSeNa", "kṣeṇa", "𑘎𑘿𑘬𑘹𑘜" },
	{ "zobhaNe", "śobhaṇe", "𑘫𑘻𑘥𑘜𑘹" },
	{ "arha", "arha", "𑘀𑘨𑘿𑘮" },
	{ "mar_hATI", "maṟhāṭī", "𑘦𑘨𑘿‍𑘮𑘰𑘘𑘲" },
}

-- Runs the shared Old Marathi examples with language code "omr".
function tests:test_Old_Marathi()
	local lang_code = "omr"
	self:do_tests(omr_examples, lang_code)
end

-- Runs the shared Old Marathi examples with language code "omr-tr"; the
-- "-tr" suffix puts do_tests into transliteration mode.
function tests:test_Old_Marathi_tr()
	local lang_code = "omr-tr"
	self:do_tests(omr_examples, lang_code)
end

-- Input → expected output pairs run with language code "os".
function tests:test_Ossetian()
	local cases = {
		{ "fynʒ", "фындз" },
		{ "æxsæv", "ӕхсӕв" },
		{ "c’æx", "цъӕх" },
		{ "biræǧ", "бирӕгъ" },
		{ "Ræstʒinad", "Рӕстдзинад" },
	}

	self:do_tests(cases, "os")
end

-- Input → expected output pairs run with language code "arc"; the script
-- code "Armi" is forwarded to replace as its `sc` field.
function tests:test_Imperial_Aramaic()
	local cases = {
		{ "'nḥn", "𐡀𐡍𐡇𐡍" },
	}

	self:do_tests(cases, "arc", "Armi")
end

-- Input → expected output pairs run with language code "xsa".
function tests:test_Old_South_Arabian()
	local cases = {
		{ "s²ms¹", "𐩦𐩣𐩪" },
	}

	self:do_tests(cases, "xsa")
end

-- Input → expected output pairs run with language code "inc-kam".
function tests:test_Siddham()
	local cases = {
		{ "kanta", "𑖎𑖡𑖿𑖝" },
		{ "purAna", "𑖢𑖲𑖨𑖯𑖡" },
		{ "Na-i", "𑖜𑖂" },
		{ "kaNNa", "𑖎𑖜𑖿𑖜" },
		{ "samAia", "𑖭𑖦𑖯𑖂𑖀" },
		{ "tujjhu", "𑖝𑖲𑖕𑖿𑖖𑖲" },
		{ "kahante", "𑖎𑖮𑖡𑖿𑖝𑖸" },
	}

	self:do_tests(cases, "inc-kam")
end

-- Input → expected output pairs run with language code "bho".
function tests:test_Kaithi()
	local cases = {
		{ "hanU", "𑂯𑂢𑂴" },
		{ "pa_rh_ahi", "𑂣𑂜𑂯𑂱" },
		{ "siya~", "𑂮𑂱𑂨𑂀" },
		{ "jhara-i", "𑂕𑂩𑂅" },
		{ "jharaï", "𑂕𑂩𑂅" },
		{ "Agi", "𑂄𑂏𑂱" },
		{ "āgi", "𑂄𑂏𑂱" },
	}

	self:do_tests(cases, "bho")
end

-- Input → expected output pairs run with language code "saz".
function tests:test_Saurashtra()
	local cases = {
		{ "pani", "ꢦꢥꢶ" },
		{ "vAg", "ꢮꢵꢔ꣄" },
		{ "ghoDo", "ꢕꣁꢞꣁ" },
		{ "dukkar", "ꢣꢸꢒ꣄ꢒꢬ꣄" },
		{ "l:ovo", "ꢭꢴꣁꢮꣁ" },
	}

	self:do_tests(cases, "saz")
end

-- Input → expected output pairs run with language code "sd".
function tests:test_Sindhi()
	local cases = {
		{ "siMdhī", "𑋝𑋡𑋟𑋐𑋢" },
		{ "bhAGo", "𑋖𑋠𑊿𑋧" },
		{ "mAlu", "𑋗𑋠𑋚𑋣" },
		{ "jeko", "𑋂𑋥𑊺𑋧" },
		{ "xabara", "𑊻𑋩𑋔𑋙" },
		{ "muqAmu", "𑋗𑋣𑊺𑋩𑋠𑋗𑋣" },
		{ "meM", "𑋗𑋥𑋟" },
		{ "gunAhu", "𑊼𑋣𑋑𑋠𑋞𑋣" },
		{ "_gh_araza", "𑊼𑋩𑋙𑋂𑋩" },
		{ "_gh_ufA", "𑊼𑋩𑋣𑋓𑋩𑋠" },
		{ "bA_gh_u", "𑋔𑋠𑊼𑋩𑋣" },
		{ "ba_gh_adAdu", "𑋔𑊼𑋩𑋏𑋠𑋏𑋣" },
		{ "ghaTaNu", "𑊾𑋆𑋌𑋣" },
	}

	self:do_tests(cases, "sd")
end


--[[
function tests:test_Old_North_Arabian()
	-- We need tests to verify that letters with diacritics or modifiers
	-- transliterate correctly.
	local examples = {
		{ "'lšdy", "𐪑𐪁𐪆𐪕𐪚" },
	}
	
	self:do_tests(examples, "sem-tha")
end
--]]

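--[[
To add another example, place the following code
	within the braces of an "examples" table:
		{ "shortcut", "expected result" },
		{ "", "" },
or, for a language that also has a "-tr" test (for example Sanskrit),
	{ "Harvard-Kyoto", "IAST", "Devanagari" },
	{ "", "", "" },
In the three-item form, the first two items are both inputs that must
	produce the third; the "-tr" test instead checks that the first item
	produces the second.
]]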

-- Return the test table, now carrying every test_* method defined above.
return tests