local export = {}
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC
local u = mw.ustring.char
-- Finds the main stack of a given syllable, which allows all other components to be determined (and is the basis for sorting and transliteration). Currently defaults to Classical Tibetan, but if a ruleset for a specific language exists, it will use that instead. Once the main stack has been located, the process of sorting and transliteration is the same. Because of this, [[Module:Tibt-sortkey]] and [[Module:Tibt-translit]] only need to be pointed at this common function.
-- Uses a (somewhat expanded) implementation of the algorithm found in "Algorithmic description of the decomposition and checking of a Classical Tibetan syllable" by Roux, Hildt & Drupchen: https://escholarship.org/uc/item/70z8069f
--
-- Parameters:
--   text     - the syllable to analyse (string).
--   langCode - language code used to look up "Module:<langCode>-common". If that module cannot be loaded for any reason, [[Module:bo-common]] is used instead.
-- Returns:
--   the main stack (string) when one of the ruleset's main-stack checks succeeds; otherwise, if the syllable matches an entry in the ruleset's `ambiguousSyllables`, the postconverted most likely stack followed by `true`.
-- Raises:
--   "Invalid syllable <text>." when neither of the above applies.
function export.findMainStack(text, langCode)
	-- If a language-specific module exists, use the ruleset in that. If not, fall back on the Tibetan module [[Module:bo-common]].
	-- The concatenation is kept inside the protected call so that a missing langCode also falls back instead of raising.
	local langModuleCheck, langModule = pcall(function()
		return require("Module:" .. langCode .. "-common")
	end)
	if not langModuleCheck then
		langModule = require("Module:bo-common")
	end

	local sc = require("Module:scripts").getByCode("Tibt")
	text = sc:fixDiscouragedSequences(text)
	text = sc:toFixedNFC(text)
	-- Keep the normalised input for the reverse conversion check and the error message.
	local origText, mainStack = text

	-- If halantas are present, the input must be modified so as to treat the parent consonant + any that follow as a pseudo-stack before being processed by the rules. The locations are then stored, so that the pseudo-stack can be converted back again if it is found to be the main stack.
	-- This is only done when the text does not itself end in a halanta.
	local halantaSubs, halantas = {}, {}
	if match(text, "྄") and match(text, "[^྄]$") then
		-- Each entry is {halanta + full-form consonant, subjoined consonant}.
		halantaSubs = {
			{"྄ཀ", "ྐ"}, {"྄ཁ", "ྑ"}, {"྄ག", "ྒ"}, {"྄ང", "ྔ"}, {"྄ཅ", "ྕ"}, {"྄ཆ", "ྖ"}, {"྄ཇ", "ྗ"}, {"྄ཉ", "ྙ"}, {"྄ཊ", "ྚ"}, {"྄ཋ", "ྛ"}, {"྄ཌ", "ྜ"}, {"྄ཎ", "ྞ"}, {"྄ཏ", "ྟ"}, {"྄ཐ", "ྠ"}, {"྄ད", "ྡ"}, {"྄ན", "ྣ"}, {"྄པ", "ྤ"}, {"྄ཕ", "ྥ"}, {"྄བ", "ྦ"}, {"྄མ", "ྨ"}, {"྄ཙ", "ྩ"}, {"྄ཚ", "ྪ"}, {"྄ཛ", "ྫ"}, {"྄ཝ", "ྭ"}, {"྄ཞ", "ྮ"}, {"྄ཟ", "ྯ"}, {"྄འ", "ྰ"}, {"྄ཡ", "ྱ"}, {"྄ར", "ྲ"}, {"྄ལ", "ླ"}, {"྄ཤ", "ྴ"}, {"྄ཥ", "ྵ"}, {"྄ས", "ྶ"}, {"྄ཧ", "ྷ"}, {"྄ཨ", "ྸ"}, {"྄ཪ", "ྼ"}
		}
		local convHalantas = {}
		for _, halantaSub in pairs(halantaSubs) do
			convHalantas[halantaSub[1]] = halantaSub[2]
		end
		-- Private-use character temporarily placed before each replacement so that its position can be located.
		local marker = u(0xF000)
		for halanta in gmatch(text, "྄.") do
			-- Pairs that are not in the table are left as they are (but their position is still recorded).
			local replacement = marker .. (convHalantas[halanta] or halanta)
			-- NOTE(review): `halanta` is used as a pattern here, so a pattern-magic character following the halanta would be interpreted rather than matched literally.
			text = gsub(text, halanta, replacement, 1)
			-- The parentheses keep only the start index returned by find; the positions are appended in the order found.
			halantas[#halantas + 1] = (find(text, marker))
			text = gsub(text, marker, "")
		end
	end

	text = langModule.preconvert(text)
	-- The first truthy value yielded by the ruleset's checks is taken as the main stack.
	for _, check in pairs(langModule.mainStackChecks(text)) do
		if check then
			mainStack = check
			if match(origText, "྄") then
				-- Reverse lookup: subjoined consonant -> halanta + full-form consonant.
				local convHalantas = {}
				for _, halantaSub in pairs(halantaSubs) do
					convHalantas[halantaSub[2]] = halantaSub[1]
				end
				local offset = find(text, mainStack)
				for i, halanta in ipairs(halantas) do
					-- (halanta - offset) + 1 is the index within the stack; the extra (i - 1) allows for one character of growth per earlier expansion.
					-- NOTE(review): this assumes every earlier recorded position lay inside the main stack and that preconvert did not shift positions; a recorded position before the stack yields a non-positive index - confirm this cannot happen.
					-- NOTE(review): gsub replaces the first occurrence of that character in the stack, which is not necessarily the one at the computed index.
					local index = (halanta - offset) + i
					mainStack = gsub(mainStack, sub(mainStack, index, index), convHalantas, 1)
				end
			end
			return mainStack
		end
	end

	-- If ambiguous, return the most likely stack, along with a second value (true) so that this can be taken into account.
	for syllable, likelyStack in pairs(langModule.ambiguousSyllables) do
		if match(text, "^" .. syllable .. "$") then
			return langModule.postconvert(likelyStack), true
		end
	end

	return error("Invalid syllable " .. toNFC(origText) .. ".")
end
-- Returns the gmatch iterator over every maximal run of characters in `text` that belong to the character class built below: ༀ, the range ་-༒, U+0F35, U+0F37, and the range U+0F39-ྼ.
function export.getWords(text)
	local wordPattern = "[ༀ་-༒" .. u(0xF35) .. u(0xF37) .. u(0xF39) .. "-ྼ]+"
	return gmatch(text, wordPattern)
end
-- Returns the gmatch iterator over every maximal run of characters in `text` that belong to the character class built below: ༀ, U+0F35, U+0F37, and the range U+0F39-ྼ. Unlike getWords, the range ་-༒ is not part of the class, so those characters split the runs.
function export.getSyllables(text)
	local syllablePattern = "[ༀ" .. u(0xF35) .. u(0xF37) .. u(0xF39) .. "-ྼ]+"
	return gmatch(text, syllablePattern)
end
return export