-- Table of functions exported by this module. It is declared `local` so that
-- loading the module neither creates nor overwrites a global named `export`.
local export = {}
-- Shorthand for Scribunto's Unicode-aware pattern matcher.
local match = mw.ustring.match
--- Unicode-aware gsub that returns only the resulting string.
-- mw.ustring.gsub also returns the number of substitutions; the extra
-- parentheses discard it so the result can be passed straight to functions
-- such as table.insert without smuggling in a second argument.
-- @param text        string to search
-- @param regex       ustring pattern
-- @param replacement replacement string, table or function
-- @return the text with all substitutions applied
local function ugsub(text, regex, replacement)
	return (mw.ustring.gsub(text, regex, replacement))
end
-- Every letter this module classifies. The trailing Latin "w" is not a Coptic
-- letter: makeSortKey temporarily writes ⲩ as "w" when it stands next to a
-- vowel, and listing it here makes that placeholder count as a consonant.
local alphabet = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥⳉϧϩϫϭw"
-- The letters treated as vowels, and a character class matching one of them.
local vowels = "ⲁⲉⲏⲓⲟⲩⲱ"
local vowel = string.format("[%s]", vowels)
-- Consonants are whatever is left of the alphabet once the vowels are removed.
local consonants = ugsub(alphabet, vowel, "")
local consonant = string.format("[%s]", consonants)
-- Normalizations that makeSortKey applies to the lowercased text before the
-- sort key is assembled. Each key is passed to string.gsub as a pattern and
-- every occurrence is replaced with the associated value, so a new key that
-- contains pattern magic characters would need %-escaping.
-- NOTE(review): makeSortKey walks this table with pairs(), whose order is
-- unspecified. Some entries can produce input for others (e.g. "ⳤ" yields
-- "ⲕⲉ", which followed by "ⲓ" forms "ⲉⲓ"), so the result for such text
-- depends on iteration order — confirm whether a fixed order is intended.
local replacements = {
["ⲟⲩ"] = "ⲩ", -- the digraph ⲟⲩ is reduced to ⲩ
["ⳤ"] = "ⲕⲉ", -- the symbol ⳤ is written out as ⲕⲉ
["ⲉⲓ"] = "ⲓ", -- the digraph ⲉⲓ is reduced to ⲓ
["ϯ"] = "ⲧⲓ", -- the letter ϯ is written out as ⲧⲓ
["-"] = "", -- the remaining entries delete the character entirely
["⸗"] = "",
["ˋ"] = "",
}
-- Lookup from each Greek-derived Coptic letter to its Greek counterpart.
-- makeSortKey passes this table to string.gsub as the replacement, so any
-- character without an entry here is left unchanged.
local CopticToGreek = {
	["ⲁ"] = "α", ["ⲃ"] = "β", ["ⲅ"] = "γ", ["ⲇ"] = "δ",
	["ⲉ"] = "ε", ["ⲍ"] = "ζ", ["ⲏ"] = "η", ["ⲑ"] = "θ",
	["ⲓ"] = "ι", ["ⲕ"] = "κ", ["ⲗ"] = "λ", ["ⲙ"] = "μ",
	["ⲛ"] = "ν", ["ⲝ"] = "ξ", ["ⲟ"] = "ο", ["ⲡ"] = "π",
	["ⲣ"] = "ρ", ["ⲥ"] = "σ", ["ⲧ"] = "τ", ["ⲩ"] = "υ",
	["ⲫ"] = "φ", ["ⲭ"] = "χ", ["ⲯ"] = "ψ", ["ⲱ"] = "ω",
}
--- Build the sort key for a Coptic term.
-- Each whitespace-separated word contributes, in this order: its initial
-- vowel (if any), its consonants, a marker digit ("1" when the word ends in a
-- consonant, "2" when it ends in a vowel), its non-initial vowels, and a
-- space. Greek-derived Coptic letters are finally replaced by Greek letters.
-- @param text string to make a sort key for; nil yields nil
-- @param lang not used by this function
-- @param sc   script code; when given and not "Copt", text is returned as is
-- @return the sort key string, or text unchanged / nil as described above
function export.makeSortKey(text, lang, sc)
	if sc and sc ~= "Copt" then
		return text
	end
	if not text then
		return nil
	end

	local str_gsub = string.gsub

	text = mw.ustring.lower(text)
	-- NOTE(review): pairs() visits the entries in unspecified order, and some
	-- replacements can feed others; confirm whether a fixed order is wanted.
	for letter, replacement in pairs(replacements) do
		text = str_gsub(text, letter, replacement)
	end

	-- A ⲩ directly before or after a vowel is rewritten as the placeholder
	-- "w", which belongs to the consonant class, so it is grouped with the
	-- consonants below. The placeholder is turned back into ⲩ afterwards.
	text = ugsub(text, "ⲩ(" .. vowel .. ")", "w%1")
	text = ugsub(text, "(" .. vowel .. ")ⲩ", "%1w")

	-- These patterns do not depend on the word, so build them once.
	local initial_vowel = "^" .. vowel
	local vowel_run = vowel .. "+"
	local final_consonant = consonant .. "$"
	local final_vowel = vowel .. "$"

	local sort = {}
	for word in mw.ustring.gmatch(text, "%S+") do
		-- Add initial vowel (if any).
		local first = match(word, initial_vowel)
		if first then
			sort[#sort + 1] = first
		end

		-- Add consonants (in order) by deleting every run of vowels.
		sort[#sort + 1] = ugsub(word, vowel_run, "")

		-- Add "1" if the word ends in a consonant, "2" if it ends in a vowel,
		-- and nothing if it ends in any other character. The digits sort
		-- before the Greek–Coptic and Coptic Unicode blocks.
		if match(word, final_consonant) then
			sort[#sort + 1] = "1"
		elseif match(word, final_vowel) then
			sort[#sort + 1] = "2"
		end

		-- Add non-initial vowels (in order) by removing the initial vowel and
		-- all consonants.
		sort[#sort + 1] = ugsub(ugsub(word, initial_vowel, ""), consonant, "")
		sort[#sort + 1] = " "
	end

	sort = table.concat(sort)
	-- Restore the placeholder to the letter it stood for.
	sort = str_gsub(sort, "w", "ⲩ")

	--[[
		Convert Greek-derived Coptic characters to Greek ones.
		Otherwise, the uniquely Coptic letters would sort first, because
		they were added to Unicode earlier.
			ϣϥⳉϧϩϫϭ ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ
						⇓
			αβγδεζηθικλμνξοπρστυφχψω ϣϥⳉϧϩϫϭ
		The byte pattern matches one multi-byte UTF-8 sequence at a time;
		sequences with no entry in CopticToGreek are left untouched.
	]]
	sort = str_gsub(sort, "[\194-\244][\128-\191]+", CopticToGreek)
	return sort
end
-- Language and script objects handed to tag_text when terms are displayed:
-- the language with code "cop" and the script with code "Copt".
local languages_module = require("Module:languages")
local lang = languages_module.getByCode("cop")
local scripts_module = require("Module:scripts")
local sc = scripts_module.getByCode("Copt")
--- Format text through Module:script utilities' tag_text, using this
-- module's language and script objects.
-- @param text string to tag
-- @return whatever tag_text returns for the given text
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc)
end
--- Show how a list of terms sorts under makeSortKey.
-- The positional arguments of the frame are sorted by their sort keys and
-- returned as a wikitext bulleted list, each term followed by its key.
-- @param frame frame object whose positional args are the terms to sort
-- @return wikitext string with one "* term (<code>key</code>)" line per term
function export.showSorting(frame)
	local terms = {}
	-- Sort key of each distinct term, computed once instead of on every
	-- comparison made by table.sort and again when formatting the output.
	local keys = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
		if keys[term] == nil then
			keys[term] = export.makeSortKey(term)
		end
	end

	table.sort(terms, function(term1, term2)
		return keys[term1] < keys[term2]
	end)

	local lines = {}
	for i, term in ipairs(terms) do
		lines[i] = "\n* " .. tag(term) .. " (<code>" .. keys[term] .. "</code>)"
	end
	return table.concat(lines)
end
-- Return the table holding makeSortKey and showSorting as the module's value.
return export