-- Modul:category tree/topic cat

local label_data = require("Module:category tree/topic cat/data")
local topic_cat_utilities_module = "Module:category tree/topic cat/utilities"
local labels_utilities_module = "Module:labels/utilities"
local string_utilities_module = "Module:string utilities"

local m_str_utils = require(string_utilities_module)

local pattern_escape = m_str_utils.pattern_escape
local replacement_escape = m_str_utils.replacement_escape
local split = m_str_utils.split

-- Frame of the current invocation; used further down to preprocess wikitext and to call parser functions.
local current_frame = mw.getCurrentFrame()
-- Title object of the current page; its `text` field is what {{PAGENAME}} is replaced with in descriptions.
local current_title = mw.title.getCurrentTitle()

-- Category object: the prototype table for topic-category instances. Instances are created with
-- `setmetatable(..., Category)`, so method lookups on them fall through to this table via `__index`.

local Category = {}
Category.__index = Category


-- Per-type display data, keyed by the category type name accepted in a label's `type` field.
--   * `desc`: text placed in front of the topic when a description is generated, and in the bullet list shown
--     for mixed-type categories.
--   * `additional`: explanatory note appended to the category description. The `{{{topic}}}` placeholder is
--     filled in later, when template specs are substituted.
local type_data = {
	["berkenaan"] = {
		desc = "Perkataan berkenaan",
		additional = "'''NOTA''': Ini merupakan kategori \"berkenaan\". Ia sepatutnya mengandungi istilah " ..
		"berkenaan {{{topic}}}. Elak dari menambah istilah yang ada hubungan seleweng dengan {{{topic}}}. " ..
		"Berhati-hati dengan istilah jenis atau kejadian topik yang selalunya masuk dalam kategori berasingan.",
	},
	set = {
		desc = "Istilah untuk jenis atau keadaan",
		additional = "'''NOTA''': Ini merupakan kategori set. Ia sepatutnya mengandungi istilah untuk {{{topic}}}, " ..
		"bukan sekadar istilah berkaitan {{{topic}}}. Ia mungkin mengandungi lebih banyak istilah umum (seperti jenis " ..
		"{{{topic}}}) atau lebih banyak istilah spesifik (seperti nama {{{topic}}} spesifik), walau mungkin terdapat " ..
		"kategori berkenaan khusus untuk istilah-istilah tersebut.",
	},
	nama = {
		desc = "Nama-nama spesifik",
		additional = "'''NOTA''': Ini merupakan kategori nama. Ia sepatutnya mengandungi nama {{{topic}}} khusus, " ..
		"bukannya sekadar istilah berkenaan {{{topic}}}, dan sepatutnya tidak mengandungi istilah umum untuk " ..
		"jenis {{{topic}}}.",
	},
	jenis = {
		desc = "Istilah penjenisan",
		additional = "'''NOTA''': Ini merupakan kategori jenis. Ia sepatutnya mengandungi istilah untuk jenis {{{topic}}}, " ..
		"bukannya sekadar istilah berkaitan {{{topic}}}, dan sepatutnya juga tidak mengandungi nama {{{topic}}} khusus.",
	},
	kumpulan = {
		desc = "Kategori berkenaan varian khusus",
		-- The trailing space after "ke" matters: the next fragment starts a new word ("ke subkategori").
		additional = "'''NOTE''': Ini merupakan kategori khusus. Ia tidak sepatutnya mengandungi apa-apa istilah secara " ..
		"langsung tetapi hanya subkategori. Jika terdapat sebarang istilah berkaitan kategori ini, sila alihkannya ke " ..
		"subkategori berkaitan.",
	},
	toplevel = {
		desc = "UNUSED", -- all categories of this type hardcode their description
		additional = "'''NOTA''': Kategori ini merupakan kategori senarai beraras tertinggi. Ia tidak sepatutnya " ..
		"mengandungi sebarang perkataan secara langsung, tetapi sekadar topik berkenaan {{{topic}}}.",
	},
}

-- Raise an error reporting that `types` (the raw, comma-separated type spec) contains an unknown type.
-- The message lists every key of `type_data` as a valid choice. The keys are sorted first because `pairs`
-- has no defined traversal order, and an error message should not change between runs.
local function invalid_type(types)
	local valid_types = {}
	for typ in pairs(type_data) do
		valid_types[#valid_types + 1] = ("'%s'"):format(typ)
	end
	table.sort(valid_types)
	error(("Invalid type '%s', should be one or more of %s, comma-separated")
		:format(types, require("Module:table").serialCommaJoin(valid_types, {dontTag = true})))
end

-- Split a comma-separated type spec into a list of type names, validating each one against `type_data`.
-- A missing spec defaults to "berkenaan". Raises (via `invalid_type`) on the first unknown type.
local function split_types(types)
	local spec = types or "berkenaan"
	local names = split(spec, "%s*,%s*")
	for index = 1, #names do
		local name = names[index]
		-- FIXME: Temporary; "topic" is accepted as an alias of "berkenaan".
		if name == "topic" then
			name = "berkenaan"
		end
		if type_data[name] == nil then
			invalid_type(spec)
		end
		names[index] = name
	end
	return names
end

-- Replace every occurrence of `from` in `str` with `to`, passing `from` through `pattern_escape` and `to`
-- through `replacement_escape` before handing them to gsub. Returns only the resulting string (not the count).
local function gsub_escaping_replacement(str, from, to)
	local escaped_pattern = pattern_escape(from)
	local escaped_replacement = replacement_escape(to)
	local replaced = str:gsub(escaped_pattern, escaped_replacement)
	return replaced
end

-- Construct a Category from `info`, a table that may only contain the keys `code` and `label`.
-- Raises if an unrecognized key is present, if `label` is missing, or if no data is found for the label
-- after `initCommon` has run.
function Category.new(info)
	local recognized = {code = true, label = true}
	for param in pairs(info) do
		if not recognized[param] then
			error("The parameter “" .. param .. "” was not recognized.")
		end
	end

	local category = setmetatable({_info = info}, Category)

	if not category._info.label then
		error("No label was specified.")
	end

	category:initCommon()

	if category._data then
		return category
	end
	error("The label “" .. category._info.label .. "” does not exist.")
end


-- Shared initialization: resolve the language object (when a code was given) and look up the label's data.
-- Sets `self._lang` and `self._data`; may rewrite `self._info.label` to its lowercase-initial form.
function Category:initCommon()
	local info = self._info

	if info.code then
		self._lang = require("Module:languages").getByCode(info.code, true)
	end

	-- Prefer the lowercase-initial spelling of the label when that spelling is a known label.
	local lc_label = mw.getContentLanguage():lcfirst(info.label)
	local labels = label_data["LABELS"]
	if labels[lc_label] then
		info.label = lc_label
	end

	local data = labels[info.label]

	-- No static entry: ask each handler in turn and take the first one that produces data.
	if not data then
		for _, handler in ipairs(label_data["HANDLERS"]) do
			data = handler.handler(info.label)
			if data then
				data.module = handler.module
				break
			end
		end
	end

	self._data = data
end


-- Return the info table (`code` / `label`) this category was built from.
function Category:getInfo()
	local info = self._info
	return info
end


-- Uppercase the first character of `txt` using the content language's rules, skipping over any leading
-- apostrophes (wiki italic/bold markup) so that the first real character is the one that gets capitalized.
-- Declared `local` so the helper does not leak into the global environment.
local function ucfirst(txt)
	local italics, raw_txt = txt:match("^('*)(.-)$")
	return italics .. mw.getContentLanguage():ucfirst(raw_txt)
end


-- Return this category's label with its first character uppercased (see `ucfirst`).
function Category:uclabel()
	local label = self._info.label
	return ucfirst(label)
end


-- Compute the display title for this category from the `displaytitle` field of its data.
--   include_lang_prefix: when truthy and the category has a language, prefix the result with "<code>:".
--   upcase: when truthy, uppercase the first character of the title.
-- Returns nil when the data has no `displaytitle`. If `displaytitle` is a function, it is called with the
-- label and the category's language object (nil for umbrella categories).
function Category:format_displaytitle(include_lang_prefix, upcase)
	local displaytitle = self._data.displaytitle
	if not displaytitle then
		return nil
	end
	if type(displaytitle) == "function" then
		-- Pass the category's own language; a bare `lang` here would be an undefined global (always nil).
		displaytitle = displaytitle(self._info.label, self._lang)
	end
	if upcase then
		displaytitle = ucfirst(displaytitle)
	end
	if include_lang_prefix and self._lang then
		displaytitle = ("%s:%s"):format(self._lang:getCode(), displaytitle)
	end

	return displaytitle
end


-- Return the breadcrumb text for this category plus its `nocap` setting.
-- Lookup order: for umbrella categories, `umbrella.breadcrumb` first; then `breadcrumb`; then the upcased
-- display title; finally the raw label. A string or number breadcrumb is treated as `{name = value}`.
function Category:getBreadcrumbName()
	local data = self._data
	local crumb

	if not self._lang and data.umbrella then
		crumb = data.umbrella.breadcrumb
	end
	crumb = crumb or data.breadcrumb or self:format_displaytitle(false, "upcase") or self._info.label

	local crumb_type = type(crumb)
	if crumb_type == "string" or crumb_type == "number" then
		crumb = {name = crumb}
	end

	local crumb_name = self:substitute_template_specs(crumb.name)
	return crumb_name, crumb.nocap
end


-- Return the `module` field of this category's data.
function Category:getDataModule()
	local data = self._data
	return data.module
end


-- Umbrella categories (those without a language) may be empty; language-specific ones may not.
function Category:canBeEmpty()
	return not self._lang
end


-- Always returns false.
function Category:isHidden()
	local hidden = false
	return hidden
end




-- Return the category name: the upcased label, prefixed with "<language code>:" when a language is set.
function Category:getCategoryName()
	local lang = self._lang
	if not lang then
		return self:uclabel()
	end
	return lang:getCode() .. ":" .. self:uclabel()
end


-- Trailing modifiers recognized after the keyword "default" in a description spec, with the flag each one sets.
-- Every pattern requires at least one character before the modifier.
local default_modifiers = {
	{pattern = "^(.+) no singularize$", flag = "no_singularize"},
	{pattern = "^(.+) wikify$", flag = "wikify"},
	{pattern = "^(.+) with the$", flag = "add_the"},
}

-- Determine whether `desc` is the special description "default", optionally followed by any number of the
-- modifiers "no singularize", "wikify" and "with the" (in any order).
-- Returns `true, no_singularize, wikify, add_the` when it is (each flag is true or nil), otherwise `false`.
-- A non-string `desc` (e.g. nil, when a label has no description) is simply not a default spec, so this
-- returns false rather than raising on the string method call.
function Category:process_default(desc)
	if type(desc) ~= "string" then
		return false
	end

	local flags = {}
	local stripped = desc
	local stripped_one
	-- Peel modifiers off the end one at a time until none is left.
	repeat
		stripped_one = false
		for _, modifier in ipairs(default_modifiers) do
			local rest = stripped:match(modifier.pattern)
			if rest then
				flags[modifier.flag] = true
				stripped = rest
				stripped_one = true
				break
			end
		end
	until not stripped_one

	if stripped == "default" then
		return true, flags.no_singularize, flags.wikify, flags.add_the
	end
	return false
end


-- Expand the special description forms:
--   * "=<text>": a generated sentence built from the per-type prefix followed by <text>;
--   * "default" (plus modifiers, see `process_default`): the same sentence built from the linked label.
-- Any other description is returned unchanged; a nil/false description is returned as-is.
function Category:replace_special_descriptions(desc)
	if not desc then
		return desc
	end

	-- Build "<type desc> <topic text>" for each of the category's types and join them into one sentence.
	local function format_desc(topic_text)
		local parts = {}
		for _, typ in ipairs(split_types(self._data.type)) do
			parts[#parts + 1] = type_data[typ].desc .. " " .. topic_text
		end
		local joined = require("Module:table").serialCommaJoin(parts)
		return "" .. joined .. " untuk bahasa {{{langname}}}."
	end

	if desc:sub(1, 1) == "=" then
		return format_desc(desc:sub(2))
	end

	local is_default, no_singularize, wikify, add_the = self:process_default(desc)
	if not is_default then
		return desc
	end

	local linked_label = require(topic_cat_utilities_module).link_label(self._info.label, no_singularize, wikify)
	if add_the then
		linked_label = "the " .. linked_label
	end
	return format_desc(linked_label)
end


-- Substitute the placeholders {{PAGENAME}}, {{{umbrella_msg}}}, {{{langname}}}, {{{langcode}}}, {{{langcat}}},
-- {{{langlink}}} and {{{topic}}} in `desc`, then preprocess the result as wikitext.
-- The language placeholders are only replaced when the category has a language.
-- Returns nil/false unchanged; numbers are converted to strings; any other non-string value is returned as-is.
function Category:substitute_template_specs(desc)
	if not desc then
		return desc
	end
	if type(desc) == "number" then
		desc = tostring(desc)
	end
	-- FIXME, when does this occur? It doesn't occur in the corresponding place in [[Module:category tree/poscatboiler]].
	if type(desc) ~= "string" then
		return desc
	end
	desc = gsub_escaping_replacement(desc, "{{PAGENAME}}", current_title.text)

	if desc:find("{{{umbrella_msg}}}", 1, true) then
		-- Build the Malay ("ms") counterpart of this category so the message can link to it.
		local ms_info = mw.clone(self._info)
		ms_info.code = "ms"
		local ms_category = Category.new(ms_info)
		local umbrella_msg = "Kategori ini tidak mengandungi entri kamus, hanya kategori lain. Subkategori terdiri daripada dua jenis:\n\n" ..
			"* Subkategori dinamakan seperti \"aa:" .. self:uclabel() ..
			"\" (dengan awalan kod bahasa) merupakan kategori istilah dalam bahasa tertentu. " ..
			"Anda mungkin berminat terutamanya dengan [[:Kategori:" .. ms_category:getCategoryName() .. "]], untuk perkataan bahasa Melayu.\n" ..
			"* Subkategori ini yang dinamakan tanpa awalan kod bahasa merupakan kategori lanjutan seperti kategori ini, tetapi dikhaskan untuk topik yang lebih halus."
		-- The message embeds label text, so it must go through the escaping helper: a raw gsub would treat
		-- any "%" in the label as a capture reference in the replacement string.
		desc = gsub_escaping_replacement(desc, "{{{umbrella_msg}}}", umbrella_msg)
	end
	if self._lang then
		desc = gsub_escaping_replacement(desc, "{{{langname}}}", self._lang:getCanonicalName())
		desc = gsub_escaping_replacement(desc, "{{{langcode}}}", self._lang:getCode())
		desc = gsub_escaping_replacement(desc, "{{{langcat}}}", self._lang:getCategoryName())
		desc = gsub_escaping_replacement(desc, "{{{langlink}}}", self._lang:makeCategoryLink())
	end

	if desc:find("{{{topic}}}", 1, true) then
		local function get_displaytitle_or_label()
			return self:format_displaytitle(false) or self._info.label
		end

		-- If `topic` is a 'default' spec, replace it with the display title (or label). Returns the
		-- resulting topic and whether the spec was a default one.
		local function process_default_add_the(topic)
			-- A missing (or otherwise non-string) spec cannot be a 'default' spec; don't try to parse it.
			if type(topic) ~= "string" then
				return topic, false
			end
			local is_default, _, _, add_the = self:process_default(topic)
			if is_default then
				topic = get_displaytitle_or_label()
				if add_the then
					-- Nothing is prepended in this localization; the branch is kept as the hook for an article.
					topic = "" .. topic
				end
			end
			return topic, is_default
		end

		-- Compute the value for {{{topic}}}. If the user specified `topic`, use it. (If we're an umbrella category,
		-- allow a separate value for `umbrella.topic`, falling back to `topic`.) Otherwise, see if the description
		-- was specified as 'default' or a variant; if so, use the display title or label.
		-- Otherwise, just use the display title or label directly.
		local topic = not self._lang and self._data.umbrella and self._data.umbrella.topic or self._data.topic
		if topic then
			topic = process_default_add_the(topic)
		else
			local data_desc
			if not self._lang then
				data_desc = self._data.umbrella and self._data.umbrella.description or self._data.umbrella_description
			end
			data_desc = data_desc or self._data.description
			local defaulted_desc, is_default = process_default_add_the(data_desc)
			if is_default then
				topic = defaulted_desc
			else
				topic = get_displaytitle_or_label()
			end
		end

		desc = gsub_escaping_replacement(desc, "{{{topic}}}", topic)
	end

	return current_frame:preprocess(desc)
end


-- Return a new table in which every key and every value of `args` has been passed through
-- `substitute_template_specs`. A nil/false `args` is returned unchanged.
function Category:substitute_template_specs_in_args(args)
	if not args then
		return args
	end
	local substituted = {}
	for key, value in pairs(args) do
		local new_key = self:substitute_template_specs(key)
		local new_value = self:substitute_template_specs(value)
		substituted[new_key] = new_value
	end
	return substituted
end


-- Return the wikitext for the top-right boxes of the category page.
-- Default boxes are generated from the `wp`, `wpcat` and `commonscat` data fields; each may be a single value
-- or a list, and a value of `true` stands for the upcased label. Language categories use `topright` when it
-- is set; umbrella categories use `umbrella.topright` when it yields a value; otherwise the defaults are used.
function Category:getTopright()
	local data = self._data
	local boxes = {}

	-- Append one formatted template call to `boxes` per value in `spec`.
	local function add_boxes(spec, template)
		if not spec then
			return
		end
		local default_arg = self:uclabel()
		local values = spec
		if type(values) ~= "table" then
			values = {values}
		end
		for _, value in ipairs(values) do
			if value == true then
				boxes[#boxes + 1] = template:format(default_arg)
			else
				boxes[#boxes + 1] = template:format(value)
			end
		end
	end

	add_boxes(data.wp, "{{wikipedia|%s}}")
	add_boxes(data.wpcat, "{{wikipedia|category=%s}}")
	add_boxes(data.commonscat, "{{commonscat|%s}}")

	local default_topright
	if #boxes > 0 then
		default_topright = table.concat(boxes, "\n")
	end

	if self._lang then
		return self:substitute_template_specs(data.topright or default_topright)
	end

	if data.umbrella then
		local umbrella_topright = self:substitute_template_specs(data.umbrella.topright)
		if umbrella_topright then
			return umbrella_topright
		end
	end
	return (self:substitute_template_specs(default_topright))
end


-- Strip language placeholders from `desc`: a leading "{{{langname}}} ", "{{{langcode}}} " or "{{{langcat}}} ",
-- and every occurrence of "{{{langcode}}}:". The removals are applied in the order listed below.
local function remove_lang_params(desc)
	local removals = {
		"^{{{langname}}} ",
		"{{{langcode}}}:",
		"^{{{langcode}}} ",
		"^{{{langcat}}} ",
	}
	local stripped = desc
	for _, pattern in ipairs(removals) do
		-- Parenthesized so that only the string (not gsub's match count) is kept.
		stripped = (stripped:gsub(pattern, ""))
	end
	return stripped
end


-- Build the description text of this category.
--   isChild: pass the string "child" to get the short form used in a parent's list of children; that form
--   skips the display title and all preceding/additional notes.
-- Language categories return nil/false when their data has no description. Umbrella categories always
-- return a string.
function Category:getDescription(isChild)
	-- Allows different text in the list of a category's children
	local for_child_listing = isChild == "child"
	local data = self._data
	local label = self._info.label

	-- Set the page's display title, but only when rendering the category page itself.
	if not for_child_listing and data.displaytitle then
		local displaytitle = self:format_displaytitle("include lang prefix", "upcase")
		if displaytitle then
			displaytitle = self:substitute_template_specs(displaytitle)
			current_frame:callParserFunction("DISPLAYTITLE", "Kategori:" .. displaytitle)
		end
	end

	-- Message listing the labels that categorize into this category (may be nil).
	local function labels_categorizing_msg()
		local m_labels_utilities = require(labels_utilities_module)
		local found = m_labels_utilities.find_labels_for_category(label, "topic", self._lang)
		return m_labels_utilities.format_labels_categorizing(found, nil, self._lang)
	end

	-- Note explaining what kind of terms belong in the category, chosen by its type(s).
	local function type_note()
		local types = split_types(data.type)
		if #types > 1 then
			local lines = {
				"'''NOTE''': This is a mixed category. It may contain terms of any of the following category types:",
			}
			for i, typ in ipairs(types) do
				local terminator = i == #types and "." or ";"
				lines[#lines + 1] = ("* %s {{{topic}}}%s"):format(type_data[typ].desc, terminator)
			end
			lines[#lines + 1] = "'''WARNING''': Such categories are strongly dispreferred and should be split into separate per-type categories."
			return table.concat(lines, "\n")
		end
		if label == "semua topik" then
			return "'''NOTA''': Kategori ini merupakan kategori topik peringkat tertinggi untuk bahasa {{{langname}}}. Ia tidak sepatutnya mengandungi " ..
			"sebarang perkataan secara langsung, tetapi hanya mempunyai senarai kategori topik disusun mengikut jenis."
		end
		return type_data[types[1]].additional
	end

	-- Append the type note and, when there is one, the labels message.
	local function with_trailing_notes(text)
		text = text .. "\n\n" .. type_note()
		local labels_msg = labels_categorizing_msg()
		if labels_msg then
			text = text .. "\n\n" .. labels_msg
		end
		return text
	end

	if self._lang then
		local desc = self:replace_special_descriptions(data.description)
		if desc and not for_child_listing then
			if data.preceding then
				desc = data.preceding .. "\n\n" .. desc
			end
			if data.additional then
				desc = desc .. "\n\n" .. data.additional
			end
			desc = with_trailing_notes(desc)
		end
		return self:substitute_template_specs(desc)
	end

	-- Umbrella (language-less) category from here on.
	if label == "semua topik" then
		return "Kategori ini diguna pakai pada kandungan bukannya bahan meta berkenaan Wiki ini."
	end

	local umbrella = data.umbrella
	local desc = umbrella and umbrella.description or data.umbrella_description
	local has_umbrella_desc = not not desc
	if not desc and data.description then
		-- Derive the umbrella text from the per-language description, minus its language placeholders
		-- and final period.
		desc = self:replace_special_descriptions(data.description)
		desc = remove_lang_params(desc)
		desc = desc:gsub("%.$", "")
		desc = "Kategori ini berkenaan topik: " .. desc .. "."
	end
	if not desc then
		desc = "Kategori berkenaan " .. label .. " dalam pelbagai bahasa khusus."
	end

	if not for_child_listing then
		-- The per-language preceding/additional text is only reused when no umbrella-specific
		-- description was given.
		local preceding = umbrella and umbrella.preceding or
			not has_umbrella_desc and data.preceding
		local additional = umbrella and umbrella.additional or
			not has_umbrella_desc and data.additional
		if preceding then
			desc = remove_lang_params(preceding) .. "\n\n" .. desc
		end
		if additional then
			desc = desc .. "\n\n" .. remove_lang_params(additional)
		end
		desc = desc .. "\n\n{{{umbrella_msg}}}"
		desc = with_trailing_notes(desc)
	end
	return (self:substitute_template_specs(desc))
end


-- Return the list of parent categories as tables `{name = ..., sort = ...}`, where `name` is either a
-- "Kategori:..." page name or a category object.
-- Returns nil when the data lists no parents, and also for an umbrella category when one of its parents
-- has `module` set or has a sort key or "Kategori:" name containing a language placeholder.
-- Besides the listed parents, every category whose type is not "toplevel" is added to the
-- "Senarai kategori <type>" category of each of its types, and to "Senarai kategori gabungan" when it has
-- more than one type.
function Category:getParents()
	local parents = self._data["parents"]
	local label = self._info.label

	if not self._lang and label == "semua topik" then
		return {{ name = "Kategori:Asas", sort = "topik" }}
	end

	if not parents or #parents == 0 then
		return nil
	end

	local ret = {}

	for _, parent in ipairs(parents) do
		-- Work on a copy so the shared label data is never modified.
		parent = mw.clone(parent)

		if type(parent) ~= "table" then
			parent = {name = parent}
		end

		if not parent.sort then
			-- When defaulting sort key to label, strip 'The ' (e.g. in 'The Matrix', 'The Hunger Games')
			-- and 'A ' (e.g. in 'A Song of Ice and Fire', 'A Christmas Carol') from label.
			parent.sort = label:match("^[Tt]he (.*)$") or label:match("^[Aa] (.*)$") or label
		end

		if self._lang then
			parent.sort = self:substitute_template_specs(parent.sort)
		elseif parent.sort:find("{{{langname}}}") or parent.sort:find("{{{langcat}}}") or parent.module then
			return nil
		end

		if not self._lang then
			-- Umbrella categories get a leading space in their sort key.
			parent.sort = " " .. parent.sort
		end

		if parent.name and parent.name:find("^Kategori:") then
			-- A raw category page name rather than a label.
			if self._lang then
				parent.name = self:substitute_template_specs(parent.name)
			elseif parent.name:find("{{{langname}}}") or parent.name:find("{{{langcat}}}") or parent.module then
				return nil
			end
		else
			local pinfo = mw.clone(self._info)
			pinfo.label = parent.name

			if parent.module then
				-- A reference to a category using another category tree module.
				if not parent.args then
					error("Missing .args in parent table with module=\"" .. parent.module .. "\" for '" ..
						label .. "' topic entry in module '" .. (self._data.module or "unknown") .. "'")
				end
				parent.name = require("Module:category tree/" .. parent.module).new(self:substitute_template_specs_in_args(parent.args))
			else
				parent.name = Category.new(pinfo)
			end
		end

		table.insert(ret, parent)
	end

	if self._data.type ~= "toplevel" then
		local umbrella_sort_prefix = not self._lang and " " or ""
		local types = split_types(self._data.type)
		for _, typ in ipairs(types) do
			local pinfo = mw.clone(self._info)
			pinfo.label = ("Senarai kategori %s"):format(typ)
			table.insert(ret, {name = Category.new(pinfo), sort = umbrella_sort_prefix .. label})
		end
		if #types > 1 then
			local pinfo = mw.clone(self._info)
			-- Plain literal: there is nothing to format here, and no `typ` is in scope after the loop.
			pinfo.label = "Senarai kategori gabungan"
			table.insert(ret, {name = Category.new(pinfo), sort = umbrella_sort_prefix .. label})
		end
	end

	return ret
end


-- This category type has no explicit children; always returns nil.
function Category:getChildren()
	local children = nil
	return children
end


-- Return the umbrella (language-less) category for this category: a new Category built from a copy of the
-- info table with `code` removed. Returns nil when this category already has no language.
-- Raises when the data is flagged `thesaurusonly`.
function Category:getUmbrella()
	if self._lang then
		-- This function (getUmbrella) is fully overridden in [[Module:category tree/ws topic cat]], whose
		-- code never calls it. It is also only called when the category boilerplate is displayed, not
		-- merely when a category object is instantiated. That makes it a safe place to reject an attempt
		-- to create a thesaurus-only category under a regular mainspace title.
		local data = self._data
		if data and data.thesaurusonly then
			error('This is a thesaurus-only category type; you cannot create non-thesaurus categories with it.')
		end

		local umbrella_info = mw.clone(self._info)
		umbrella_info.code = nil
		return Category.new(umbrella_info)
	end
	return nil
end


-- Return the name of the table-of-contents template: "Templat:<code>-categoryTOC", where <code> is the
-- category's language code, or "ms" when there is no language (or no code).
function Category:getTOCTemplateName()
	local code
	if self._lang then
		code = self._lang:getCode()
	end
	if not code then
		code = "ms"
	end
	return "Templat:" .. code .. "-categoryTOC"
end


-- Module interface.
local export = {}

-- Build a category object from `info` without validating its keys. Unlike `export.new`, this returns nil
-- (instead of raising) when no data is found for the label.
function export.main(info)
	local category = setmetatable({}, Category)
	category._info = info

	category:initCommon()

	if category._data then
		return category
	end
	return nil
end

-- Strict constructor: validates `info` and raises on an unknown label.
export.new = Category.new

return export