Modul:module documentation

local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint
local concat = table.concat
local insert = table.insert
local u = m_str_utils.char
local rsplit = m_str_utils.split

local export = {}

--[===[ intro:
This module automatically generates documentation for other modules. It fetches in-line comments in Lua code and
converts them into a form that can be used on a documentation page via {{tl|module documentation}}. In fact, this
module's documentation is an example of it in action!

It's helpful to do documentation this way, as it means function/method documentation is available in two places:
at the top of the module page (as conventional Wikitext), and above the function itself (as a Lua comment). Each
suits a different kind of editing style, and doing it this way keeps them synchronised.

A section of documentation is given using Lua's multi-line comment syntax, which looks something like this:
{--[==[ ... ]==]}. The number of equal signs should normally be two in order for the documentation to be properly
snarfed by {{tl|module documentation}}. The following conventions can be used inside of the documentation:
# Long lines (both in regular paragraphs and in list items) can be broken by newlines optionally followed by spaces
  or tabs (especially useful in lists, to make the raw comment more readable). In such a case, the newline is converted
  to a space. Use two newlines in a row to break paragraphs. In general, it's recommended to break lines after at most
  120 characters, to facilitate reading the raw comment.
# Template calls (using two braces) can be inserted literally and will be expanded.
# Single braces can be used to surround literal text, and will automatically be syntax-highlighted as Lua code. Nested
  braces inside of this literal text will be properly handled as long as they're balanced. If the first character of the
  literal text is itself a brace, put a space before it (but not at the end), and it will be ignored.
# Backquotes can be used to surround literal text, which will be displayed using {<code>...</code>}. The stuff inside of
  backquotes cannot contain a backquote (except for double-backquoted placeholder variable names) or extend to multiple
  lines.
# Double backquotes can be used to surround placeholder variable names, which will be displayed using {<var>...</var>}.
  The stuff inside of backquotes can only contain letters, numbers, spaces, underscores, hyphens and periods.
  Normally, such placeholders should be displayed in a monospace font, as if surrounded by {<code>...</code>}. This
  effect can be achieved by using triple backquotes, which effectively uses the double-backquote syntax inside the
  single-backquote syntax.

Certain special directives can follow the opening multiline comment indicator, if placed on the same line as the
indicator. In particular, the following directives are currently recognized:

* The directive `intro:` by itself signals introductory text, which will be placed at the beginning, prior to function
  documentation. This is useful to give a general introduction/overview of the module.
* The directive `func: export.<var>function</var>(<var>arg1</var>, <var>arg2</var>, ...)` can be used when documenting a
  function declared in a nonstandard way (e.g. through a metatable, through an anonymous or locally-declared function
  assigned to the `export` table, etc.). The directive indicates the desired way for the function to appear, and the
  remainder of the comment describes the function's operation, as usual.
]===]

-- Temporary stand-in characters used by format_doc(). A literal "{" or a newline that must survive the code-block
-- and line-joining passes is swapped for one of these and swapped back once those passes have run.
-- NOTE(review): presumably U+FFF0/U+FFF1 were picked because they never occur in real module comments -- confirm.
local TEMP_LEFT_BRACE = u(0xFFF0)
local TEMP_NEWLINE = u(0xFFF1)

-- Convert the raw text of one documentation comment into wikitext.
--
-- Parameters:
--   str:   the comment text to convert.
--   stash: internal use only; supplied by the recursive call made for wikitables. It is the table of stashed
--          <syntaxhighlight> snippets (array part) plus a counter in field `n`, owned by the outermost call.
--
-- Returns the converted text as a single string. When `stash` was supplied (nested call), the returned text still
-- contains the placeholder characters for stashed code blocks and protected newlines; the outermost call restores
-- them after ALL of its own passes have run. Restoring them in the nested call (as was done previously) exposed
-- code inside wikitables to the outer call's line-joining, parameter-name and underline passes.
local function format_doc(str, stash)
	-- Each stashed code block is represented by the single character U+100000 + index.
	local private_use_start = 0x100000
	local is_outermost = stash == nil
	if is_outermost then
		stash = {n = 0}
	end
	local subbed_str = (str
		-- Multiline literal text between backquotes; you can't use <pre> or <syntaxhighlight> because that
		-- disables Wikitext parsing for <var>...</var>, italics, <span>...</span> etc. Instead use the trick of
		-- putting a space at the beginning of each line, which yields monospace text without disabling Wikitext
		-- interpretation.
		:gsub("```(.-)```", function(inside)
			return inside
				-- placeholder variable names between double backquotes; we need to repeat this here to avoid
				-- the following pattern for single backquotes from clobbering double backquotes
				:gsub("``([A-Za-z0-9_%-. ]+)``", "<var>%1</var>")
				-- single backquotes undo monospacing
				:gsub("`([^`\n]+)`", '<span style="font-family: sans-serif;">%1</span>')
				-- text on the first line should be monospaced
				:gsub("^([^\n])", " %1")
				-- text after a newline should be monospaced, and temp-escape the newline so later replacements
				-- to join continued lines in a paragraph don't take effect
				:gsub("\n", TEMP_NEWLINE .. " ")
				-- escape { so it won't be interpreted as a code block
				:gsub("{", TEMP_LEFT_BRACE)
		end)
		:gsub("``([A-Za-z0-9_%-. ]+)``", "<var>%1</var>") -- placeholder variable names between double backquotes
		:gsub("`([^`\n]+)`", function(inside) -- literal text between backquotes, set using <code>...</code>
			-- Escape { so it won't be interpreted as a code block.
			inside = inside:gsub("{", TEMP_LEFT_BRACE)
			return "<code>" .. inside .. "</code>"
		end)
		-- {} blocks: blocks of code
		-- Escape to avoid removing line breaks.
		:gsub("%b{}", function(m0)
			local next_char = m0:sub(2, 2)
			if next_char == "|" then
				-- Wikitable; don't try to parse it as code. But we do want to parse special syntax in them (in
				-- particular {...} syntax for embedded code snippets), and if we return nil that won't happen.
				-- Instead, we call format_doc() recursively on the innards, sharing our stash so that code found
				-- inside the table stays hidden behind its placeholder until the outermost call has finished.
				return "{" .. format_doc(m0:sub(2, -2), stash) .. "}"
			end
			-- Double braces (template calls) are left exactly as written; returning nil keeps the match.
			if next_char == "{" and m0:sub(-2, -2) == "}" then return nil end
			-- Blocks containing a newline are rendered as a block; everything else is rendered inline. Leading
			-- spaces of the code are dropped.
			local text = "<syntaxhighlight lang=lua" .. (m0:match("\n") and "" or " inline") .. ">" .. m0:sub(2, -2):gsub("^ +", "") .. "</syntaxhighlight>"
			-- Prevent any further processing by storing the desired text into the stash and replacing the whole
			-- thing with a single private-use-area character.
			stash.n = stash.n + 1
			stash[stash.n] = text
			return u(private_use_start + stash.n)
		end)
		-- undo escaping of left brace to prevent code block interpretation
		:gsub(TEMP_LEFT_BRACE, "{")
		-- Join continued lines in a paragraph: a single newline, plus any spaces/tabs that follow it, becomes one
		-- space. We don't do that across two newlines in a row, nor when the first non-blank character of the
		-- continuation line is one of certain special characters (#, * or : indicating a list item; | indicating
		-- a wikitable item; semicolon for bolded items).
		:gsub("([^\n])\n[ \t]*([^ \t\n#*:;|])", "%1 %2")
		-- Repeat the previous in case of a single-character line (admittedly rare).
		:gsub("([^\n])\n[ \t]*([^ \t\n#*:;|])", "%1 %2")
		:gsub("\n[ \t]+%f[*#:;]", "\n") -- remove indentation for list items
		:gsub("%f[\n,{]\n%f[^\n*#:;]", "\n\n") -- wiki newlines
		:gsub("(\n[ *#:]*)(|?[_%w]+=?):", "%1<code><b>%2</b></code>:") -- parameter names
		-- double-underline to indicate types (displayed as italicized underlined)
		:gsub("__(.-)__", function(inside)
			return "<u><i>" .. inside .. "</i></u>"
		end))

	if is_outermost then
		-- undo escaping of newline to prevent joining of continued lines; only done once every line-joining pass
		-- (including those of enclosing calls) is over
		subbed_str = subbed_str:gsub(TEMP_NEWLINE, "\n")
	end

	-- Put <code>...</code> around <var>...</var> invocations that don't already occur inside of
	-- <code>...</code> blocks. The odd-numbered pieces of the split are the ones outside <code>...</code>.
	local split_on_code = rsplit(subbed_str, "(<code>.-</code>)")
	for i = 1, #split_on_code, 2 do
		split_on_code[i] = split_on_code[i]:gsub("(<var>.-</var>)", "<code>%1</code>")
	end
	subbed_str = concat(split_on_code)

	if not is_outermost then
		-- Leave the placeholders in place; the outermost call restores them.
		return subbed_str
	end
	return (subbed_str
		-- Undo code-block stashing. A character with no stash entry yields nil and is therefore left unchanged.
		:gsub("\244[\128-\191][\128-\191][\128-\191]", function(char)
			return stash[codepoint(char) - private_use_start]
		end))
end

--[===[
The main entrypoint for {{tl|module documentation}}. The frame object can contain 3 optional arguments:
* |comment_level=: The number of equals signs (=) a given section uses. Default: 2 (i.e. {--[==[ ... (comment block) ]==]})
*: e.g. The value 4 means {--[====[ ... (comment block) ]====]}.
* |section_level=: The header level used for each function/method. Default: 2 (i.e. L2: {== ... ==}).
* |identifier=: A Lua string pattern. Only the comments of functions whose names match this pattern are used. When not given, all functions are accepted.
*: This is useful when giving object methods, using a pattern such as {^object_name:}.
]===]
function export.show(frame)
	-- NOTE: keep explanatory comments inside the function body. The scanner below only treats a documentation comment
	-- as belonging to a function when nothing but whitespace separates the two, so a plain comment inserted above the
	-- `function` line would detach this function from its own documentation block.

	-- Arguments are read from the parent frame. The parent itself is guarded: indexing a nil parent would raise an
	-- error, whereas with the guard every argument simply falls back to its default.
	local parent = frame:getParent()
	local args = parent and parent.args or {}

	local comment_level = tonumber(args["comment_level"]) or 2
	local section_mark = ("="):rep(tonumber(args["section_level"]) or 2)
	local pattern_identifier = args["identifier"] or ""

	-- Build a pattern matching a long comment with `comment_level` equals signs. `typeid`, if given, is a pattern
	-- for a directive that must follow the opening bracket on the same line. The pattern captures the comment
	-- body (without trailing tabs/newlines) and the position just after the closing bracket.
	local equals = ("="):rep(comment_level)
	local function make_comment_pattern(typeid)
		typeid = typeid and ("%s*" .. typeid) or ""
		return "%-%-%[" .. equals .. "%[" .. typeid .. "\n?(.-)[\t\n]*]" .. equals .. "]()"
	end
	local fn_comment_pattern = make_comment_pattern(nil)
	local intro_comment_pattern = make_comment_pattern("intro:")
	local metafunc_comment_pattern = make_comment_pattern("func:%s*(([^\n(]+)[^\n)]+%))")
	-- Matches a documentation comment that is the last thing (apart from whitespace) in the text preceding a
	-- function; the greedy prefix makes it pick the nearest such comment.
	local preceding_comment_pattern = "^.*" .. fn_comment_pattern .. "%s*$"

	local mod_title = mw.title.getCurrentTitle()
	if mod_title.text:match("/doc$") then return "(<i>Pendokumenan yang dijana terletak pada laman modul tersebut.</i>)" end
	local mod_text = mod_title:getContent()
	if not mod_text then return "(<i>Laman modul tersebut sekarang tidak wujud.</i>)" end

	-- Text shown in place of the description for a function that has no documentation comment.
	local missing_doc_notice =
		'<strong class="error">This function lacks documentation. Please add a description of its usages, inputs and outputs, ' ..
		"or its difference from similar functions, or make it local to remove it from the function list.</strong>" ..
		"[[Kategori:Templat dan modul yang memerlukan pendokumenan]]"

	-- Build the wikitext for one function: a heading with the function name, the signature rendered as inline
	-- Lua, and the formatted description.
	local function make_section(name, signature, description)
		return section_mark .. name .. section_mark .. "\n\n" .. "<syntaxhighlight lang=lua inline>function " ..
			signature .. "</syntaxhighlight>\n\n" .. format_doc(description)
	end

	-- This contains function and intro documentation. Each element is a two-element list of {POSITION, DOCS} specifying
	-- the generated documentation for a function and the character position in the file where it was found (for sorting
	-- purposes). The intro, if any, is given position 1 so that it sorts first.
	local docs = {}
	local intro_comment = mod_text:match("^.-" .. intro_comment_pattern)
	if intro_comment then
		insert(docs, {1, format_doc(intro_comment)})
	end

	-- Look for actual functions: lines starting with `function` (so `local function` declarations are skipped).
	-- `f` is the full signature, `fn` the function name, `p0` the position of the newline before the declaration.
	for p0, f, fn in mod_text:gmatch("()\n[ \t]*function +(([^\n(]+)[^\n)]+%))") do
		if fn:match(pattern_identifier) then
			local c = mod_text:sub(1, p0 - 1):match(preceding_comment_pattern)
			insert(docs, {p0, make_section(fn, f, c or missing_doc_notice)})
		end
	end

	-- Now look for comments with the function declaration inside them (used for metatable functions etc.).
	-- NOTE(review): the `identifier` filter is not applied to these entries -- confirm whether that is intended.
	for p0, f, fn, comment in mod_text:gmatch("()" .. metafunc_comment_pattern) do
		insert(docs, {p0, make_section(fn, f, comment)})
	end

	-- Present everything in the order in which it appears in the module source.
	table.sort(docs, function(a, b) return a[1] < b[1] end)

	local chunks = {}
	for _, decl in ipairs(docs) do
		insert(chunks, decl[2])
	end

	-- Expand templates and parser tags (e.g. <syntaxhighlight>) contained in the generated wikitext.
	return frame:preprocess(concat(chunks, "\n\n"))
end

return export