Modul:zh-translit
- Berikut merupakan pendokumenan yang dijana oleh Modul:pendokumenan/functions/translit. [sunting]
- Pautan berguna: senarai sublaman • pautan • transklusi • kes ujian • kotak pasir
Modul ini akan mentransliterasi teks bahasa Cina. Ia juga digunakan untuk mentransliterasi Eastern Min, Jin, Mandarin, Gan, Xiang, Middle Chinese, Literary Chinese, Northern Min, Teochew, Cina Kuno, Wu, Kantonis, Sichuanese, dan Taishanese.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:zh-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
local m_str_utils = require("Module:string utilities")
local m_utils = require("Module:utilities")
local findTemplates = require("Module:template parser").findTemplates
local get_section = m_utils.get_section
local gsub = string.gsub
local insert = table.insert
local safe_require = m_utils.safe_require
local sub = string.sub
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local uupper = m_str_utils.upper
-- Current frame object; used below for frame:preprocess() on template
-- argument names and values.
local frame = mw.getCurrentFrame()
-- Filled in by export.tr on first need with the MT table of
-- Module:zh/data/cmn-tag; stays nil until then.
local tag
-- Table indexed by language code; its values are compared against the
-- (preprocessed) parameter names of {{zh-pron}} in iterate_content.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr
-- Table returned at the end of the file; holds the module's public functions.
local export = {}
-- Records, through Module:debug/track, that `lang` needs a manual
-- transliteration, then reports failure to the caller.
--   lang:    language code appended to the tracking key.
--   request: not used by this function; kept so the signature is unchanged.
-- Always returns nil.
local function fail(lang, request)
	local track = require("Module:debug/track")
	track("zh-translit/needs manual translit/" .. lang)
	return nil
end
-- Loads the page called `title` and extracts its level-2 "Chinese" section.
--   title: page title to look up.
-- Returns whatever get_section yields for that section, or false when the
-- title object cannot be created or the page has no content.
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() gives nil for a page that does not exist; stop here rather
	-- than handing nil to get_section.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end
-- Lects for which a comma NOT followed by a space separates two readings.
local comma_separated_lects = {
	["cmn"] = true,
	["csp"] = true,
	["wuu"] = true,
	["yue"] = true,
	["zhx-tai"] = true,
}

-- Decides whether byte position `i` of `readings` closes a segment.
--   readings: the raw reading list.
--   lang:     language code; selects whether "," counts as a separator.
--   i:        byte position being inspected.
--   i_end:    one past the last byte of `readings`.
--   start:    byte position where the current segment began.
-- Returns the segment readings[start .. i-1] when `i` is the end position,
-- a "/" or (for the lects listed above) a "," that is not followed by a
-- space. Returns nothing in every other case.
local function get_reading(readings, lang, i, i_end, start)
	if i ~= i_end then
		local ch = sub(readings, i, i)
		if ch == "," then
			if not comma_separated_lects[lang] then
				return
			end
			if sub(readings, i + 1, i + 1) == " " then
				return
			end
		elseif ch ~= "/" then
			return
		end
	end
	return sub(readings, start, i - 1)
end
-- Reconciles the readings found in one {{zh-pron}} parameter with the
-- transliteration collected so far.
--   readings: preprocessed parameter value.
--   lang:     language code.
--   tr:       transliteration found earlier, or nil if none yet.
-- Returns the (possibly updated) transliteration, false when the readings
-- disagree with each other or with `tr`, or nil when `tr` was nil and no
-- reading was found.
--
-- For "ltc" and "och" the value is not split: it is returned verbatim, or
-- false if it differs from an existing `tr`.
local function handle_readings(readings, lang, tr)
	if lang == "ltc" or lang == "och" then
		if tr and readings ~= tr then
			return false
		end
		return readings
	end
	-- tr_orig remembers what the caller passed in, so the "cap=y" branch can
	-- tell an inherited transliteration from one found in this call.
	local tr_orig, i, start, i_end, reading = tr, 1, 1, #readings + 1
	-- Walk every byte position (plus one past the end); get_reading returns a
	-- segment only at positions that close one.
	while i <= i_end do
		reading = get_reading(readings, lang, i, i_end, start)
		if not reading then
			-- Position i does not close a segment; keep scanning.
		elseif not reading:find("=") then
			-- Plain reading: accept it if nothing was chosen yet, or if it
			-- matches the current choice (ignoring case and "^" markers).
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				tr = reading
			elseif ulower(reading) ~= tr then
				return false
			end
			start = i + 1
		elseif lang == "cmn" and reading == "cap=y" and tr then
			-- "cap=y" prefixes the reading with "^" (export.tr later
			-- upper-cases the character after each "^"). The `tr` guard avoids
			-- concatenating nil when "cap=y" appears before any reading.
			local tr_cap = "^" .. tr
			if not tr_orig or tr_orig == tr_cap then
				tr = tr_cap
			end
		end
		-- NOTE(review): `start` is not advanced after a segment containing
		-- "=", so any later segment is merged with it and ignored. Confirm
		-- this is intended.
		i = i + 1
	end
	return tr
end
-- Scans `content` for {{zh-pron}} and {{zh-see}} templates.
--   content: wikitext to scan.
--   lang:    language code whose lect abbreviation selects the {{zh-pron}}
--            parameter to read.
--   see:     list that receives {{zh-see}} targets not yet marked in `seen`.
--   seen:    set of titles that must not be queued.
--   tr:      transliteration found so far (may be nil).
-- Returns the transliteration after merging every matching {{zh-pron}}
-- parameter via handle_readings, or false as soon as a conflict is reported.
local function iterate_content(content, lang, see, seen, tr)
	for name, params in findTemplates(content) do
		if name == "zh-see" then
			-- Queue the redirect target unless it has been handled already.
			local target = trim(frame:preprocess(params[1]))
			if not seen[target] then
				insert(see, target)
			end
		elseif name == "zh-pron" then
			for key, value in pairs(params) do
				-- Only non-empty, named parameters are considered.
				if #value > 0 and type(key) == "string" then
					if frame:preprocess(key) == lect_code[lang] then
						tr = handle_readings(frame:preprocess(value), lang, tr)
						break
					end
				end
			end
			if tr == false then
				return false
			end
		end
	end
	return tr
end
-- Transliterates `text` by loading the entry whose title is `text` and using
-- the reading given for `lang` in its {{zh-pron}} template.
--   text: term to transliterate; also used as the page title to load.
--   lang: language code. "zh" and "lzh" are handled as "cmn"; a code with no
--         entry in lect_code is replaced by its full code from Module:languages.
--   sc:   accepted but not read anywhere in this function.
-- Returns `text` unchanged when it is nil or empty; nil (via fail) when the
-- entry cannot be loaded, no reading is found, or readings conflict;
-- otherwise the transliteration followed by a single space.
function export.tr(text, lang, sc)
if (not text) or text == "" then
return text
end
if lang == "zh" or lang == "lzh" then
lang = "cmn"
end
if not lect_code[lang] then
lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
end
local content = get_content(text)
if not content then
return fail(lang)
end
-- `see` queues {{zh-see}} targets; `seen` marks titles already handled so
-- iterate_content does not queue them again.
local see = {}
local seen = {
[text] = true
}
local tr = iterate_content(content, lang, see, seen)
-- Follow {{zh-see}} targets only when the entry itself gave no reading.
-- `see` can grow during the loop because iterate_content appends to it, hence
-- the index-based while loop re-reading #see on every pass.
if tr == nil then
local i, title = 1
while i <= #see do
title = see[i]
content = get_content(title)
if content then
tr = iterate_content(content, lang, see, seen, tr)
if tr == false then
return fail(lang)
end
seen[title] = true
end
i = i + 1
end
end
-- Covers both nil (nothing found) and false (conflicting readings).
if not tr then
return fail(lang)
end
if lang == "cmn" then
tr = tr:gsub("#", "")
-- Bytes \194-\244 are UTF-8 lead bytes, so this is true whenever tr contains
-- any non-ASCII character.
if tr:match("[\194-\244]") then
tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
-- The pattern matches one byte plus any following continuation bytes, i.e.
-- one UTF-8 character at a time. Characters with no replacement are left
-- as they are (the callback returns nothing for them).
tr = tr:gsub(".[\128-\191]*", function(m)
if m == "一" then
return "yī"
elseif m == "不" then
return "bù"
else
-- Use the first entry listed for this character in the tag table, if any.
m = tag[m] and tag[m][1]
if m then
-- Decompose so a leading vowel is a bare ASCII letter, then prefix it
-- with an apostrophe.
return toNFD(m):gsub("^[aeiou]", "'%0")
end
end
end)
:gsub("^'", "") --remove initial apostrophe inserted by previous function
end
-- Upper-case the character that follows each "^" marker and drop the marker.
tr = ugsub(tr, "%^(.)", uupper)
elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
-- Wrap each run that starts with a digit and continues with digits, "*" or
-- "-" in <sup> tags.
tr = tr:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
elseif lang == "hak" then
-- TODO
elseif lang == "ltc" or lang == "och" then
-- For these lects tr is the raw {{zh-pron}} value (handle_readings returns it
-- verbatim); "n" is treated as "no transliteration".
if tr == "n" then
return fail(lang)
end
-- Per-character selectors: comma-separated for ltc, semicolon-separated for
-- och.
local index = {}
if tr then
if lang == "ltc" then
index = mw.text.split(tr, ",")
else
index = mw.text.split(tr, ";")
end
end
for i = 1, ulen(text) do
-- Data lives in one module per character:
-- "Module:zh/data/ltc-pron/<char>" or "Module:zh/data/och-pron-ZS/<char>".
local module_type = lang .. "-pron"
if lang == "och" then
module_type = module_type .. "-ZS"
end
local data_module = safe_require("Module:zh/data/" .. module_type .. "/" .. usub(text, i, i))
-- Give up when there is no data for the character, or when the module has
-- more than one entry and no specific one was selected (missing or "y").
if not data_module or (((not index[i]) or index[i] == "y") and #data_module > 1) then
return fail(lang)
end
if index[i] == "y" then
index[i] = 1
elseif index[i] then
index[i] = tonumber(index[i])
end
-- Pick the selected entry, falling back to the first one.
index[i] = index[i] and data_module[index[i]] or data_module[1]
if lang == "ltc" then
-- Build the output from the "Zhengzhang" conversion tables of
-- Module:ltc-pron/data, with the tone (if non-empty) in <sup> tags.
local data = mw.loadData("Module:ltc-pron/data")
local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
tone = tone ~= "" and ("<sup>" .. tone .. "</sup>") or tone
index[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
else
-- och: the sixth field of the entry is used as the output.
index[i] = index[i][6]
end
end
tr = table.concat(index, " ")
if lang == "och" then
tr = "*" .. tr
end
elseif lang == "nan" then
-- TODO
elseif lang == "nan-tws" then
tr = require("Module:nan-pron").pengim_display(tr)
elseif lang == "wuu" then
local w_pron = require("Module:wuu-pron")
if tr:match(';') then
--TODO
return fail(lang)
elseif tr:match(':') then
-- Skip the first three bytes before formatting.
-- NOTE(review): presumably a prefix ending in ":" - confirm.
tr = w_pron.wugniu_format(tr:sub(4))
else
tr = w_pron.wugniu_format(w_pron.wikt_to_wugniu(tr))
end
elseif lang == "zhx-sic" then
-- Insert a space between a digit or "-" and a following letter, then wrap
-- digit runs in <sup> tags as for csp/yue/zhx-tai.
tr = ugsub(tr, "([%d-])(%a)", "%1 %2")
:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
else
-- Any other lect: delegate to the rom function of "Module:<lang>-pron".
tr = require("Module:" .. lang .. "-pron").rom(tr)
end
-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
return tr .. " "
end
return export