local m_str_utils = require("Module:string utilities")
local byte = string.byte
local insert = table.insert
local set_nested = require("Module:table").setNested
local split = m_str_utils.split
local tonumber = tonumber
local tostring = tostring
local u = m_str_utils.char
local process_string = tostring
local process_decimal = tonumber
--- Interpret a string as a base-16 integer.
-- @param v string of hexadecimal digits
-- @return the parsed number, or nil when `v` is not valid hexadecimal
local function process_hexadecimal(v)
	local parsed = tonumber(v, 16)
	return parsed
end
--- Field parser that discards its input.
-- Ignores any arguments and always yields a single explicit nil.
-- @return nil
local function process_null()
	local nothing = nil
	return nothing
end
--- Work out which field indices an iterator should return, and how many.
-- When the first extra argument is truthy, the extra arguments themselves are
-- used as the list of keys. Otherwise every index from 1 to #funcs is used.
-- @param funcs array of per-field parser functions (only its length is read)
-- @param ... optional field indices, in the order they should be returned
-- @return array of keys
-- @return number of keys in that array
local function get_return_val_keys(funcs, ...)
	local first = ...
	if first then
		local requested = {...}
		return requested, #requested
	end
	-- No explicit selection: fall back to 1..#funcs.
	local total = #funcs
	local all_keys = {}
	for idx = 1, total do
		all_keys[idx] = idx
	end
	return all_keys, total
end
--- Build an iterator over the lines of the raw UnicodeData.txt module.
-- Each call of the returned function reads the next line, splits it on ";",
-- runs the requested fields through their parsers and returns them in the
-- order requested.
-- A line whose second field ends in ", First>" is merged with the line that
-- follows it: the first field then becomes a two-element table holding both
-- code points, and the remaining fields come from the second line.
-- The iterator returns nothing once no further line can be read.
-- NOTE(review): field meanings in the comments below follow the
-- UnicodeData.txt column layout (UAX #44) - confirm against the data module.
-- @param ... optional field indices to return (default: every field, 1..15)
-- @return iterator function suitable for a generic `for`
local function iterate_UnicodeData(...)
	local raw_data = require("Module:Unicode data/raw/UnicodeData.txt")

	-- Field 1: a hex string, or a {first, last} pair of hex strings for a
	-- range. The pair is converted in place.
	local function parse_codepoint(field)
		if type(field) ~= "string" then
			field[1] = process_hexadecimal(field[1])
			field[2] = process_hexadecimal(field[2])
			return field
		end
		return process_hexadecimal(field)
	end

	-- Field 6: optional "<tag>" prefix followed by space-separated items.
	-- Returns the list of items with the tag (if any) stored under `.type`.
	local function parse_decomposition(field)
		if field == "" then
			return
		end
		local tag, rest_pos = field:match("^<(.-)> *()")
		local mapping
		if rest_pos then
			mapping = split(field:sub(rest_pos), " +")
		else
			mapping = split(field, " +")
		end
		mapping.type = tag
		return mapping
	end

	-- Field 9: either a plain number or a fraction written as "n/d".
	local function parse_numeric(field)
		if field == "" then
			return
		end
		local num, den = field:match("^(%-?%d+)/(%-?%d+)$")
		if not num then
			return tonumber(field)
		end
		return tonumber(num) / tonumber(den)
	end

	-- Field 10: "Y" -> true, "N" -> false, anything else -> nil.
	local MIRRORED = {Y = true, N = false}
	local function parse_mirrored(field)
		return MIRRORED[field]
	end

	local parsers = {
		parse_codepoint,     -- 1: code point (or range)
		process_string,      -- 2: name
		process_string,      -- 3: general category
		process_decimal,     -- 4: canonical combining class
		process_string,      -- 5: bidi class
		parse_decomposition, -- 6: decomposition
		process_decimal,     -- 7: decimal digit value
		process_decimal,     -- 8: digit value
		parse_numeric,       -- 9: numeric value
		parse_mirrored,      -- 10: bidi mirrored
		process_string,      -- 11: Unicode 1 name
		process_null,        -- 12: always discarded
		process_hexadecimal, -- 13: simple uppercase mapping
		process_hexadecimal, -- 14: simple lowercase mapping
		process_hexadecimal  -- 15: simple titlecase mapping
	}

	-- `pos` is the byte offset at which the next line search begins.
	local pos = 1
	local keys, key_count = get_return_val_keys(parsers, ...)

	-- Return the parsed values for keys[idx], keys[idx + 1], ... as multiple
	-- return values, in that order.
	local function emit(fields, idx)
		idx = idx or 1
		local key = keys[idx]
		local value = parsers[key](fields[key])
		if idx ~= key_count then
			return value, emit(fields, idx + 1)
		end
		return value
	end

	-- `range_start` is only set on the internal recursive call made after a
	-- ", First>" line; the generic `for` always passes nil here because no
	-- state value is returned alongside the iterator.
	local function iter(range_start) -- TODO: iterate ranges
		local text
		text, pos = raw_data:match("([^\n]+)()", pos)
		if not text then
			return
		end
		local fields = split(text, ";")
		if range_start then
			fields[1] = {range_start[1], fields[1]}
		elseif fields[2]:sub(-8, -1) == ", First>" then
			return iter(fields)
		end
		return emit(fields)
	end

	return iter
end
local export = {}
--- Recursively collapse uniform sub-tables into a single value.
-- Every table-valued entry of `t` is first replaced by its own compressed
-- form. Then, if `trailing` is truthy and the entries at keys 128..191 are
-- all equal to one another, that shared value is returned in place of `t`.
-- @param t table to compress (modified in place)
-- @param trailing when truthy, `t` itself may be replaced by a single value
-- @return `t`, or the value shared by t[128]..t[191]
local function compress(t, trailing)
	-- Children are always eligible for collapsing.
	for key, child in pairs(t) do
		if type(child) == "table" then
			t[key] = compress(child, true)
		end
	end
	if trailing then
		local shared = t[128]
		local i = 129
		while i <= 191 and t[i] == shared do
			i = i + 1
		end
		-- Running past 191 means every slot matched t[128].
		if i > 191 then
			return shared
		end
	end
	return t
end
--- Build a nested table of general categories keyed by byte sequence.
-- Reads fields 1 and 3 of every UnicodeData entry. Entries whose first field
-- is a table (merged First/Last ranges) are skipped. For the rest, the code
-- point is turned into a string with `u` and the category is passed to
-- `set_nested` together with that string's bytes.
-- NOTE(review): presumably `set_nested` stores the category under the nested
-- keys given by those bytes - confirm against Module:table.
-- @return the resulting table after `compress`
function export.categories()
	local trie = {}
	for cp, gc in iterate_UnicodeData(1, 3) do
		local is_single = type(cp) ~= "table"
		if gc and is_single then
			set_nested(trie, gc, byte(u(cp), 1, -1))
		end
	end
	return compress(trie)
end
--- Build a nested table of non-zero combining classes keyed by byte sequence.
-- Reads fields 1 and 4 of every UnicodeData entry. Entries with a missing or
-- zero class are skipped. For the rest, the code point is turned into a
-- string with `u` and the class is passed to `set_nested` together with that
-- string's bytes.
-- NOTE(review): presumably `set_nested` stores the class under the nested
-- keys given by those bytes - confirm against Module:table.
-- @return the resulting table after `compress`
function export.combining_classes()
	local output = {}
	for codepoint, class in iterate_UnicodeData(1, 4) do
		-- Merged First/Last ranges arrive as a {first, last} table, which `u`
		-- cannot convert. Skip them, exactly as export.categories does,
		-- instead of relying on every range having class 0.
		if class and class ~= 0 and type(codepoint) ~= "table" then
			local ch = u(codepoint)
			set_nested(output, class, byte(ch, 1, -1))
		end
	end
	return compress(output)
end
return export