local u = require("Module:string utilities").char
-- Table returned by this module. Each string field is a run of single
-- characters and "x-y" ranges (presumably meant to be placed inside a pattern
-- character class by the caller -- confirm against the call sites).
local range = {}

do
	-- Formats one inclusive code point range as "<first>-<last>".
	local function span(first, last)
		return u(first) .. "-" .. u(last)
	end

	-- Han characters, listed block by block.
	range.kanji = table.concat({
		span(0x2E80, 0x2FDF), -- CJK Radicals Supplement + Kangxi Radicals
		span(0x4E00, 0x9FFF), -- CJK Unified Ideographs
		span(0x3400, 0x4DBF), -- CJK Unified Ideographs Extension A
		span(0xF900, 0xFAFF), -- CJK Compatibility Ideographs
		span(0x20000, 0x2A6DF), -- CJK Unified Ideographs Extension B
		span(0x2A700, 0x2EE5F), -- CJK Unified Ideographs Extension C-F & I
		span(0x2F800, 0x2FA1F), -- CJK Compatibility Ideographs Supplement
		span(0x30000, 0x323AF), -- CJK Unified Ideographs Extension G-H
	})
end
-- Voicing marks and diacritics that can accompany kana of either syllabary.
range.kana_combining_characters = table.concat({
	-- Hiragana block: U+3099-U+309C, the combining and spacing
	-- (semi-)voiced sound marks.
	u(0x3099) .. "-" .. u(0x309C),
	-- Halfwidth and Fullwidth Forms: the halfwidth voiced and semi-voiced
	-- sound marks, listed individually.
	u(0xFF9E) .. u(0xFF9F),
	-- Combining Diacritical Marks: combining overline and combining dot
	-- below, listed individually (this is not a range).
	u(0x0305) .. u(0x0323),
})
-- Characters shared by hiragana and katakana: the combining characters above
-- plus marks that belong to neither syllabary exclusively.
range.kana_overlap = table.concat({
	range.kana_combining_characters,
	"〰-〵", -- CJK Symbols and Punctuation
	"ー", -- Katakana
})
-- Characters that are hiragana only (the shared ones live in kana_overlap).
local hiragana_exclusive = table.concat({
	"ぁ-ゖゝゞ", -- Hiragana
	"𛀁𛀆𛄟", -- Kana Supplement + Kana Extended-A
	"𛄲𛅐-𛅒", -- Small Kana Extension
})

-- Everything that may appear in hiragana text: shared characters first, then
-- the hiragana-only ones.
range.hiragana = table.concat({ range.kana_overlap, hiragana_exclusive })
-- Characters that are katakana only (the shared ones live in kana_overlap).
local katakana_exclusive = table.concat({
	"ァ-ヺヽヾ", -- Katakana
	"ㇰ-ㇿ", -- Katakana Phonetic Extensions
	-- Halfwidth katakana are spelled with code points rather than literals.
	u(0xFF66) .. "-" .. u(0xFF9D), -- Halfwidth and Fullwidth Forms
	"𚿰-𚿾", -- Kana Extended-B
	"𛀀𛄠-𛄢", -- Kana Supplement + Kana Extended-A
	"𛅕𛅤-𛅧", -- Small Kana Extension
})

-- Everything that may appear in katakana text: shared characters first, then
-- the katakana-only ones.
range.katakana = table.concat({ range.kana_overlap, katakana_exclusive })
-- Hentaigana. The range is split in two so that it skips 𛀆, which this
-- module lists among the hiragana-only characters instead.
range.hentaigana = "𛀂-𛀅𛀇-𛄞" -- Kana Supplement + Kana Extended-A

-- Every kana character: shared, hiragana-only, katakana-only and hentaigana,
-- in that order.
range.kana = table.concat({
	range.kana_overlap,
	hiragana_exclusive,
	katakana_exclusive,
	range.hentaigana,
})
-- Small kana that are submoraic.
-- Note: not other sutegana like っ, as they aren't submoraic.
range.submoraic_kana =
	"ぁぃぅぇぉゃゅょゎ" .. -- Hiragana
	"ァィゥェォャュョヮ" .. -- Katakana
	-- The halfwidth small kana (small a through small yo, U+FF67-U+FF6E) are
	-- spelled with code points, as the halfwidth katakana range already is
	-- elsewhere in this module. Written with full-size katakana literals the
	-- entry becomes the range U+30A1-U+30E7, which also matches ordinary
	-- full-size kana such as ア and カ. U+FF6F (halfwidth small tsu) is
	-- deliberately outside the range, per the note above.
	u(0xFF67) .. "-" .. u(0xFF6E) .. -- Halfwidth and Fullwidth Forms
	"𛅐𛅑𛅒𛅤𛅥𛅦" -- Small Kana Extension
-- Kana grouped by the vowel they end in; `n` holds the moraic nasal.
-- Each entry is the hiragana members followed by the katakana members.
range.vowels = {
	a = "ぁあかがさざただなはばぱまゃやらゎわ"
		.. "ァアカガサザタダナハバパマャヤラヮワヷ",
	i = "ぃいきぎしじちぢにひびぴみ𛀆り𛅐ゐ"
		.. "ィイキギシジチヂニヒビピミ𛄠リ𛅤ヰヸ",
	u = "ぅうゔくぐすずつづぬふぶぷむゅゆる𛄟"
		.. "ゥウヴクグスズツヅヌフブプムュユル𛄢",
	e = "ぇえけげせぜてでねへべぺめ𛀁れ𛅑ゑ"
		.. "ェエ𛀀ケゲセゼテデネヘベペメ𛄡レ𛅥ヱヹ",
	o = "ぉおこごそぞとどのほぼぽもょよろ𛅒を"
		.. "ォオコゴソゾトドノホボポモョヨロ𛅦ヲヺ",
	n = "ん" .. "ン",
}
-- Ideographic symbols that sit outside the main Han blocks: iteration marks,
-- ideographic zero, and enclosed or squared ideographs.
range.ideograph =
	"〃々-〇〱-〵〻〼" .. -- CJK Symbols and Punctuation
	"㈠-㉟㊀-㋿" .. -- Enclosed CJK Letters and Months
	"㍘-㏿" .. -- CJK Compatibility
	-- The upper bound is given as a code point: U+1F2FF, the last code point
	-- of the block, mirroring the neighbouring ranges that also run to the end
	-- of their blocks. Without an explicit end the trailing "-" would be a
	-- literal hyphen, or would pair with whatever gets concatenated next.
	-- NOTE(review): confirm U+1F2FF is the intended end of this range.
	"🈂-" .. u(0x1F2FF) -- Enclosed Ideographic Supplement
-- Single code points that stand for a sequence of kana: kana ligatures and
-- squared kana words.
range.kana_graph = table.concat({
	"ゟヿ", -- Hiragana + Katakana
	"㌀-㍗", -- CJK Compatibility
	"🈀🈁", -- Enclosed Ideographic Supplement
})
-- Japanese punctuation, including the fullwidth and halfwidth forms.
--
-- Every character from the Halfwidth and Fullwidth Forms block, and the
-- ideographic space, is spelled with its code point. Written as ASCII or
-- standard-width literals the same entries form huge ranges (for example
-- U+0020-U+3002 and U+007B-U+30FB) that match Latin letters, digits and kana.
range.punctuation =
	-- CJK Symbols and Punctuation: U+3000 (ideographic space) through 。,
	-- then the bracket and quotation ranges and the part alternation mark.
	u(0x3000) .. "-。〈-】〔-〟〽" ..
	"゠・" .. -- Katakana
	-- Halfwidth and Fullwidth Forms:
	u(0xFF01) .. "-" .. u(0xFF0F) .. -- fullwidth exclamation mark to solidus
	u(0xFF1A) .. "-" .. u(0xFF20) .. -- fullwidth colon to commercial at
	u(0xFF3B) .. "-" .. u(0xFF40) .. -- fullwidth left square bracket to grave accent
	u(0xFF5B) .. "-" .. u(0xFF65) .. -- fullwidth left curly bracket to halfwidth middle dot
	u(0xFFE0) .. "-" .. u(0xFFEE) -- fullwidth cent sign to halfwidth white circle
do
	-- Latin characters are not listed here; they are taken from the "Latn"
	-- script object supplied by Module:scripts.
	-- NOTE(review): the exact format of what getCharacters() returns is not
	-- visible from this file.
	local latin_script = require("Module:scripts").getByCode("Latn")
	range.latin = latin_script:getCharacters()
end
-- Decimal digits, in both ASCII and fullwidth form.
range.numbers =
	"0-9" .. -- Basic Latin
	-- Fullwidth digits zero to nine, spelled with code points. An ASCII
	-- literal here would only repeat the Basic Latin range above and leave
	-- the fullwidth digits unmatched.
	u(0xFF10) .. "-" .. u(0xFF19) -- Halfwidth and Fullwidth Forms

return range