Wiktionary suwiktionary https://su.wiktionary.org/wiki/Tepas MediaWiki 1.46.0-wmf.22 case-sensitive Média Husus Obrolan Pamaké Obrolan pamaké Wiktionary Obrolan Wiktionary Gambar Obrolan gambar MédiaWiki Obrolan MédiaWiki Citakan Obrolan citakan Pitulung Obrolan pitulung Kategori Obrolan kategori TimedText TimedText talk Modul Pembicaraan Modul Acara Pembicaraan Acara bali 0 1578 23805 11960 2026-04-01T12:10:57Z Deepturquoise 2456 23805 wikitext text/x-wiki =={{sunda}}== {{-k b-|su}} * organ témporal nu ukur aya di jero [[awak]] wanoja (atawa sato bikang) nalika reuneuh, napel dina pinding rahim pikeun nyerep zat-zat gizi ti getih indung ka anak nu dikandungna ngaliwatan tali ari-ari. ==== Babasan ==== * ''bali geusan ngajadi'': tempat/wewengkon dilahirkeunana hiji jalma. {{tarjemahan}} {{(}} * {{id}} : {{trad-|id|plasénta}} {{-}} * {{en}} : {{trad-|en|placénta}} {{)}} === Patempatan === * ngaran hiji pulo (sakaligus propinsi) di wétaneun pulo Jawa. {{Wikipedia}} r55ntdum8q03me57vktdm31h35tqfab dahar 0 2763 23815 19604 2026-04-01T13:22:35Z Deepturquoise 2456 23815 wikitext text/x-wiki =={{sunda}}== {{-verba-|su}} {{suword|hb=tuang|hs=neda|ch=nyatu, lelebok, lolodok}} #peta ngasupkeun barang nu bisa ditampa sarta digiling ku beuteung. =={{batawi}}== {{-verba-|bew}} * dahar :* ''Enni pentèran ènaknya tuh '''dahar''' naon nya?'' :* Beurang ieu ènakna téh '''dahar''' naon nya? {{tarjemahan}} {{(}} * {{id}} : {{trad-|id|makan}} {{-}} * {{en}} : {{trad-|en|eat}} {{)}} [[kategori:Kecap basa Batawi]] gvdqfqdsolx0r8yaygkcexpyth4b55j Citakan:jawa 10 4993 23806 14741 2026-04-01T12:12:28Z Deepturquoise 2456 23806 wikitext text/x-wiki Basa Jawa<includeonly>[[Kategori:Kecap {{jv}}]]</includeonly><noinclude>[[Kategori:{{kat:basa}}|jv]]</noinclude> h01111vs2de8uulrky0vw1grcb9zre7 ajul 0 5208 23816 15035 2026-04-02T04:20:53Z Deepturquoise 2456 23816 wikitext text/x-wiki =={{sunda}}== {{-k k-|su}} * pagawéan notog naon waé anu ngagantung ku gantar sangkan ragrag. 
:* ''Hayu jang, urang '''ngajul''' tangkal mangga na Bu Haji.'' :* ''Ulah hilap mawa gantar keur '''pangajul''' buah di imah Bu Haji.'' === Tingkatan basa === * '''ajul''': basa loma {{kamus|ver=danadibrata}} [[Kategori:Beasiswa Daring II - Wiktionary Sunda]] [[Kategori:Beasiswa Daring II-Serigalakampus]] ajvkl9k1b4ez4lop2h5c58ac95lcy56 kul 0 6855 23811 19665 2026-04-01T12:52:40Z Deepturquoise 2456 23811 wikitext text/x-wiki =={{batawi}}== {{-verba-|bew}} * [[tuang]], [[dahar]], [[neda]] :* ''Énté udè '''kul''' belon?'' :* Anjeun atos '''tuang'''? [[kategori:Kecap basa Batawi]] 3k53x5yszk23mfe0osq8j70bw6jxv56 ranyéd 0 8103 23804 22967 2026-04-01T12:09:08Z Deepturquoise 2456 23804 wikitext text/x-wiki =={{sunda}}== {{-k b-|su}} {{audio|su|LL-Q34002 (sun)-Hayati Mayang Arum-ranyéd.wav|''Pangucapan''}} #jinah, pagawéan dosa #:''Ngaranyéd téh pagawéan anu dilarang agama.'' #::''Zinah adalah perilaku yang dilarang agama.'' === Tingkatan basa === * '''ranyéd''': basa loma {{tarjamahan}} {{(}} * {{id}} : {{trad-|id|zina}} {{)}} [[Kategori:WikiTutur 3.0 - Sunda]] [[Kategori:WikiTutur 3.0 - Kopdar Bandung]] bdoy17z48f79pn5w3j3vqg0rk5uagw1 Modul:su 828 8463 23807 2026-04-01T12:16:41Z Deepturquoise 2456 anyar 23807 Scribunto text/plain local export = {} local links = require("Modul:links") local lang = require("Modul:languages") function export.set(frame) local result = {} for term in mw.text.gsplit(frame.args[1], ", ") do table.insert(result, links.full_link({ lang = lang.getByCode("su"), term = term })) end return table.concat(result, ", ") end return export epwle7ffu45kv8bt1z93xchqyd3wlkw Modul:languages/doSubstitutions 828 8464 23808 2026-04-01T12:20:00Z Deepturquoise 2456 anyar 23808 Scribunto text/plain local m_str_utils = require("Module:string utilities") local codepoint = m_str_utils.codepoint local gsub = m_str_utils.gsub local safe_require = require("Module:utilities").safe_require local u = m_str_utils.char local function doRemoveExceptions(text, sc, 
remove_exceptions) local substitutes, i = {}, 0 local function insert_substitute(m) i = i + 1 table.insert(substitutes, m) return u(0x80000 + i) end for _, exception in ipairs(remove_exceptions) do exception = sc:toFixedNFD(exception) text = gsub(text, exception, insert_substitute) end return text, substitutes end local function undoRemoveExceptions(text, substitutes) return text:gsub("\242[\128-\191]*", function(m) return substitutes[codepoint(m) - 0x80000] end) end local function doSubstitutions(text, self, sc, substitution_data, function_name, recursed) local fail, cats = nil, {} -- If there are language-specific substitutes given in the data module, use those. if type(substitution_data) == "table" then -- If a script is specified, run this function with the script-specific data before continuing. local sc_code = sc:getCode() if substitution_data[sc_code] then text, fail, cats = doSubstitutions(text, self, sc, substitution_data[sc_code], function_name, true) -- Hant, Hans and Hani are usually treated the same, so add a special case to avoid having to specify each one separately. elseif sc_code:match("^Han") and substitution_data.Hani then text, fail, cats = doSubstitutions(text, self, sc, substitution_data.Hani, function_name, true) -- Substitution data with key 1 in the outer table may be given as a fallback. elseif substitution_data[1] then text, fail, cats = doSubstitutions(text, self, sc, substitution_data[1], function_name, true) end -- Iterate over all strings in the "from" subtable, and gsub with the corresponding string in "to". We work with the NFD decomposed forms, as this simplifies many substitutions. if substitution_data.from then for i, from in ipairs(substitution_data.from) do -- We normalize each loop, to ensure multi-stage substitutions work correctly. text = sc:toFixedNFD(text) -- Check whether specific magic characters are present, as they rely on UTF-8 compatibility. If not, just use string.gsub. 
In most cases, doing this is faster than using mw.ustring.gsub every time. text = gsub(text, sc:toFixedNFD(from), substitution_data.to[i] or "") end end if substitution_data.remove_diacritics then text = sc:toFixedNFD(text) -- Convert exceptions to PUA. local substitutes if substitution_data.remove_exceptions then text, substitutes = doRemoveExceptions(text, sc, substitution_data.remove_exceptions) end -- Strip diacritics. text = gsub(text, "[" .. substitution_data.remove_diacritics .. "]", "") -- Convert exceptions back. if substitution_data.remove_exceptions then text = undoRemoveExceptions(text, substitutes) end end elseif type(substitution_data) == "string" then -- If there is a dedicated function module, use that. local module = safe_require("Module:" .. substitution_data) if module then if function_name == "tr" then text, fail, cats = module[function_name](text, self:getCode(), sc:getCode()) else text, fail, cats = module[function_name](sc:toFixedNFD(text), self:getCode(), sc:getCode()) end else error("Substitution data '" .. substitution_data .. "' does not match an existing module.") end end -- Don't normalize to NFC if this is the inner loop or if a module returned nil. if recursed or not text then return text, fail, cats else -- Fix any discouraged sequences created during the substitution process, and normalize into the final form. text = sc:fixDiscouragedSequences(text) return sc:toFixedNFC(text), fail, cats end end -- This avoids calling into globals with require when the main function recurses. return function (text, self, sc, substitution_data, function_name) return doSubstitutions(text, self, sc, substitution_data, function_name) end mjjhpxy2ma918nayg388mj7luaq5l6p Modul:string/encode entities 828 8465 23809 2026-04-01T12:21:13Z Deepturquoise 2456 anyar 23809 Scribunto text/plain -- TO BE REPLACED BY encode_entities in [[Module:string utilities]]. 
This function decodes on input by default to prevent double-encoding, which the new function does not, so implementations need to take this into account when being converted. local debug_track_module = "Module:debug/track" local string_decode_entities_module = "Module:string/decodeEntities" local string_utilities_module = "Module:string utilities" local require = require local function decode_entities(...) decode_entities = require(string_decode_entities_module) return decode_entities(...) end local function encode_entities(...) encode_entities = require(string_utilities_module).encode_entities return encode_entities(...) end local function track(...) track = require(debug_track_module) return track(...) end return function(str, charset, raw) if not raw then local decoded = decode_entities(str) if decoded ~= str then track("string/encode entities/decoded first") end str = decoded end return encode_entities(str, charset, nil, true) end bvh4hqobw96dy8gcuv3yoq2nlvj06si Citakan:suword 10 8466 23810 2026-04-01T12:26:44Z Deepturquoise 2456 anyar, nyandak ti id-wikt 23810 wikitext text/x-wiki <includeonly><table class="wikitable floatright" style="text-align: left;"><!-- --><tr><th>[[w:id:Tatakrama bahasa Sunda|Tatakrama basa Sunda]]</th></tr><!-- -->{{#if:{{{1|}}}{{{sk|}}}|<tr><td>'''Sunda Kuno''': {{#invoke:su|set|{{{1|}}}{{{sk|}}}}}</td></tr>}}<!-- -->{{#if:{{{2|}}}{{{hb|}}}|<tr><td>'''[[:w:Hormat#Hormat ka batur|hormat ka batur]]''': {{#invoke:su|set|{{{2|}}}{{{hb|}}}}}</td></tr>}}<!-- -->{{#if:{{{3|}}}{{{hs|}}}|<tr><td>'''[[:w:Hormat#Hormat ka sorangan|hormat ka sorangan]]''': {{#invoke:su|set|{{{3|}}}{{{hs|}}}}}</td></tr>}}<!-- -->{{#if:{{{4|}}}{{{h|}}}|<tr><td>'''[[:w:Hormat|hormat]]''': {{#invoke:su|set|{{{4|}}}{{{h|}}}}}</td></tr>}}<!-- -->{{#if:{{{5|}}}{{{hl|}}}|<tr><td>'''[[:w:Loma#Loma|hormat-loma]]''': {{#invoke:su|set|{{{5|}}}{{{hl|}}}}}</td></tr>}}<!-- -->{{#if:{{{6|}}}{{{hsl|}}}|<tr><td>'''[[:w:Hormat#Penggunaan kata loma|hormat ka sorangan-loma]]''': 
{{#invoke:su|set|{{{6|}}}{{{hsl|}}}}}</td></tr>}}<!-- -->{{#if:{{{7|}}}{{{l|}}}|<tr><td>'''[[:w:Loma|loma]]''': {{#invoke:su|set|{{{7|}}}{{{l|}}}}}</td></tr>}}<!-- -->{{#if:{{{8|}}}{{{ch|}}}|<tr><td>'''[[:w:Loma#Kasar pisan|cohag]]''': {{#invoke:su|set|{{{8|}}}{{{ch|}}}}}</td></tr>}}<!-- --></table></includeonly><noinclude> == Pituduh == === Cara pamakéanana === <pre> {{suword|sk=kecap 1|hb=kecap 2|hs=kecap 3|h=kecap 4|hl=kecap 5|hsl=kecap 6|l=kecap 7|ch=kecap 8}} </pre> === Hasilna === {{suword|sk=kecap 1|hb=kecap 2|hs=kecap 3|h=kecap 4|hl=kecap 5|hsl=kecap 6|l=kecap 7|ch=kecap 8}} [[Kategori:Citakan basa Sunda]]</noinclude> rkfqx1d9z9hg82mjlyz4z73tp4qsa4w Modul:fun/isCallable 828 8467 23812 2026-04-01T13:18:27Z Deepturquoise 2456 anyar 23812 Scribunto text/plain local debug_track_module = "Module:debug/track" local table_get_metamethod_module = "Module:table/getMetamethod" local require = require local type = type local function debug_track(...) debug_track = require(debug_track_module) return debug_track(...) end local function get_metamethod(...) get_metamethod = require(table_get_metamethod_module) return get_metamethod(...) end --[==[ Return {true} if the input is a function or functor (an object which can be called like a function, because it has a {__call} metamethod). Note: if the input is an object with a {__call} metamethod, but this function is not able to find it because the object's metatable is protected with {__metatable}, then it will return {false} by default, or {nil} if the {allow_maybe} flag is set.]==] return function(obj, allow_maybe) if type(obj) == "function" then return true end -- An object is callable if it has a __call metamethod, so try to get it -- with get_metamethod(). local success, __call = get_metamethod(obj, "__call") -- If this succeeds, `obj` will only be callable if the __call metamethod is -- a function (i.e. it can't itself be a callable table), so don't recurse -- to check it. 
if __call and type(__call) == "function" then return true -- If not, then the metatable is protected, so it's not possible to know if -- `obj` is callable without actually calling it. elseif not success then debug_track("fun/isCallable/protected metatable") if allow_maybe then return nil end end return false end nr6fmojt64uvmsick8bst0i74ec4d2z Modul:headword/page 828 8468 23813 2026-04-01T13:19:59Z Deepturquoise 2456 anyar 23813 Scribunto text/plain local export = {} local languages_module = "Module:languages" local maintenance_category_module = "Module:maintenance category" local string_compare_module = "Module:string/compare" local string_decode_entities_module = "Module:string/decodeEntities" local string_remove_comments_module = "Module:string/removeComments" local string_utilities_module = "Module:string utilities" local table_module = "Module:table" local template_parser_module = "Module:template parser" local mw = mw local string = string local table = table local ustring = mw.ustring local concat = table.concat local find = string.find local format = string.format local gsub = string.gsub local insert = table.insert local load_data = mw.loadData local match = string.match local new_title = mw.title.new local pairs = pairs local require = require local sub = string.sub local toNFC = ustring.toNFC local toNFD = ustring.toNFD local ugsub = ustring.gsub local function class_else_type(...) class_else_type = require(template_parser_module).class_else_type return class_else_type(...) end local function decode_entities(...) decode_entities = require(string_decode_entities_module) return decode_entities(...) end local function encode_entities(...) encode_entities = require(string_utilities_module).encode_entities return encode_entities(...) end local function get_category(...) get_category = require(maintenance_category_module).get_category return get_category(...) end local function get_lang(...) get_lang = require(languages_module).getByCode return get_lang(...) 
end local function list_to_set(...) list_to_set = require(table_module).listToSet return list_to_set(...) end local function parse(...) parse = require(template_parser_module).parse return parse(...) end local function remove_comments(...) remove_comments = require(string_remove_comments_module) return remove_comments(...) end local function split(...) split = require(string_utilities_module).split return split(...) end local function string_compare(...) string_compare = require(string_compare_module) return string_compare(...) end local function uupper(...) uupper = require(string_utilities_module).upper return uupper(...) end --[==[ Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] local langnames local function get_langnames() langnames, get_langnames = load_data("Module:languages/canonical names"), nil return langnames end -- Combining character data used when categorising unusual characters. These resolve into two patterns, used to find -- single combining characters (i.e. character + diacritic(s)) or double combining characters (i.e. character + -- diacritic(s) + character). -- Charsets are in the format used by Unicode's UnicodeSet tool: https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp. -- Single combining characters. -- Charset: [[:M:]&[:^Canonical_Combining_Class=/^Double_/:]&[:^subhead=Grapheme joiner:]&[:^Variation_Selector=Yes:]] -- Note: concatenating hundreds of lines at once gives an error, so () are used every 150 lines to break it up into chunks. local comb_chars_single = ("\204\128-\205\142" .. -- U+0300-U+034E "\205\144-\205\155" .. -- U+0350-U+035B "\205\163-\205\175" .. 
-- U+0363-U+036F "\210\131-\210\137" .. -- U+0483-U+0489 "\214\145-\214\189" .. -- U+0591-U+05BD "\214\191" .. -- U+05BF "\215\129" .. -- U+05C1 "\215\130" .. -- U+05C2 "\215\132" .. -- U+05C4 "\215\133" .. -- U+05C5 "\215\135" .. -- U+05C7 "\216\144-\216\154" .. -- U+0610-U+061A "\217\139-\217\159" .. -- U+064B-U+065F "\217\176" .. -- U+0670 "\219\150-\219\156" .. -- U+06D6-U+06DC "\219\159-\219\164" .. -- U+06DF-U+06E4 "\219\167" .. -- U+06E7 "\219\168" .. -- U+06E8 "\219\170-\219\173" .. -- U+06EA-U+06ED "\220\145" .. -- U+0711 "\220\176-\221\138" .. -- U+0730-U+074A "\222\166-\222\176" .. -- U+07A6-U+07B0 "\223\171-\223\179" .. -- U+07EB-U+07F3 "\223\189" .. -- U+07FD "\224\160\150-\224\160\153" .. -- U+0816-U+0819 "\224\160\155-\224\160\163" .. -- U+081B-U+0823 "\224\160\165-\224\160\167" .. -- U+0825-U+0827 "\224\160\169-\224\160\173" .. -- U+0829-U+082D "\224\161\153-\224\161\155" .. -- U+0859-U+085B "\224\162\151-\224\162\159" .. -- U+0897-U+089F "\224\163\138-\224\163\161" .. -- U+08CA-U+08E1 "\224\163\163-\224\164\131" .. -- U+08E3-U+0903 "\224\164\186-\224\164\188" .. -- U+093A-U+093C "\224\164\190-\224\165\143" .. -- U+093E-U+094F "\224\165\145-\224\165\151" .. -- U+0951-U+0957 "\224\165\162" .. -- U+0962 "\224\165\163" .. -- U+0963 "\224\166\129-\224\166\131" .. -- U+0981-U+0983 "\224\166\188" .. -- U+09BC "\224\166\190-\224\167\132" .. -- U+09BE-U+09C4 "\224\167\135" .. -- U+09C7 "\224\167\136" .. -- U+09C8 "\224\167\139-\224\167\141" .. -- U+09CB-U+09CD "\224\167\151" .. -- U+09D7 "\224\167\162" .. -- U+09E2 "\224\167\163" .. -- U+09E3 "\224\167\190" .. -- U+09FE "\224\168\129-\224\168\131" .. -- U+0A01-U+0A03 "\224\168\188" .. -- U+0A3C "\224\168\190-\224\169\130" .. -- U+0A3E-U+0A42 "\224\169\135" .. -- U+0A47 "\224\169\136" .. -- U+0A48 "\224\169\139-\224\169\141" .. -- U+0A4B-U+0A4D "\224\169\145" .. -- U+0A51 "\224\169\176" .. -- U+0A70 "\224\169\177" .. -- U+0A71 "\224\169\181" .. -- U+0A75 "\224\170\129-\224\170\131" .. 
-- U+0A81-U+0A83 "\224\170\188" .. -- U+0ABC "\224\170\190-\224\171\133" .. -- U+0ABE-U+0AC5 "\224\171\135-\224\171\137" .. -- U+0AC7-U+0AC9 "\224\171\139-\224\171\141" .. -- U+0ACB-U+0ACD "\224\171\162" .. -- U+0AE2 "\224\171\163" .. -- U+0AE3 "\224\171\186-\224\171\191" .. -- U+0AFA-U+0AFF "\224\172\129-\224\172\131" .. -- U+0B01-U+0B03 "\224\172\188" .. -- U+0B3C "\224\172\190-\224\173\132" .. -- U+0B3E-U+0B44 "\224\173\135" .. -- U+0B47 "\224\173\136" .. -- U+0B48 "\224\173\139-\224\173\141" .. -- U+0B4B-U+0B4D "\224\173\149-\224\173\151" .. -- U+0B55-U+0B57 "\224\173\162" .. -- U+0B62 "\224\173\163" .. -- U+0B63 "\224\174\130" .. -- U+0B82 "\224\174\190-\224\175\130" .. -- U+0BBE-U+0BC2 "\224\175\134-\224\175\136" .. -- U+0BC6-U+0BC8 "\224\175\138-\224\175\141" .. -- U+0BCA-U+0BCD "\224\175\151" .. -- U+0BD7 "\224\176\128-\224\176\132" .. -- U+0C00-U+0C04 "\224\176\188" .. -- U+0C3C "\224\176\190-\224\177\132" .. -- U+0C3E-U+0C44 "\224\177\134-\224\177\136" .. -- U+0C46-U+0C48 "\224\177\138-\224\177\141" .. -- U+0C4A-U+0C4D "\224\177\149" .. -- U+0C55 "\224\177\150" .. -- U+0C56 "\224\177\162" .. -- U+0C62 "\224\177\163" .. -- U+0C63 "\224\178\129-\224\178\131" .. -- U+0C81-U+0C83 "\224\178\188" .. -- U+0CBC "\224\178\190-\224\179\132" .. -- U+0CBE-U+0CC4 "\224\179\134-\224\179\136" .. -- U+0CC6-U+0CC8 "\224\179\138-\224\179\141" .. -- U+0CCA-U+0CCD "\224\179\149" .. -- U+0CD5 "\224\179\150" .. -- U+0CD6 "\224\179\162" .. -- U+0CE2 "\224\179\163" .. -- U+0CE3 "\224\179\179" .. -- U+0CF3 "\224\180\128-\224\180\131" .. -- U+0D00-U+0D03 "\224\180\187" .. -- U+0D3B "\224\180\188" .. -- U+0D3C "\224\180\190-\224\181\132" .. -- U+0D3E-U+0D44 "\224\181\134-\224\181\136" .. -- U+0D46-U+0D48 "\224\181\138-\224\181\141" .. -- U+0D4A-U+0D4D "\224\181\151" .. -- U+0D57 "\224\181\162" .. -- U+0D62 "\224\181\163" .. -- U+0D63 "\224\182\129-\224\182\131" .. -- U+0D81-U+0D83 "\224\183\138" .. -- U+0DCA "\224\183\143-\224\183\148" .. -- U+0DCF-U+0DD4 "\224\183\150" .. 
-- U+0DD6 "\224\183\152-\224\183\159" .. -- U+0DD8-U+0DDF "\224\183\178" .. -- U+0DF2 "\224\183\179" .. -- U+0DF3 "\224\184\177" .. -- U+0E31 "\224\184\180-\224\184\186" .. -- U+0E34-U+0E3A "\224\185\135-\224\185\142" .. -- U+0E47-U+0E4E "\224\186\177" .. -- U+0EB1 "\224\186\180-\224\186\188" .. -- U+0EB4-U+0EBC "\224\187\136-\224\187\142" .. -- U+0EC8-U+0ECE "\224\188\152" .. -- U+0F18 "\224\188\153" .. -- U+0F19 "\224\188\181" .. -- U+0F35 "\224\188\183" .. -- U+0F37 "\224\188\185" .. -- U+0F39 "\224\188\190" .. -- U+0F3E "\224\188\191" .. -- U+0F3F "\224\189\177-\224\190\132" .. -- U+0F71-U+0F84 "\224\190\134" .. -- U+0F86 "\224\190\135" .. -- U+0F87 "\224\190\141-\224\190\151" .. -- U+0F8D-U+0F97 "\224\190\153-\224\190\188" .. -- U+0F99-U+0FBC "\224\191\134" .. -- U+0FC6 "\225\128\171-\225\128\190" .. -- U+102B-U+103E "\225\129\150-\225\129\153" .. -- U+1056-U+1059 "\225\129\158-\225\129\160" .. -- U+105E-U+1060 "\225\129\162-\225\129\164" .. -- U+1062-U+1064 "\225\129\167-\225\129\173" .. -- U+1067-U+106D "\225\129\177-\225\129\180" .. -- U+1071-U+1074 "\225\130\130-\225\130\141" .. -- U+1082-U+108D "\225\130\143" .. -- U+108F "\225\130\154-\225\130\157" .. -- U+109A-U+109D "\225\141\157-\225\141\159" .. -- U+135D-U+135F "\225\156\146-\225\156\149" .. -- U+1712-U+1715 "\225\156\178-\225\156\180" .. -- U+1732-U+1734 "\225\157\146" .. -- U+1752 "\225\157\147" .. -- U+1753 "\225\157\178" .. -- U+1772 "\225\157\179" .. -- U+1773 "\225\158\180-\225\159\147") .. -- U+17B4-U+17D3 ("\225\159\157" .. -- U+17DD "\225\162\133" .. -- U+1885 "\225\162\134" .. -- U+1886 "\225\162\169" .. -- U+18A9 "\225\164\160-\225\164\171" .. -- U+1920-U+192B "\225\164\176-\225\164\187" .. -- U+1930-U+193B "\225\168\151-\225\168\155" .. -- U+1A17-U+1A1B "\225\169\149-\225\169\158" .. -- U+1A55-U+1A5E "\225\169\160-\225\169\188" .. -- U+1A60-U+1A7C "\225\169\191" .. -- U+1A7F "\225\170\176-\225\171\142" .. -- U+1AB0-U+1ACE "\225\172\128-\225\172\132" .. 
-- U+1B00-U+1B04 "\225\172\180-\225\173\132" .. -- U+1B34-U+1B44 "\225\173\171-\225\173\179" .. -- U+1B6B-U+1B73 "\225\174\128-\225\174\130" .. -- U+1B80-U+1B82 "\225\174\161-\225\174\173" .. -- U+1BA1-U+1BAD "\225\175\166-\225\175\179" .. -- U+1BE6-U+1BF3 "\225\176\164-\225\176\183" .. -- U+1C24-U+1C37 "\225\179\144-\225\179\146" .. -- U+1CD0-U+1CD2 "\225\179\148-\225\179\168" .. -- U+1CD4-U+1CE8 "\225\179\173" .. -- U+1CED "\225\179\180" .. -- U+1CF4 "\225\179\183-\225\179\185" .. -- U+1CF7-U+1CF9 "\225\183\128-\225\183\140" .. -- U+1DC0-U+1DCC "\225\183\142-\225\183\187" .. -- U+1DCE-U+1DFB "\225\183\189-\225\183\191" .. -- U+1DFD-U+1DFF "\226\131\144-\226\131\176" .. -- U+20D0-U+20F0 "\226\179\175-\226\179\177" .. -- U+2CEF-U+2CF1 "\226\181\191" .. -- U+2D7F "\226\183\160-\226\183\191" .. -- U+2DE0-U+2DFF "\227\128\170-\227\128\175" .. -- U+302A-U+302F "\227\130\153" .. -- U+3099 "\227\130\154" .. -- U+309A "\234\153\175-\234\153\178" .. -- U+A66F-U+A672 "\234\153\180-\234\153\189" .. -- U+A674-U+A67D "\234\154\158" .. -- U+A69E "\234\154\159" .. -- U+A69F "\234\155\176" .. -- U+A6F0 "\234\155\177" .. -- U+A6F1 "\234\160\130" .. -- U+A802 "\234\160\134" .. -- U+A806 "\234\160\139" .. -- U+A80B "\234\160\163-\234\160\167" .. -- U+A823-U+A827 "\234\160\172" .. -- U+A82C "\234\162\128" .. -- U+A880 "\234\162\129" .. -- U+A881 "\234\162\180-\234\163\133" .. -- U+A8B4-U+A8C5 "\234\163\160-\234\163\177" .. -- U+A8E0-U+A8F1 "\234\163\191" .. -- U+A8FF "\234\164\166-\234\164\173" .. -- U+A926-U+A92D "\234\165\135-\234\165\147" .. -- U+A947-U+A953 "\234\166\128-\234\166\131" .. -- U+A980-U+A983 "\234\166\179-\234\167\128" .. -- U+A9B3-U+A9C0 "\234\167\165" .. -- U+A9E5 "\234\168\169-\234\168\182" .. -- U+AA29-U+AA36 "\234\169\131" .. -- U+AA43 "\234\169\140" .. -- U+AA4C "\234\169\141" .. -- U+AA4D "\234\169\187-\234\169\189" .. -- U+AA7B-U+AA7D "\234\170\176" .. -- U+AAB0 "\234\170\178-\234\170\180" .. -- U+AAB2-U+AAB4 "\234\170\183" .. -- U+AAB7 "\234\170\184" .. 
-- U+AAB8 "\234\170\190" .. -- U+AABE "\234\170\191" .. -- U+AABF "\234\171\129" .. -- U+AAC1 "\234\171\171-\234\171\175" .. -- U+AAEB-U+AAEF "\234\171\181" .. -- U+AAF5 "\234\171\182" .. -- U+AAF6 "\234\175\163-\234\175\170" .. -- U+ABE3-U+ABEA "\234\175\172" .. -- U+ABEC "\234\175\173" .. -- U+ABED "\239\172\158" .. -- U+FB1E "\239\184\160-\239\184\175" .. -- U+FE20-U+FE2F "\240\144\135\189" .. -- U+101FD "\240\144\139\160" .. -- U+102E0 "\240\144\141\182-\240\144\141\186" .. -- U+10376-U+1037A "\240\144\168\129-\240\144\168\131" .. -- U+10A01-U+10A03 "\240\144\168\133" .. -- U+10A05 "\240\144\168\134" .. -- U+10A06 "\240\144\168\140-\240\144\168\143" .. -- U+10A0C-U+10A0F "\240\144\168\184-\240\144\168\186" .. -- U+10A38-U+10A3A "\240\144\168\191" .. -- U+10A3F "\240\144\171\165" .. -- U+10AE5 "\240\144\171\166" .. -- U+10AE6 "\240\144\180\164-\240\144\180\167" .. -- U+10D24-U+10D27 "\240\144\181\169-\240\144\181\173" .. -- U+10D69-U+10D6D "\240\144\186\171" .. -- U+10EAB "\240\144\186\172" .. -- U+10EAC "\240\144\187\188-\240\144\187\191" .. -- U+10EFC-U+10EFF "\240\144\189\134-\240\144\189\144" .. -- U+10F46-U+10F50 "\240\144\190\130-\240\144\190\133" .. -- U+10F82-U+10F85 "\240\145\128\128-\240\145\128\130" .. -- U+11000-U+11002 "\240\145\128\184-\240\145\129\134" .. -- U+11038-U+11046 "\240\145\129\176" .. -- U+11070 "\240\145\129\179" .. -- U+11073 "\240\145\129\180" .. -- U+11074 "\240\145\129\191-\240\145\130\130" .. -- U+1107F-U+11082 "\240\145\130\176-\240\145\130\186" .. -- U+110B0-U+110BA "\240\145\131\130" .. -- U+110C2 "\240\145\132\128-\240\145\132\130" .. -- U+11100-U+11102 "\240\145\132\167-\240\145\132\180" .. -- U+11127-U+11134 "\240\145\133\133" .. -- U+11145 "\240\145\133\134" .. -- U+11146 "\240\145\133\179" .. -- U+11173 "\240\145\134\128-\240\145\134\130" .. -- U+11180-U+11182 "\240\145\134\179-\240\145\135\128" .. -- U+111B3-U+111C0 "\240\145\135\137-\240\145\135\140" .. -- U+111C9-U+111CC "\240\145\135\142" .. 
-- U+111CE "\240\145\135\143" .. -- U+111CF "\240\145\136\172-\240\145\136\183" .. -- U+1122C-U+11237 "\240\145\136\190" .. -- U+1123E "\240\145\137\129" .. -- U+11241 "\240\145\139\159-\240\145\139\170" .. -- U+112DF-U+112EA "\240\145\140\128-\240\145\140\131" .. -- U+11300-U+11303 "\240\145\140\187" .. -- U+1133B "\240\145\140\188" .. -- U+1133C "\240\145\140\190-\240\145\141\132" .. -- U+1133E-U+11344 "\240\145\141\135" .. -- U+11347 "\240\145\141\136" .. -- U+11348 "\240\145\141\139-\240\145\141\141" .. -- U+1134B-U+1134D "\240\145\141\151" .. -- U+11357 "\240\145\141\162" .. -- U+11362 "\240\145\141\163" .. -- U+11363 "\240\145\141\166-\240\145\141\172" .. -- U+11366-U+1136C "\240\145\141\176-\240\145\141\180" .. -- U+11370-U+11374 "\240\145\142\184-\240\145\143\128" .. -- U+113B8-U+113C0 "\240\145\143\130" .. -- U+113C2 "\240\145\143\133" .. -- U+113C5 "\240\145\143\135-\240\145\143\138" .. -- U+113C7-U+113CA "\240\145\143\140-\240\145\143\144" .. -- U+113CC-U+113D0 "\240\145\143\146" .. -- U+113D2 "\240\145\143\161" .. -- U+113E1 "\240\145\143\162" .. -- U+113E2 "\240\145\144\181-\240\145\145\134" .. -- U+11435-U+11446 "\240\145\145\158" .. -- U+1145E "\240\145\146\176-\240\145\147\131" .. -- U+114B0-U+114C3 "\240\145\150\175-\240\145\150\181" .. -- U+115AF-U+115B5 "\240\145\150\184-\240\145\151\128" .. -- U+115B8-U+115C0 "\240\145\151\156" .. -- U+115DC "\240\145\151\157" .. -- U+115DD "\240\145\152\176-\240\145\153\128" .. -- U+11630-U+11640 "\240\145\154\171-\240\145\154\183" .. -- U+116AB-U+116B7 "\240\145\156\157-\240\145\156\171" .. -- U+1171D-U+1172B "\240\145\160\172-\240\145\160\186" .. -- U+1182C-U+1183A "\240\145\164\176-\240\145\164\181" .. -- U+11930-U+11935 "\240\145\164\183" .. -- U+11937 "\240\145\164\184" .. -- U+11938 "\240\145\164\187-\240\145\164\190" .. -- U+1193B-U+1193E "\240\145\165\128") .. -- U+11940 ("\240\145\165\130" .. -- U+11942 "\240\145\165\131" .. -- U+11943 "\240\145\167\145-\240\145\167\151" .. 
-- U+119D1-U+119D7 "\240\145\167\154-\240\145\167\160" .. -- U+119DA-U+119E0 "\240\145\167\164" .. -- U+119E4 "\240\145\168\129-\240\145\168\138" .. -- U+11A01-U+11A0A "\240\145\168\179-\240\145\168\185" .. -- U+11A33-U+11A39 "\240\145\168\187-\240\145\168\190" .. -- U+11A3B-U+11A3E "\240\145\169\135" .. -- U+11A47 "\240\145\169\145-\240\145\169\155" .. -- U+11A51-U+11A5B "\240\145\170\138-\240\145\170\153" .. -- U+11A8A-U+11A99 "\240\145\176\175-\240\145\176\182" .. -- U+11C2F-U+11C36 "\240\145\176\184-\240\145\176\191" .. -- U+11C38-U+11C3F "\240\145\178\146-\240\145\178\167" .. -- U+11C92-U+11CA7 "\240\145\178\169-\240\145\178\182" .. -- U+11CA9-U+11CB6 "\240\145\180\177-\240\145\180\182" .. -- U+11D31-U+11D36 "\240\145\180\186" .. -- U+11D3A "\240\145\180\188" .. -- U+11D3C "\240\145\180\189" .. -- U+11D3D "\240\145\180\191-\240\145\181\133" .. -- U+11D3F-U+11D45 "\240\145\181\135" .. -- U+11D47 "\240\145\182\138-\240\145\182\142" .. -- U+11D8A-U+11D8E "\240\145\182\144" .. -- U+11D90 "\240\145\182\145" .. -- U+11D91 "\240\145\182\147-\240\145\182\151" .. -- U+11D93-U+11D97 "\240\145\187\179-\240\145\187\182" .. -- U+11EF3-U+11EF6 "\240\145\188\128" .. -- U+11F00 "\240\145\188\129" .. -- U+11F01 "\240\145\188\131" .. -- U+11F03 "\240\145\188\180-\240\145\188\186" .. -- U+11F34-U+11F3A "\240\145\188\190-\240\145\189\130" .. -- U+11F3E-U+11F42 "\240\145\189\154" .. -- U+11F5A "\240\147\145\128" .. -- U+13440 "\240\147\145\135-\240\147\145\149" .. -- U+13447-U+13455 "\240\150\132\158-\240\150\132\175" .. -- U+1611E-U+1612F "\240\150\171\176-\240\150\171\180" .. -- U+16AF0-U+16AF4 "\240\150\172\176-\240\150\172\182" .. -- U+16B30-U+16B36 "\240\150\189\143" .. -- U+16F4F "\240\150\189\145-\240\150\190\135" .. -- U+16F51-U+16F87 "\240\150\190\143-\240\150\190\146" .. -- U+16F8F-U+16F92 "\240\150\191\164" .. -- U+16FE4 "\240\150\191\176" .. -- U+16FF0 "\240\150\191\177" .. -- U+16FF1 "\240\155\178\157" .. -- U+1BC9D "\240\155\178\158" .. 
-- U+1BC9E "\240\156\188\128-\240\156\188\173" .. -- U+1CF00-U+1CF2D "\240\156\188\176-\240\156\189\134" .. -- U+1CF30-U+1CF46 "\240\157\133\165-\240\157\133\169" .. -- U+1D165-U+1D169 "\240\157\133\173-\240\157\133\178" .. -- U+1D16D-U+1D172 "\240\157\133\187-\240\157\134\130" .. -- U+1D17B-U+1D182 "\240\157\134\133-\240\157\134\139" .. -- U+1D185-U+1D18B "\240\157\134\170-\240\157\134\173" .. -- U+1D1AA-U+1D1AD "\240\157\137\130-\240\157\137\132" .. -- U+1D242-U+1D244 "\240\157\168\128-\240\157\168\182" .. -- U+1DA00-U+1DA36 "\240\157\168\187-\240\157\169\172" .. -- U+1DA3B-U+1DA6C "\240\157\169\181" .. -- U+1DA75 "\240\157\170\132" .. -- U+1DA84 "\240\157\170\155-\240\157\170\159" .. -- U+1DA9B-U+1DA9F "\240\157\170\161-\240\157\170\175" .. -- U+1DAA1-U+1DAAF "\240\158\128\128-\240\158\128\134" .. -- U+1E000-U+1E006 "\240\158\128\136-\240\158\128\152" .. -- U+1E008-U+1E018 "\240\158\128\155-\240\158\128\161" .. -- U+1E01B-U+1E021 "\240\158\128\163" .. -- U+1E023 "\240\158\128\164" .. -- U+1E024 "\240\158\128\166-\240\158\128\170" .. -- U+1E026-U+1E02A "\240\158\130\143" .. -- U+1E08F "\240\158\132\176-\240\158\132\182" .. -- U+1E130-U+1E136 "\240\158\138\174" .. -- U+1E2AE "\240\158\139\172-\240\158\139\175" .. -- U+1E2EC-U+1E2EF "\240\158\147\172-\240\158\147\175" .. -- U+1E4EC-U+1E4EF "\240\158\151\174" .. -- U+1E5EE "\240\158\151\175" .. -- U+1E5EF "\240\158\163\144-\240\158\163\150" .. -- U+1E8D0-U+1E8D6 "\240\158\165\132-\240\158\165\138") -- U+1E944-U+1E94A -- Double combining characters. -- Charset: [[:M:]&[:Canonical_Combining_Class=/^Double_/:]&[:^subhead=Grapheme joiner:]&[:^Variation_Selector=Yes:]] local comb_chars_double = "\205\156-\205\162" .. -- U+035C-U+0362 "\225\183\141" .. -- U+1DCD "\225\183\188" -- U+1DFC -- Variation selectors etc.; separated out so that we don't get categories for them. -- Charset: [[:M:]&[[:subhead=Grapheme joiner:][:Variation_Selector=Yes:]]]. local comb_chars_other = "\205\143" .. 
-- U+034F "\225\160\139-\225\160\141" .. -- U+180B-U+180D "\225\160\143" .. -- U+180F "\239\184\128-\239\184\143" .. -- U+FE00-U+FE0F "\243\160\132\128-\243\160\135\175" -- U+E0100-U+E01EF local comb_chars_all = comb_chars_single .. comb_chars_double .. comb_chars_other local comb_chars = { combined_single = "[^" .. comb_chars_all .. "][" .. comb_chars_single .. comb_chars_other .. "]+%f[^" .. comb_chars_all .. "]", combined_double = "[^" .. comb_chars_all .. "][" .. comb_chars_single .. comb_chars_other .. "]*[" .. comb_chars_double .. "]+[" .. comb_chars_all .. "]*.[" .. comb_chars_single .. comb_chars_other .. "]*", diacritics_single = "[" .. comb_chars_single .. "]", diacritics_double = "[" .. comb_chars_double .. "]", diacritics_all = "[" .. comb_chars_all .. "]" } -- Somewhat curated list from https://unicode.org/Public/emoji/16.0/emoji-sequences.txt. -- NOTE: There are lots more emoji sequences involving non-emoji Plane 0 symbols followed by 0xFE0F, which we don't -- (yet?) handle. local emoji_chars = "\226\140\154" .. -- U+231A (⌚) "\226\140\155" .. -- U+231B (⌛) "\226\140\168" .. -- U+2328 (⌨) "\226\143\143" .. -- U+23CF (⏏) "\226\143\169-\226\143\179" .. -- U+23E9-U+23F3 (⏩-⏳) "\226\143\184-\226\143\186" .. -- U+23F8-U+23FA (⏸-⏺) "\226\150\170" .. -- U+25AA (▪) "\226\150\171" .. -- U+25AB (▫) "\226\150\182" .. -- U+25B6 (▶) "\226\151\128" .. -- U+25C0 (◀) "\226\151\187-\226\151\190" .. -- U+25FB-U+25FE (◻-◾) "\226\152\128-\226\152\132" .. -- U+2600-U+2604 (☀-☄) "\226\152\142" .. -- U+260E (☎) "\226\152\145" .. -- U+2611 (☑) "\226\152\148" .. -- U+2614 (☔) "\226\152\149" .. -- U+2615 (☕) "\226\152\152" .. -- U+2618 (☘) "\226\152\157" .. -- U+261D (☝) "\226\152\160" .. -- U+2620 (☠) "\226\152\162" .. -- U+2622 (☢) "\226\152\163" .. -- U+2623 (☣) "\226\152\166" .. -- U+2626 (☦) "\226\152\170" .. -- U+262A (☪) "\226\152\174" .. -- U+262E (☮) "\226\152\175" .. -- U+262F (☯) "\226\152\184-\226\152\186" .. -- U+2638-U+263A (☸-☺) "\226\153\136-\226\153\147" .. 
-- U+2648-U+2653 (♈-♓) "\226\153\159" .. -- U+265F (♟) "\226\153\160" .. -- U+2660 (♠) "\226\153\163" .. -- U+2663 (♣) "\226\153\165" .. -- U+2665 (♥) "\226\153\166" .. -- U+2666 (♦) "\226\153\168" .. -- U+2668 (♨) "\226\153\187" .. -- U+267B (♻) "\226\153\190" .. -- U+267E (♾) "\226\153\191" .. -- U+267F (♿) "\226\154\146-\226\154\151" .. -- U+2692-U+2697 (⚒-⚗) "\226\154\153" .. -- U+2699 (⚙) "\226\154\155" .. -- U+269B (⚛) "\226\154\156" .. -- U+269C (⚜) "\226\154\160" .. -- U+26A0 (⚠) "\226\154\161" .. -- U+26A1 (⚡) "\226\154\170" .. -- U+26AA (⚪) "\226\154\171" .. -- U+26AB (⚫) "\226\154\176" .. -- U+26B0 (⚰) "\226\154\177" .. -- U+26B1 (⚱) "\226\154\189" .. -- U+26BD (⚽) "\226\154\190" .. -- U+26BE (⚾) "\226\155\132" .. -- U+26C4 (⛄) "\226\155\133" .. -- U+26C5 (⛅) "\226\155\136" .. -- U+26C8 (⛈) "\226\155\142" .. -- U+26CE (⛎) "\226\155\143" .. -- U+26CF (⛏) "\226\155\145" .. -- U+26D1 (⛑) "\226\155\147" .. -- U+26D3 (⛓) "\226\155\148" .. -- U+26D4 (⛔) "\226\155\169" .. -- U+26E9 (⛩) "\226\155\170" .. -- U+26EA (⛪) "\226\155\176-\226\155\181" .. -- U+26F0-U+26F5 (⛰-⛵) "\226\155\183-\226\155\186" .. -- U+26F7-U+26FA (⛷-⛺) "\226\155\189" .. -- U+26FD (⛽) "\226\156\130" .. -- U+2702 (✂) "\226\156\133" .. -- U+2705 (✅) "\226\156\136-\226\156\141" .. -- U+2708-U+270D (✈-✍) "\226\156\143" .. -- U+270F (✏) "\226\156\146" .. -- U+2712 (✒) "\226\156\148" .. -- U+2714 (✔) "\226\156\150" .. -- U+2716 (✖) "\226\156\157" .. -- U+271D (✝) "\226\156\161" .. -- U+2721 (✡) "\226\156\168" .. -- U+2728 (✨) "\226\156\179" .. -- U+2733 (✳) "\226\156\180" .. -- U+2734 (✴) "\226\157\132" .. -- U+2744 (❄) "\226\157\135" .. -- U+2747 (❇) "\226\157\140" .. -- U+274C (❌) "\226\157\142" .. -- U+274E (❎) "\226\157\147-\226\157\149" .. -- U+2753-U+2755 (❓-❕) "\226\157\151" .. -- U+2757 (❗) "\226\157\163" .. -- U+2763 (❣) "\226\157\164" .. -- U+2764 (❤) "\226\158\149-\226\158\151" .. -- U+2795-U+2797 (➕-➗) "\226\158\161" .. -- U+27A1 (➡) "\226\158\176" .. -- U+27B0 (➰) "\226\158\191" .. 
-- U+27BF (➿) "\226\164\180" .. -- U+2934 (⤴) "\226\164\181" .. -- U+2935 (⤵) "\226\172\133-\226\172\135" .. -- U+2B05-U+2B07 (⬅-⬇) "\226\172\155" .. -- U+2B1B (⬛) "\226\172\156" .. -- U+2B1C (⬜) "\226\173\144" .. -- U+2B50 (⭐) "\226\173\149" .. -- U+2B55 (⭕) "\227\128\176" .. -- U+3030 (〰) "\227\128\189" .. -- U+303D (〽) "\227\138\151" .. -- U+3297 (㊗) "\227\138\153" .. -- U+3299 (㊙) "\240\159\128\132" .. -- U+1F004 (🀄) "\240\159\131\143" .. -- U+1F0CF (🃏) "\240\159\133\176" .. -- U+1F170 (🅰) "\240\159\133\177" .. -- U+1F171 (🅱) "\240\159\133\190" .. -- U+1F17E (🅾) "\240\159\133\191" .. -- U+1F17F (🅿) "\240\159\134\142" .. -- U+1F18E (🆎) "\240\159\134\145-\240\159\134\154" .. -- U+1F191-U+1F19A (🆑-🆚) "\240\159\136\129" .. -- U+1F201 (🈁) "\240\159\136\130" .. -- U+1F202 (🈂) "\240\159\136\154" .. -- U+1F21A (🈚) "\240\159\136\175" .. -- U+1F22F (🈯) "\240\159\136\178-\240\159\136\186" .. -- U+1F232-U+1F23A (🈲-🈺) "\240\159\137\144" .. -- U+1F250 (🉐) "\240\159\137\145" .. -- U+1F251 (🉑) "\240\159\140\128-\240\159\153\143" .. -- U+1F300-U+1F64F (🌀-🙏) "\240\159\154\128-\240\159\155\151" .. -- U+1F680-U+1F6D7 (🚀-🛗) "\240\159\155\156-\240\159\155\172" .. -- U+1F6DC-U+1F6EC (🛜-🛬) "\240\159\155\176-\240\159\155\188" .. -- U+1F6F0-U+1F6FC (🛰-🛼) "\240\159\159\160-\240\159\159\171" .. -- U+1F7E0-U+1F7EB (🟠-🟫) "\240\159\159\176" .. -- U+1F7F0 (🟰) "\240\159\164\140-\240\159\169\147" .. -- U+1F90C-U+1FA53 (🤌-🩓) "\240\159\169\160-\240\159\169\173" .. -- U+1FA60-U+1FA6D (🩠-🩭) "\240\159\169\176-\240\159\169\188" .. -- U+1FA70-U+1FA7C (🩰-🩼) "\240\159\170\128-\240\159\170\137" .. -- U+1FA80-U+1FA89 (🪀-🪉) "\240\159\170\143-\240\159\171\134" .. -- U+1FA8F-U+1FAC6 (🪏-🫆) "\240\159\171\142-\240\159\171\156" .. -- U+1FACE-U+1FADC (🫎-🫜) "\240\159\171\159-\240\159\171\169" .. 
-- U+1FADF-U+1FAE9 (🫟-🫩)
	"\240\159\171\176-\240\159\171\184" -- U+1FAF0-U+1FAF8 (🫰-🫸)

-- Inverse of the `unsupported_characters` map in [[Module:links/data]], built on first use. `process_page` looks up
-- backtick-delimited escape sequences in it and substitutes the value it finds.
local unsupported_characters
local function get_unsupported_characters()
	local inverted = {}
	-- Publish the cache and retire this loader before filling the table; callers always guard with
	-- `unsupported_characters or get_unsupported_characters()`, so the loader runs at most once.
	unsupported_characters, get_unsupported_characters = inverted, nil
	local source = load_data("Module:links/data").unsupported_characters
	for char, escape in pairs(source) do
		inverted[escape] = char
	end
	return inverted
end

-- Inverse of the `unsupported_titles` map in [[Module:links/data]], built on first use, so that the keys are pagenames
-- and the values are canonical titles.
local unsupported_titles
local function get_unsupported_titles()
	local inverted = {}
	-- Same publish-then-retire scheme as get_unsupported_characters().
	unsupported_titles, get_unsupported_titles = inverted, nil
	local source = load_data("Module:links/data").unsupported_titles
	for canonical, page in pairs(source) do
		inverted[page] = canonical
	end
	return inverted
end

--[==[
Given a pagename (or {nil} for the current page), create and return a data structure describing the page. The returned
object includes the following fields:
* `comb_chars`: A table containing various Lua character class patterns for different types of combined characters
  (those that decompose into multiple characters in the NFD decomposition). The patterns are meant to be used with
  {mw.ustring.find()}.
  The keys are:
** `diacritics_single`: Character class (with surrounding brackets) matching single combining characters
   (character + diacritic);
** `diacritics_double`: Character class (with surrounding brackets) matching double combining characters
   (character + diacritic + character);
** `diacritics_all`: Character class (with surrounding brackets) matching any single combining character, double
   combining character or variation selector;
** `combined_single`: Lua pattern for matching a spacing character followed by one or more single combining
   characters;
** `combined_double`: Lua pattern for matching a combination of two spacing characters separated by one or more double
   combining characters, possibly also with single combining characters;
  (The unbracketed character sets that these patterns are built from are local to the module and are not exposed as
  keys of this table.)
* `emoji_pattern`: A Lua character class pattern (including surrounding brackets) that matches emojis. Meant to be used
  with {mw.ustring.find()}.
* `L2_list`: Ordered list of L2 headings on the page, with the extra key `n` that gives the length of the list.
* `L2_sections`: Lookup table of L2 headings on the page, where the key is the section number assigned by the
  preprocessor, and the value is the L2 heading name. Once an invocation has got its actual section number from
  get_current_L2 in [[Module:pages]], it can use this table to determine its parent L2. TODO: We could expand this to
  include subsections, to check POS headings are correct etc.
* `unsupported_titles`: Map from pagenames to canonical titles for unsupported-title pages.
* `namespace`: Namespace of the pagename.
* `ns`: Namespace table for the page from mw.site.namespaces (TODO: merge with `namespace` above).
* `full_raw_pagename`: Full version of the '''RAW''' pagename (i.e. unsupported-title pages aren't canonicalized);
  including the namespace and the root (portion before the slash).
* `pagename`: Canonicalized subpage portion of the pagename (unsupported-title pages are canonicalized).
* `decompose_pagename`: Equivalent of `pagename` in NFD decomposition.
* `pagename_len`: Length of `pagename` in Unicode chars, where combinations of spacing character + decomposed diacritic
  are treated as single characters.
* `explode_pagename`: Set of characters found in `pagename`. The keys are characters (where combinations of spacing
  character + decomposed diacritic are treated as single characters).
* `encoded_pagename`: `pagename` after being passed through `encode_entities`.
* `pagename_defaultsort`: Result of `get_lang("mul"):makeSortKey(encoded_pagename)`. Unless `no_fetch_content` is set,
  this value is also passed to the DEFAULTSORT parser function.
* `raw_defaultsort`: The `text` property of the raw title, uppercased with `uupper`.
* `wikitext_topic_cat`: Table recording raw wikitext category links whose category name begins with a `code:` prefix.
  The keys are the codes; each value is `true`, or a set of uppercased, entity-decoded raw sortkeys if any such link
  supplied one. Only populated when the page content was fetched.
* `wikitext_langname_cat`: Like `wikitext_topic_cat`, but keyed by language name, for raw category links whose name
  begins with a recognized language name followed by at least one further word.
* `cats`: List of categories collected while processing the page.

`no_fetch_content` says to not fetch and parse the content or set a DEFAULTSORT sort key, in order to save time on test
and documentation pages that have lots of template invocations that set `|pagename=`. It turns out nearly all the time
of this function is contained in the line `frame:callParserFunction("DEFAULTSORT", data.pagename_defaultsort)`, so we
skip it on test and documentation pages where it accomplishes nothing in any case.
]==]
function export.process_page(pagename, no_fetch_content)
	local data = {
		comb_chars = comb_chars,
		emoji_pattern = "[" .. emoji_chars .. "]",
		unsupported_titles = unsupported_titles or get_unsupported_titles()
	}

	-- `cats` is filled in throughout the function and returned to the caller as `data.cats`.
	local cats = {}
	data.cats = cats

	-- We cannot store `raw_title` in `data` because it contains a metatable.
	local raw_title
	-- Raises an error about an invalid pagename.
	-- NOTE(review): the only call below is inside the `if pagename then` branch, so the `not pagename` arm is not
	-- reached from this function as written.
	local function bad_pagename()
		if not pagename then
			error("Internal error: Something wrong, `data.pagename` not specified but current title contains illegal characters")
		else
			error(format("Bad value for `data.pagename`: '%s', which must not contain illegal characters", pagename))
		end
	end
	if pagename then -- for testing, doc pages, etc.
		raw_title = new_title(pagename)
		if not raw_title then
			bad_pagename()
		end
	else
		raw_title = mw.title.getCurrentTitle()
	end
	local nsText = raw_title.nsText
	data.namespace = nsText
	data.ns = mw.site.namespaces[raw_title.namespace]
	local full_raw_pagename = raw_title.fullText
	data.full_raw_pagename = full_raw_pagename

	local frame = mw.getCurrentFrame()
	-- WARNING: `content` may be nil, e.g. if we're substing a template like {{ja-new}} on a not-yet-created page
	-- or if the module specifies the subpage as `data.pagename` (which many modules do) and we're in an Appendix
	-- or other non-mainspace page. We used to make the latter an error but there are too many modules that do it,
	-- and substing on a nonexistent page is totally legit, and we don't actually need to be able to access the
	-- content of the page.
	local content = not no_fetch_content and raw_title:getContent() or nil

	-- Get the pagename.
	-- The callback only runs for subpage texts of the form "Unsupported titles/...", and `m` is the part after the
	-- slash. Only the first return value of gsub (the resulting string) is kept.
	pagename = gsub(raw_title.subpageText, "^Unsupported titles/(.+)", function(m)
		-- NOTE(review): this entry is inserted as a raw string, whereas most other entries in `cats` go through
		-- get_category(); confirm that the difference is intentional.
		insert(cats, "Unsupported titles")
		-- A whole-title entry in the unsupported titles list takes priority over per-character escapes.
		local title = (unsupported_titles or get_unsupported_titles())[m]
		if title then
			return title
		end
		-- Substitute pairs of "`". Those not used for escaping should be escaped as "`grave`", but might not be,
		-- so if a pair don't form a match, the closing "`" should become the opening "`" of the next match attempt.
		-- This has to be done manually, instead of using gsub.
		local open_pos = find(m, "`")
		if not open_pos then
			return m
		end
		-- From here on, `title` is a list of string pieces that is concatenated at the end.
		title = {sub(m, 1, open_pos - 1)}
		while true do
			local close_pos = find(m, "`", open_pos + 1)
			if not close_pos then
				-- Add "`" plus any remaining characters.
				insert(title, sub(m, open_pos))
				break
			end
			-- `escape` includes both the opening and the closing "`".
			local escape = sub(m, open_pos, close_pos)
			local ch = (unsupported_characters or get_unsupported_characters())[escape]
			-- Match found, so substitute the character and move to the first "`" after the match if found, or
			-- otherwise return.
			if ch then
				insert(title, ch)
				local nxt_pos = close_pos + 1
				open_pos = find(m, "`", nxt_pos)
				-- Add any characters between the match and the next "`" or end.
				if open_pos then
					insert(title, sub(m, nxt_pos, open_pos - 1))
				else
					insert(title, sub(m, nxt_pos))
					break
				end
			-- Match not found, so make the closing "`" the opening "`" of the next attempt.
			else
				-- Add the failed match, except for the closing "`".
				insert(title, sub(m, open_pos, close_pos - 1))
				open_pos = close_pos
			end
		end
		return concat(title)
	end)

	-- Save pagename, as local variable will be destructively modified.
	data.pagename = pagename
	-- Decompose the pagename in Unicode normalization form D.
	data.decompose_pagename = toNFD(pagename)
	-- Explode the current page name into a character table, taking decomposed combining characters into account.
	local explode_pagename = {}
	local pagename_len = 0
	-- Substitution callback: records the matched unit, counts it, and deletes it from the string so that later
	-- passes don't see it again.
	local function explode(char)
		explode_pagename[char] = true
		pagename_len = pagename_len + 1
		return ""
	end
	-- Three passes, from the largest unit to the smallest: double combinations, then single combinations, then
	-- whatever is left, one lead byte plus its continuation bytes (\128-\191) at a time.
	pagename = ugsub(pagename, comb_chars.combined_double, explode)
	pagename = gsub(ugsub(pagename, comb_chars.combined_single, explode), ".[\128-\191]*", explode)

	data.explode_pagename = explode_pagename
	data.pagename_len = pagename_len

	-- Generate DEFAULTSORT.
	data.encoded_pagename = encode_entities(data.pagename)
	data.pagename_defaultsort = get_lang("mul"):makeSortKey(data.encoded_pagename)
	if not no_fetch_content then
		frame:callParserFunction("DEFAULTSORT", data.pagename_defaultsort)
	end
	data.raw_defaultsort = uupper(raw_title.text)

	-- Make `L2_list` and `L2_sections`, note raw wikitext use of {{DEFAULTSORT:}} and {{DISPLAYTITLE:}}, then add categories if any unwanted L1 headings are found, the L2 headings are in the wrong order, or they don't match a canonical language name.
	-- Note: HTML comments shouldn't be removed from `content` until after this step, as they can affect the result.
	do
		-- `prev` (the previous heading name) and `rc` (whether {{reconstructed}} was seen) start out as nil.
		local L2_list, L2_list_len, L2_sections, sort_cache, prev, rc = {}, 0, {}, {}
		-- `new_cats` is a set of category names; `L2_wrong_order` starts out as nil.
		local new_cats, L2_wrong_order = {}

		-- Returns the string used to compare the order of L2 headings. "Translingual" and "English" get the
		-- fixed weights "\1" and "\2"; names that match the ASCII-only pattern below are used as-is; anything else
		-- is normalized (result cached in `sort_cache`).
		local function get_weight(L2)
			if L2 == "Translingual" then
				return "\1"
			elseif L2 == "English" then
				return "\2"
			elseif match(L2, "^[%z\1-\b\14-!#-&(-,.-\127]+$") then
				return L2
			end
			local weight = sort_cache[L2]
			if weight then
				return weight
			end
			-- Remove combining characters, quotes and ʻ/ʼ from the decomposed name, collapse runs of whitespace
			-- and hyphens into a single space, then recompose.
			weight = toNFC(ugsub(ugsub(toNFD(L2), "[" .. comb_chars_all .. "'\"ʻʼ]+", ""), "[%s%-]+", " "))
			sort_cache[L2] = weight
			return weight
		end

		-- Records an L1 or L2 heading node and flags problems with it; deeper headings are ignored.
		local function handle_heading(heading)
			local level = heading.level
			if level > 2 then
				return
			end
			local name = heading:get_name()
			-- heading:get_name() will return nil if there are any newline characters in the preprocessed heading name (e.g. from an expanded template). In such cases, the preprocessor section count still increments (since it's calculated pre-expansion), but the heading will fail, so the L2 count shouldn't be incremented.
			if name == nil then
				return
			end
			L2_list_len = L2_list_len + 1
			L2_list[L2_list_len] = name
			L2_sections[heading.section] = name
			-- Also add any L1s, since they terminate the preceding L2, but add a maintenance category since it's probably a mistake.
			if level == 1 then
				new_cats["Pages with unwanted L1 headings"] = true
			end
			-- Check the heading is in the right order.
			-- FIXME: we need a more sophisticated sorting method which handles non-diacritic special characters (e.g. Magɨ).
			-- Once the page has been flagged, `L2_wrong_order` short-circuits any further comparisons.
			if prev and not (
				L2_wrong_order or
				string_compare(get_weight(prev), get_weight(name))
			) then
				new_cats["Pages with language headings in the wrong order"] = true
				L2_wrong_order = true
			end
			-- Check it's a canonical language name.
			if not (langnames or get_langnames())[name] then
				new_cats["Pages with nonstandard language headings"] = true
			end
			prev = name
		end

		-- Flags raw {{DEFAULTSORT:}} and {{DISPLAYTITLE:}} use, and notes whether {{reconstructed}} is present.
		local function handle_template(template)
			local name = template:get_name()
			if name == "DEFAULTSORT:" then
				new_cats["Pages with DEFAULTSORT conflicts"] = true
			elseif name == "DISPLAYTITLE:" then
				new_cats["Pages with DISPLAYTITLE conflicts"] = true
			elseif name == "reconstructed" then
				rc = true
			end
		end

		-- Walk the parsed page; skipped entirely if there is no content.
		if content then
			for node in parse(content):iterate_nodes() do
				local node_class = class_else_type(node)
				if node_class == "heading" then
					handle_heading(node)
				elseif node_class == "template" then
					handle_template(node)
				elseif node_class == "parameter" then
					new_cats["Pages with raw triple-brace template parameters"] = true
				end
			end
		end

		L2_list.n = L2_list_len
		data.L2_list = L2_list
		data.L2_sections = L2_sections

		-- These two categories are added unconditionally, including when no headings were found (count 0).
		insert(cats, get_category("Pages with entries"))
		insert(cats, get_category(format("Pages with %s entr%s", L2_list_len, L2_list_len == 1 and "y" or "ies")))

		-- `new_cats` is a set, so the order in which these are appended is unspecified.
		for cat in pairs(new_cats) do
			insert(cats, get_category(cat))
		end

		-- Reconstruction pages are expected to use {{reconstructed}}; the language name is taken from the part of
		-- the full pagename between "Reconstruction:" and the first slash.
		if nsText == "Reconstruction" and not rc then
			local langname = match(full_raw_pagename, "^Reconstruction:([^/]+)/.")
			if langname then
				insert(cats, get_category(langname .. " entries missing Template:reconstructed"))
			end
		end
	end

	------ 4. Parse page for maintenance categories. ------
	-- Use of tab characters.
	if content and find(content, "\t", 1, true) then
		insert(cats, get_category("Pages with tab characters"))
	end
	-- Unencoded character(s) in title.
	-- The category is added if any of the characters below occurs in the title, unless that character is the
	-- whole title.
	local IDS = list_to_set{"⿰", "⿱", "⿲", "⿳", "⿴", "⿵", "⿶", "⿷", "⿸", "⿹", "⿺", "⿻", "⿼", "⿽", "⿾", "⿿", "㇯"}
	for char in pairs(explode_pagename) do
		if IDS[char] and char ~= data.pagename then
			-- NOTE(review): raw string rather than get_category(...), like "Unsupported titles" above; confirm
			-- that this is intentional.
			insert(cats, "Terms containing unencoded characters")
			break
		end
	end

	-- Raw wikitext use of a topic or langname category. Also check if any raw sortkeys have been used.
	do
		local wikitext_topic_cat = {}
		local wikitext_langname_cat = {}
		local raw_sortkey

		-- If a raw sortkey has been found, add it to the relevant table.
		-- If there's no table (or the index is just `true`), create one first.
		-- Without a sortkey, `t[lang]` is set to `true` only if there is no entry yet, so an existing set of
		-- sortkeys is never overwritten.
		local function add_cat_table(t, lang, sortkey)
			local t_lang = t[lang]
			if not sortkey then
				if not t_lang then
					t[lang] = true
				end
				return
			elseif t_lang == true or not t_lang then
				t_lang = {}
				t[lang] = t_lang
			end
			t_lang[uupper(decode_entities(sortkey))] = true
		end

		-- Handles one candidate category link. `cat` is the text between "[[" and "]]", `colon` is the position
		-- of the namespace colon within `cat`, and `nxt` is the position in `content` just after the closing "]]".
		local function process_category(content, cat, colon, nxt)
			local pipe = find(cat, "|", colon + 1, true)
			-- Categories cannot end "|]]".
			if pipe == #cat then
				return
			end
			-- Namespace 14 is checked so that only genuine category links get past this point.
			local title = new_title(pipe and sub(cat, 1, pipe - 1) or cat)
			if not (title and title.namespace == 14) then
				return
			end
			-- Get the sortkey (if any), then canonicalize category title.
			local sortkey = pipe and sub(cat, pipe + 1) or nil
			cat = title.text
			if sortkey then
				raw_sortkey = true
				-- If the sortkey contains "[", the first "]" of a final "]]]" is treated as part of the sortkey.
				if find(sortkey, "[", 1, true) and sub(content, nxt, nxt) == "]" then
					sortkey = sortkey .. "]"
				end
			end
			-- A leading "code:" prefix marks the link as a topic category.
			local code = match(cat, "^([%w%-.]+):")
			if code then
				add_cat_table(wikitext_topic_cat, code, sortkey)
				return
			end
			-- Split by word.
			cat = split(cat, " ", true, true)
			-- Formerly we looked for the language name anywhere in the category. This is simply wrong
			-- because there are no categories like 'Alsatian French lemmas' (only L2 languages
			-- have langname categories), but doing it this way wrongly catches things like [[Category:Shapsug Adyghe]]
			-- in [[Category:Adyghe entries with language name categories using raw markup]].
			-- At least one word must remain after the language name, hence `#cat - 1`.
			local n = #cat - 1
			if n <= 0 then
				return
			end
			-- Go from longest to shortest and stop once we've found a language name. Going from shortest
			-- to longest or not stopping after a match risks falsely matching (e.g.) German Low German
			-- categories as German.
			repeat
				local name = concat(cat, " ", 1, n)
				if (langnames or get_langnames())[name] then
					add_cat_table(wikitext_langname_cat, name, sortkey)
					return
				end
				n = n - 1
			until n == 0
		end

		if content then
			-- Remove comments, then iterate over category links.
			content = remove_comments(content, "BOTH")
			local head = find(content, "[[", 1, true)
			while head do
				local close = find(content, "]]", head + 2, true)
				if not close then
					break
				end
				-- Make sure there are no intervening "[[" between head and close.
				-- After this loop, `head` is the last "[[" before `close`, and `open` is the first "[[" after it
				-- (or nil).
				local open = find(content, "[[", head + 2, true)
				while open and open < close do
					head = open
					open = find(content, "[[", head + 2, true)
				end
				local cat = sub(content, head + 2, close - 1)
				-- Locate the colon, and weed out most unwanted links. "[ _\128-\244]*" catches valid whitespace, and ensures any category links using the colon trick are ignored. We match all non-ASCII characters, as there could be multibyte spaces, and mw.title.new will filter out any remaining false-positives; this is a lot faster than running mw.title.new on every link.
				local colon = match(cat, "^[ _\128-\244]*[Cc][Aa][Tt][EeGgOoRrYy _\128-\244]*():")
				if colon then
					process_category(content, cat, colon, close + 2)
				end
				-- Continue from the next "[["; the loop ends when there is none.
				head = open
			end
		end

		data.wikitext_topic_cat = wikitext_topic_cat
		data.wikitext_langname_cat = wikitext_langname_cat
		if raw_sortkey then
			insert(cats, get_category("Pages with raw sortkeys"))
		end
	end

	return data
end

return export
jgyih2b070q6631wf6xjiwzg6mtn4p3 Modul:languages/data/3/m 828 8469 23814 2026-04-01T13:21:05Z Deepturquoise 2456 anyar 23814 Scribunto text/plain
local m_langdata = require("Module:languages/data")

-- Loaded on demand, as it may not be needed (depending on the data).
local function u(...)
	u = require("Module:string utilities").char
	return u(...)
end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared local m = {} m["maa"] = { "San Jerónimo Tecóatl Mazatec", 7692927, "omq-maz", "Latn", } m["mab"] = { "Yutanduchi Mixtec", 12645448, "omq-mxt", "Latn", } m["mad"] = { "Madurese", 36213, "poz-msa", "Latn, Java", } m["mae"] = { "Bo-Rukul", 34967, "nic-ple", "Latn", } m["maf"] = { "Mafa", 35819, "cdc-cbm", "Latn", } m["mag"] = { "Magahi", -- Not to be confused with Magadhi Prakrit (pra-mag) 33728, "inc-bih", "Deva, Kthi", translit = { Deva = "bho-translit", Kthi = "bho-Kthi-translit", }, } m["mai"] = { "Maithili", 36109, "inc-bih", "Deva, Tirh, Kthi, Newa", translit = { Deva = "mai-translit", Tirh = "mai-Tirh-translit", Kthi = "bho-Kthi-translit", }, } m["maj"] = { "Jalapa de Díaz Mazatec", 3915999, "omq-maz", "Latn", } m["mak"] = { "Makasar", 33643, "poz-ssw", "Latn, Bugi, Maka", } m["mam"] = { "Mam", 33467, "myn", "Latn", } m["man"] = { "Mandingo", 35772, "dmn-man", "Latn", } m["maq"] = { "Chiquihuitlán Mazatec", 5101757, "omq-maz", "Latn", } m["mas"] = { "Maasai", 35787, "sdv-lma", "Latn", } m["mat"] = { "Matlatzinca", 12953704, "omq", "Latn", } m["mau"] = { "Huautla Mazatec", 36230, "omq-maz", "Latn", } m["mav"] = { "Sateré-Mawé", 6794475, "tup", "Latn", } m["maw"] = { "Mampruli", 35804, "nic-wov", "Latn", } m["max"] = { "North Moluccan Malay", 7056136, "crp", "Latn", ancestors = "ms", } m["maz"] = { "Central Mazahua", 36228, "oto", "Latn", } m["mba"] = { "Higaonon", 5753411, "mno", "Latn", } m["mbb"] = { "Western Bukidnon Manobo", 7987643, "mno", "Latn", } m["mbc"] = { "Macushi", 56633, "sai-pem", "Latn", } m["mbd"] = { "Dibabawon Manobo", 18755523, "mno", "Latn", } m["mbe"] = { "Molale", 3319444, "nai-plp", "Latn", } m["mbf"] = { "Baba Malay", 18642798, "crp", "Latn", ancestors = "ms", } m["mbh"] = { "Mangseng", 6749147, "poz-ocw", "Latn", } m["mbi"] = { "Ilianen Manobo", 14916911, "mno", "Latn", } m["mbj"] = { "Nadëb", 3335011, "sai-nad", "Latn", } m["mbk"] = { "Malol", 
6744477, "poz-ocw", "Latn", } m["mbl"] = { "Maxakalí", 3029682, "sai-mje", "Latn", } m["mbm"] = { "Ombamba", 36407, "bnt-mbt", "Latn", } m["mbn"] = { "Macaguán", 3273980, "sai-guh", "Latn", } m["mbo"] = { -- is, like 'bqz', 'bsi' and 'bss', a dialect of Manenguba "Mbo (Cameroon)", 36011, "bnt-mne", "Latn", } m["mbp"] = { "Wiwa", 3012604, "cba", "Latn", } m["mbq"] = { "Maisin", 3448149, nil, "Latn", } m["mbr"] = { "Nukak Makú", 3346228, "sai-nad", "Latn", } m["mbs"] = { "Sarangani Manobo", 7423093, "mno", "Latn", } m["mbt"] = { "Matigsalug Manobo", 6787447, "mno", "Latn", } m["mbu"] = { "Mbula-Bwazza", 3913324, "nic-jrn", "Latn", } m["mbv"] = { "Mbulungish", 36003, "alv-nal", "Latn", } m["mbw"] = { "Maring", 3293280, nil, "Latn", } m["mbx"] = { "Sepik Mari", 6760942, "paa-spk", "Latn", } m["mby"] = { "Memoni", 4180871, "inc-snd", "Gujr, ur-Arab", } m["mbz"] = { "Amoltepec Mixtec", 13583504, "omq-mxt", "Latn", } m["mca"] = { "Maca", 3281043, "sai-mtc", "Latn", } m["mcb"] = { "Machiguenga", 3915441, "awd", "Latn", } m["mcc"] = { "Bitur", 4919173, } m["mcd"] = { "Sharanahua", 12953881, "sai-pan", "Latn", } m["mce"] = { "Itundujia Mixtec", 12953727, "omq-mxt", "Latn", } m["mcf"] = { "Matsés", 2981620, "sai-pan", "Latn", } m["mcg"] = { "Mapoyo", 56946, "sai-map", "Latn", } m["mch"] = { "Ye'kwana", 3082027, "sai-car", "Latn", sort_key = { remove_diacritics = "%-%s", from = {"'", "ñ", "ö", "sh", "ü"}, to = {"’", "n" .. p[1], "o" .. p[1], "s" .. p[1], "u" .. 
p[1]} } } m["mci"] = { "Mese", 6821190, } m["mcj"] = { "Mvanip", 3913281, "nic-mmb", "Latn", } m["mck"] = { "Mbunda", 34170, "bnt-clu", "Latn", } m["mcl"] = { "Macaguaje", 6722435, "sai-tuc", "Latn", } m["mcm"] = { "Kristang", 2669169, "crp", "Latn", ancestors = "pt", } m["mcn"] = { "Masana", 56668, "cdc-mas", } m["mco"] = { "Coatlán Mixe", 25559716, "nai-miz", "Latn", } m["mcp"] = { "Makaa", 35803, "bnt-mka", } m["mcq"] = { "Ese", 5397551, "ngf", "Latn", } m["mcr"] = { "Menya", 11732444, "ngf", "Latn", } m["mcs"] = { "Mambai", 6748872, "alv-mbm", } m["mcu"] = { "Cameroon Mambila", 19359039, "nic-mmb", "Latn", } m["mcv"] = { "Minanibai", 6863167, "ngf", } m["mcw"] = { "Mawa", 3441333, "cdc-est", "Latn", } m["mcx"] = { "Mpiemo", 35908, "bnt-bek", } m["mcy"] = { "South Watut", 12953293, "poz-ocw", "Latn", } m["mcz"] = { "Mawan", 11732429, "ngf-mad", } m["mda"] = { "Mada (Nigeria)", 3915843, "nic-nin", "Latn", } m["mdb"] = { "Morigi", 6912195, "paa-kiw", } m["mdc"] = { "Male", 6742927, "ngf-mad", } m["mdd"] = { "Mbum", 36170, "alv-mbm", } m["mde"] = { "Bura Mabang", 35860, "ssa", "Arab, Latn", } m["mdf"] = { "Moksha", 13343, "urj-mdv", "Cyrl", translit = "mdf-translit", entry_name = {remove_diacritics = c.acute}, override_translit = true, sort_key = "mdf-sortkey", } m["mdg"] = { "Massalat", 759984, } m["mdh"] = { "Maguindanao", 33717, "phi", "Latn, Arab", } m["mdi"] = { "Mamvu", 3033594, "csu-mle", } m["mdj"] = { "Mangbetu", 56327, "csu-maa", "Latn", } m["mdk"] = { "Mangbutu", 6748877, "csu-mle", } m["mdl"] = { "Maltese Sign Language", 6744816, "sgn", } m["mdm"] = { "Mayogo", 6797580, "nic-nke", "Latn", } m["mdn"] = { "Mbati", 36165, "bnt-ngn", } m["mdp"] = { "Mbala", 6799583, "bnt-pen", } m["mdq"] = { "Mbole", 6799727, "bnt-mbe", } m["mdr"] = { "Mandar", 35995, "poz-ssw", "Bugi, Latn", } m["mds"] = { "Maria", 3448673, } m["mdt"] = { "Mbere", 36062, "bnt-mbt", } m["mdu"] = { "Mboko", 36058, "bnt-mbo", } m["mdv"] = { "Santa Lucía Monteverde Mixtec", 12953722, 
"omq-mxt", "Latn", } m["mdw"] = { "Mbosi", 36035, "bnt-mbo", } m["mdx"] = { "Dizin", 35313, "omv-diz", "Ethi, Latn", } m["mdy"] = { "Maale", 795327, "omv-ome", } m["mdz"] = { "Suruí Do Pará", 10322149, "tup-gua", "Latn", } m["mea"] = { "Menka", 36078, "nic-grs", "Latn", } m["meb"] = { "Ikobi-Mena", 11732241, "ngf", "Latn", } m["mec"] = { "Mara", 6772774, } m["med"] = { "Melpa", 36166, } m["mee"] = { "Mengen", 3305831, "poz-ocw", "Latn", } m["mef"] = { "Megam", 6808589, } m["meg"] = { "Mea", 12952836, "poz-cln", } m["meh"] = { "Southwestern Tlaxiaco Mixtec", 7070686, "omq-mxt", "Latn", } m["mei"] = { "Midob", 36007, "nub", "Latn", } m["mej"] = { "Meyah", 11732436, "paa-wpa", } m["mek"] = { "Mekeo", 3304803, "poz-ocw", "Latn", } m["mel"] = { "Central Melanau", 18638319, "poz-swa", "Latn", } m["mem"] = { "Mangala", 6748664, } m["men"] = { "Mende", 1478672, "dmn-msw", "Latn, Mend", } m["meo"] = { "Kedah Malay", 4925684, "poz-mly", "Latn, ms-Arab, Thai", entry_name = { from = {u(0xF70F)}, to = {"ญ"} }, sort_key = {Thai = "Thai-sortkey"}, } m["mep"] = { "Miriwung", 3111847, "aus-jar", "Latn", } m["meq"] = { "Merey", 3502314, "cdc-cbm", "Latn", } m["mer"] = { "Meru", 13313, "bnt-kka", "Latn", } m["mes"] = { "Masmaje", 3440448, } m["met"] = { "Mato", 3299190, "poz-ocw", "Latn", } m["meu"] = { "Motu", 33516, "poz-ocw", "Latn", } m["mev"] = { "Mano", 3913286, "dmn-mda", "Latn", } m["mew"] = { "Maaka", 3438764, "cdc-wst", "Latn", } m["mey"] = { "Hassaniya Arabic", 56231, "sem-arb", "Arab", } m["mez"] = { "Menominee", 13363, "alg", "Latn", sort_key = {remove_diacritics = "·"}, } m["mfa"] = { "Pattani Malay", 1199751, "poz-mly", "Latn, ms-Arab, Thai", entry_name = { from = {u(0xF70F)}, to = {"ญ"} }, sort_key = {Thai = "Thai-sortkey"}, } m["mfb"] = { "Bangka", 3258818, "poz-mly", "Latn, Arab", } m["mfc"] = { "Mba", 4286464, "nic-mbc", "Latn", } m["mfd"] = { "Mendankwe-Nkwen", 11129537, "nic-nge", "Latn", } m["mfe"] = { "Mauritian Creole", 33661, "crp", "Latn", ancestors = "fr", 
sort_key = s["roa-oil-sortkey"], } m["mff"] = { "Naki", 36083, "nic-bbe", "Latn", } m["mfg"] = { "Mixifore", 3914478, "dmn-mok", } m["mfh"] = { "Matal", 3501751, "cdc-cbm", "Latn", } m["mfi"] = { "Wandala", 3441249, "cdc-cbm", "Latn", } m["mfj"] = { "Mefele", 3501871, "cdc-cbm", } m["mfk"] = { "North Mofu", 56303, "cdc-cbm", "Latn", } m["mfl"] = { "Putai", 56291, } m["mfm"] = { "Marghi South", 56248, } m["mfn"] = { "Cross River Mbembe", 3915395, "nic-uce", "Latn", } m["mfo"] = { "Mbe", 36075, "nic-eko", "Latn", } m["mfp"] = { "Makassar Malay", 12952776, "qfa-mix", "Latn", ancestors = "ms, mak" } m["mfq"] = { "Moba", 19921578, "nic-grm", "Latn", } m["mfr"] = { "Marrithiyel", 6773014, "aus-dal", "Latn", } m["mfs"] = { "Mexican Sign Language", 3915511, "sgn", "Latn", -- when documented } m["mft"] = { "Mokerang", 3319387, "poz-aay", "Latn", } m["mfu"] = { "Mbwela", 11004988, "bnt-clu", ancestors = "lch", } m["mfv"] = { "Mandjak", 35822, "alv-pap", } m["mfw"] = { "Mulaha", 6933720, } m["mfx"] = { "Melo", 6813268, "omv-nom", } m["mfy"] = { "Mayo", 56729, "azc-trc", "Latn", sort_key = {remove_diacritics = c.acute}, } m["mfz"] = { "Mabaan", 20526385, "sdv", "Latn", } m["mga"] = { "Middle Irish", 36116, "cel-gae", "Latn", ancestors = "sga", entry_name = {remove_diacritics = c.dotabove .. c.diaer .. 
"·"}, sort_key = "mga-sortkey", } m["mgb"] = { "Mararit", 56359, "sdv-tmn", } m["mgc"] = { "Morokodo", 6913216, "csu-bbk", "Latn", } m["mgd"] = { "Moru", 6915014, "csu-mma", "Latn, Arab", } m["mge"] = { "Mango", 713659, "csu-sar", "Latn", } m["mgf"] = { "Maklew", 6739816, } m["mgg"] = { "Mpongmpong", 35924, "bnt-bek", } m["mgh"] = { "Makhuwa-Meetto", 33604, "bnt-mak", "Latn", ancestors = "vmw", } m["mgi"] = { "Jili", 3914497, "nic-pls", } m["mgj"] = { "Abureni", 3441256, "nic-cde", "Latn", } m["mgk"] = { "Mawes", 6794395, "paa", } m["mgl"] = { "Maleu-Kilenge", 3281884, } m["mgm"] = { "Mambae", 35774, "poz-tim", "Latn", } m["mgn"] = { "Mbangi", 11017443, "nic-ngd", "Latn", } m["mgo"] = { "Meta'", 36054, "nic-mom", "Latn", } m["mgp"] = { "Eastern Magar", 12952758, "sit-gma", "Deva, Latn", } m["mgq"] = { "Malila", 6743679, "bnt-mby", "Latn", } m["mgr"] = { "Mambwe-Lungu", 626210, "bnt-mwi", "Latn", } m["mgs"] = { "Manda (Tanzania)", 16939267, "bnt-bki", } m["mgt"] = { "Mongol", 11260674, "paa", "Latn", } m["mgu"] = { "Mailu", 3278246, "ngf", } m["mgv"] = { "Matengo", 6786446, "bnt-mbi", "Latn", } m["mgw"] = { "Matumbi", 6791974, "bnt-mbi", "Latn", } m["mgy"] = { "Mbunga", 6799817, "bnt-kil", } m["mgz"] = { "Mbugwe", 3426367, "bnt-mra", } m["mha"] = { "Manda (India)", 56760, "dra-kki", "Orya", translit = "kxv-translit", } m["mhb"] = { "Mahongwe", 35816, "bnt-kel", } m["mhc"] = { "Mocho", 1941682, "myn", } m["mhd"] = { "Mbugu", 36152, "qfa-mix", "Latn", ancestors = "asa", } m["mhe"] = { "Besisi", 2742262, "mkh-asl", "Latn", } m["mhf"] = { "Mamaa", 6745346, "ngf-fin", "Latn", } m["mhg"] = { "Marrgu", 6772812, } m["mhi"] = { "Ma'di", 56670, "csu-mma", "Latn", } m["mhj"] = { "Mogholi", 13336, "xgn", "fa-Arab, Latn", translit = "fa-cls-translit", entry_name = { ["fa-Arab"] = "ar-entryname", }, } m["mhk"] = { "Mungaka", 36068, "nic-nun", } m["mhl"] = { "Mauwake", 6794095, "ngf-mad", "Latn", } m["mhm"] = { "Makhuwa-Moniga", 6900145, "bnt-mak", } m["mhn"] = { "Mòcheno", 
268130, "gmw-hgm", "Latn", ancestors = "bar", sort_key = {remove_diacritics = c.grave}, } m["mho"] = { "Mashi", 10962737, "bnt-kav", "Latn", } m["mhp"] = { "Balinese Malay", 12473441, "crp", "Latn, Bali, ms-Arab", } m["mhq"] = { "Mandan", 1957120, "sio", "Latn", } m["mhr"] = { "Eastern Mari", 3906614, "chm", "Cyrl", translit = "chm-translit", override_translit = true, entry_name = {remove_diacritics = c.grave .. c.acute}, sort_key = { from = {"ё", "ҥ", "ӧ", "ӱ"}, to = {"е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]} } } m["mhs"] = { "Buru (Indonesia)", 2928650, "poz-cma", "Latn", } m["mht"] = { "Mandahuaca", 6747924, "awd-nwk", } m["mhu"] = { "Taraon", 56400, "sit-gsi", "Latn", } m["mhw"] = { "Mbukushu", 2691548, "bnt", "Latn", } m["mhx"] = { "Lhao Vo", 11149315, "tbq-brm", "Latn", } m["mhy"] = { "Ma'anyan", 2328761, "poz-bre", "Latn", } m["mhz"] = { "Mor (Austronesian)", 2122792, "poz-hce", "Latn", } m["mia"] = { "Miami", 56523, "alg", "Latn", } m["mib"] = { "Atatláhuca Mixtec", 32093046, "omq-mxt", "Latn", } m["mic"] = { "Mi'kmaq", 13321, "alg-eas", "Latn", } m["mid"] = { "Mandaic", 6991742, "sem-ase", "Mand", ancestors = "myz", translit = { Mand = "Mand-translit", }, entry_name = { Mand = "Mand-entryname", } } m["mie"] = { "Ocotepec Mixtec", 25559575, "omq-mxt", "Latn", } m["mif"] = { "Mofu-Gudur", 1365132, "cdc-cbm", "Latn", } m["mig"] = { "San Miguel el Grande Mixtec", 12953719, "omq-mxt", "Latn", } m["mih"] = { "Chayuco Mixtec", 13583510, "omq-mxt", "Latn", } m["mii"] = { "Chigmecatitlán Mixtec", 12953724, "omq-mxt", "Latn", } m["mij"] = { "Mungbam", 34725, "nic-beb", "Latn", } m["mik"] = { "Mikasuki", 13316, "nai-mus", "Latn", } m["mil"] = { "Peñoles Mixtec", 42411307, "omq-mxt", "Latn", } m["mim"] = { "Alacatlatzala Mixtec", 14697894, "omq-mxt", "Latn", } m["min"] = { "Minangkabau", 13324, "poz-mly", "Latn, Arab", } m["mio"] = { "Pinotepa Nacional Mixtec", 7196415, "omq-mxt", "Latn", } m["mip"] = { "Apasco-Apoala Mixtec", 13583505, "omq-mxt", "Latn", } 
m["miq"] = { "Miskito", 1516803, "nai-min", "Latn", entry_name = {remove_diacritics = c.circ}, } m["mir"] = { "Isthmus Mixe", 6088873, "nai-miz", "Latn", } m["mit"] = { "Southern Puebla Mixtec", 7570345, "omq-mxt", "Latn", } m["miu"] = { "Cacaloxtepec Mixtec", 12953723, "omq-mxt", "Latn", } m["miw"] = { "Akoye", 3327462, "ngf", "Latn", } m["mix"] = { "Mixtepec Mixtec", 6884125, "omq-mxt", "Latn", } m["miy"] = { "Ayutla Mixtec", 13583508, "omq-mxt", "Latn", } m["miz"] = { "Coatzospan Mixtec", 3317290, "omq-mxt", "Latn", } m["mjb"] = { "Makalero", 35729, "ngf", "Latn", } m["mjc"] = { "San Juan Colorado Mixtec", 12953718, "omq-mxt", "Latn", } m["mjd"] = { "Northwest Maidu", 3198700, "nai-mdu", "Latn", } m["mje"] = { "Muskum", 3913334, } -- mjg "Monguor" is not recognized as a language, but it is a family code m["mji"] = { "Kim Mun", 1115317, "hmx-mie", "Latn", } m["mjj"] = { "Mawak", 11732427, "ngf-mad", } m["mjk"] = { "Matukar", 6791963, "poz-ocw", "Latn", } m["mjl"] = { "Mandeali", 6747931, "him", "Deva, Takr", translit = "hi-translit", } m["mjm"] = { "Medebur", 6805227, "poz-ocw", "Latn", } m["mjn"] = { "Mebu", 6804364, "ngf-fin", } m["mjo"] = { "Malankuravan", 14916887, "dra-mal", } m["mjp"] = { "Malapandaram", 10575729, "dra-tam", } m["mjq"] = { "Malaryan", 12952773, "dra-mal", } m["mjr"] = { "Malavedan", 12952775, "dra-mal", "Mlym", translit = "ml-translit", } m["mjs"] = { "Miship", 3441264, "cdc-wst", "Latn", } m["mjt"] = { "Sawriya Paharia", 33907, "dra-mlo", "Beng, Deva", } m["mju"] = { "Manna-Dora", 10576453, "dra-tel", } m["mjv"] = { "Mannan", 3286037, "dra-tam", "Mlym, Taml", translit = { Mlym = "ml-translit", Taml = "ta-translit", }, } m["mjw"] = { "Karbi", 56591, "tbq-kuk", "Latn", } m["mjx"] = { "Mahali", 12953686, "mun", } m["mjy"] = { "Mahican", 3182562, "alg-eas", "Latn", } m["mjz"] = { "Majhi", 6737786, "inc-bih", } m["mka"] = { "Mbre", 3450154, "nic", --unclassified within niger-congo tho } m["mkb"] = { "Mal Paharia", 6583595, "inc-eas", "Deva", } 
m["mkc"] = { "Siliput", 7515090, "qfa-tor", "Latn", } m["mke"] = { "Mawchi", 21403317, } m["mkf"] = { "Miya", 43328, "cdc-wst", "Latn", } m["mkg"] = { "Mak (China)", 3280623, "qfa-kms", } m["mki"] = { "Dhatki", 32480, "raj", "Deva, Mahj, Arab", } m["mkj"] = { "Mokilese", 2335528, "poz-mic", "Latn", } m["mkk"] = { "Byep", 35052, "bnt-mka", } m["mkl"] = { "Mokole", 36047, "alv-yor", "Latn", } m["mkm"] = { "Moklen", 3319380, } m["mkn"] = { "Kupang Malay", 18458203, "crp", "Latn", } m["mko"] = { "Mingang Doso", 3915382, "alv-bwj", } m["mkp"] = { "Moikodi", 6894594, "ngf", } m["mkq"] = { "Bay Miwok", 3460957, "nai-utn", "Latn", } m["mkr"] = { "Malas", 11732402, "ngf-mad", } m["mks"] = { "Silacayoapan Mixtec", 7514027, "omq-mxt", "Latn", } m["mkt"] = { "Vamale", 14916907, "poz-cln", "Latn", } m["mku"] = { "Konyanka Maninka", 11163298, "dmn-mnk", } m["mkv"] = { "Mav̋ea", 3073532, "poz-vnn", "Latn", } m["mkx"] = { "Cinamiguin Manobo", 12953697, "mno", "Latn", } m["mky"] = { "East Makian", 3512690, "poz-hce", "Latn", } m["mkz"] = { "Makasae", 35782, "ngf", "Latn", } m["mla"] = { "Tamambo", 1153276, "poz-vnn", "Latn", } m["mlb"] = { "Mbule", 35843, "nic-ymb", "Latn", } m["mlc"] = { "Caolan", 3446682, "tai-cho", "Latn, Hani", sort_key = {Hani = "Hani-sortkey"}, } m["mle"] = { "Manambu", 11732406, "paa-spk", "Latn", } m["mlf"] = { "Mal", 3281057, "mkh-khm", } m["mlh"] = { "Mape", 6753787, } m["mli"] = { "Malimpung", 12473435, } m["mlj"] = { "Miltu", 3441310, } m["mlk"] = { "Ilwana", 6001357, "bnt-sab", } m["mll"] = { "Malua Bay", 6744946, "poz-vnc", "Latn", } m["mlm"] = { "Mulam", 3092284, "qfa-kms", "Latn", } m["mln"] = { "Malango", 3281522, "poz-sls", "Latn", } m["mlo"] = { "Mlomp", 36009, "alv-bak", } m["mlp"] = { "Bargam", 4860543, "ngf-mad", "Latn", } m["mlq"] = { "Western Maninkakan", 11028033, "dmn-wmn", } m["mlr"] = { "Vame", 3515088, "cdc-cbm", "Latn", } m["mls"] = { "Masalit", 56557, "ssa", } m["mlu"] = { "To'abaita", 36645, "poz-sls", "Latn", } m["mlv"] = { 
"Mwotlap", 2475538, "poz-vnn", "Latn", } m["mlw"] = { "Moloko", 1965222, "cdc-cbm", "Latn", } m["mlx"] = { "Malfaxal", 2157421, "poz-vnc", "Latn", } m["mlz"] = { "Malaynon", 18755512, "phi", } m["mma"] = { "Mama", 3913963, "nic-jrn", } m["mmb"] = { "Momina", 6897297, } m["mmc"] = { "Michoacán Mazahua", 12953705, "oto", "Latn", } m["mmd"] = { "Maonan", 3092293, "qfa-kms", "Latn", } m["mme"] = { "Tirax", 3276286, "poz-vnc", "Latn", } m["mmf"] = { "Mundat", 56263, "cdc-wst", "Latn", } m["mmg"] = { "North Ambrym", 2842468, "poz-vnc", "Latn", } m["mmh"] = { "Mehináku", 3501838, "awd", "Latn", } m["mmi"] = { "Musar", 6940113, "ngf-mad", } m["mmj"] = { "Majhwar", 6737795, } m["mmk"] = { "Mukha-Dora", 6933447, } m["mml"] = { "Man Met", 3194984, "mkh-pal", } m["mmm"] = { "Maii", 6735599, "poz-vnc", "Latn", } m["mmn"] = { "Mamanwa", 3206623, "phi", "Latn", } m["mmo"] = { "Mangga Buang", 12952294, "poz-ocw", "Latn", } m["mmp"] = { "Musan", 2605703, "paa-asa", } m["mmq"] = { "Aisi", 6940074, "ngf-mad", "Latn", } m["mmr"] = { "Western Xiangxi Miao", 3307901, "hmn", "Latn", } m["mmt"] = { "Malalamai", 3281496, "poz-ocw", "Latn", } m["mmu"] = { "Mmaala", 13123461, "nic-ymb", "Latn", } m["mmv"] = { "Miriti", 6873567, "sai-tuc", "Latn", } m["mmw"] = { "Emae", 3051961, "poz-pnp", "Latn", } m["mmx"] = { "Madak", 3275205, "poz-ocw", "Latn", } m["mmy"] = { "Migaama", 56259, "cdc-est", "Latn", } m["mmz"] = { "Mabaale", 11003249, "bnt-ngn", } m["mna"] = { "Mbula", 3303572, "poz-ocw", "Latn", } m["mnb"] = { "Muna", 6935584, "poz-mun", "Latn", } m["mnc"] = { "Manchu", 33638, "tuw-jrc", "mnc-Mong, Latn", ancestors = "juc", translit = "mnc-translit", } m["mnd"] = { "Mondé", 6898840, "tup", "Latn", } m["mne"] = { "Naba", 760732, "csu-bgr", } m["mnf"] = { "Mundani", 35839, "nic-mom", "Latn", } m["mng"] = { "Eastern Mnong", 12953747, "mkh-ban", "Latn, Khmr", } m["mnh"] = { "Mono (Congo)", 33501, "bad-cnt", "Latn", } m["mni"] = { "Manipuri", 33868, "sit", "Mtei, Beng", ancestors = "omp", 
translit = {Mtei = "Mtei-translit"}, } m["mnj"] = { "Munji", 33639, "ira-mny", "Arab", } m["mnk"] = { "Mandinka", 33678, "dmn-wmn", "Latn, Arab, Nkoo", } m["mnl"] = { "Tiale", 6744350, "poz-vnn", "Latn", } m["mnm"] = { "Mapena", 11732415, } m["mnn"] = { "Southern Mnong", 23857582, "mkh-ban", } m["mnp"] = { "Northern Min", 36457, "zhx-inm", "Hants", generate_forms = "zh-generateforms", translit = "zh-translit", sort_key = "Hani-sortkey", } m["mnq"] = { "Minriq", 2742268, "mkh-asl", "Latn", } m["mnr"] = { "Mono (California)", 33591, "azc-num", "Latn", } m["mnt"] = { "Maykulan", 3915696, "aus-pam", "Latn", } m["mnu"] = { "Mer", 6817854, } m["mnv"] = { "Rennellese", 3397346, "poz-pnp", "Latn", } m["mnw"] = { "Mon", 13349, "mkh-mnc", "Mymr", ancestors = "mkh-mmn", sort_key = { from = {"ျ", "ြ", "ွ", "ှ", "ၞ", "ၟ", "ၠ", "ၚ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "္န", "္မ", "္လ", "င", "သ္သ"} }, } m["mnx"] = { "Manikion", 3507964, "paa-wpa", } m["mny"] = { "Manyawa", 11002622, "bnt-mak", ancestors = "vmw", } m["mnz"] = { "Migani", 6899857, "ngf", "Latn" } m["moa"] = { "Mwan", 3320111, "dmn-nbe", "Latn", } m["moc"] = { "Mocoví", 3027906, "sai-guc", "Latn", } m["mod"] = { "Mobilian", 13333, "crp", "Latn", ancestors = "cho, cic", } m["moe"] = { "Montagnais", 13351, "alg", "Latn", ancestors = "cr", entry_name = {remove_diacritics = c.macron}, } m["mog"] = { "Mongondow", 3058458, "phi", "Latn", } m["moh"] = { "Mohawk", 13339, "iro-nor", "Latn", ancestors = "iro-omo", } m["moi"] = { "Mboi", 3914417, "alv-yun", } m["moj"] = { "Monzombo", 11154772, "nic-nkk", "Latn", } m["mok"] = { "Morori", 6913275, } m["mom"] = { "Monimbo", 56542, } m["moo"] = { "Monom", 6901726, "mkh-ban", } m["mop"] = { "Mopan Maya", 36183, "myn", "Latn", } m["moq"] = { "Mor (Papuan)", 11732468, "paa", } m["mor"] = { "Moro", 36172, "alv-hei", "Latn", } m["mos"] = { "Moore", 36096, "nic-mre", "Latn", } m["mot"] = { "Barí", 2886281, "cba", "Latn", } m["mou"] = { "Mogum", 3440473, "cdc-est", "Latn", } m["mov"] = { 
"Mojave", 56510, "nai-yuc", "Latn", } m["mow"] = { "Moi (Congo)", 11124792, "bnt-bmo", "Latn", } m["mox"] = { "Molima", 3319495, "poz-ocw", "Latn", } m["moy"] = { "Shekkacho", 56827, "omv-gon", } m["moz"] = { "Mukulu", 3440403, "cdc-est", } m["mpa"] = { "Mpoto", 6928303, "bnt-mbi", "Latn", } m["mpb"] = { "Mullukmulluk", 6741120, } m["mpc"] = { "Mangarayi", 6748829, } m["mpd"] = { "Machinere", 12953681, "awd", "Latn", } m["mpe"] = { "Majang", 56724, "sdv", } m["mpg"] = { "Marba", 56614, "cdc-mas", } m["mph"] = { "Maung", 6792550, "aus-wdj", "Latn", } m["mpi"] = { "Mpade", 3280670, "cdc-cbm", "Latn", } m["mpj"] = { "Martu Wangka", 3295916, "aus-pam", "Latn", } m["mpk"] = { "Mbara (Chad)", 3912770, "cdc-cbm", } m["mpl"] = { "Middle Watut", 15887910, "poz-ocw", "Latn", } m["mpm"] = { "Yosondúa Mixtec", 12953741, "omq-mxt", "Latn", } m["mpn"] = { "Mindiri", 6863842, "poz-ocw", "Latn", } m["mpo"] = { "Miu", 6883668, "poz-ocw", "Latn", } m["mpp"] = { "Migabac", 11732448, } m["mpq"] = { "Matís", 3299145, "sai-pan", "Latn", } m["mpr"] = { "Vangunu", 3554582, "poz-ocw", "Latn", } m["mps"] = { "Dadibi", 5208077, "ngf", "Latn", } m["mpt"] = { "Mian", 12952846, "ngf-okk", "Latn", } m["mpu"] = { "Makuráp", 3281037, "tup", "Latn", } m["mpv"] = { "Mungkip", 11732485, "ngf-fin", "Latn", } m["mpw"] = { "Mapidian", 6753812, "awd", "Latn", } m["mpx"] = { "Misima-Paneati", 6875666, "poz-ocw", "Latn", } m["mpy"] = { "Mapia", 3287224, "poz-mic", "Latn", } m["mpz"] = { "Mpi", 6928276, "tbq-bka", } m["mqa"] = { "Maba", 3273750, } m["mqb"] = { "Mbuko", 3502213, "cdc-cbm", "Latn", } m["mqc"] = { "Mangole", 6749097, "poz-cma", "Latn", } m["mqe"] = { "Matepi", 11732426, "ngf-mad", } m["mqf"] = { "Momuna", 6897518, } m["mqg"] = { "Kota Bangun Kutai Malay", 12952778, } m["mqh"] = { "Tlazoyaltepec Mixtec", 12953740, "omq-mxt", "Latn", } m["mqi"] = { "Mariri", 6765544, } m["mqj"] = { "Mamasa", 6745452, "poz-ssw", "Latn", } m["mqk"] = { "Rajah Kabunsuwan Manobo", 12953700, "mno", } m["mql"] = { 
"Mbelime", 4286473, "nic-eov", "Latn", } m["mqm"] = { "South Marquesan", 19694214, "poz-pep", "Latn", } m["mqn"] = { "Moronene", 642581, "poz-btk", "Latn", } m["mqo"] = { "Modole", 11732457, "paa-nha", "Latn", } m["mqp"] = { "Manipa", 6749799, "poz-cma", "Latn", } m["mqq"] = { "Minokok", 18642293, "poz-san", "Latn", } m["mqr"] = { "Mander", 6747979, "paa-tkw", } m["mqs"] = { "West Makian", 3033575, "paa-nha", "Latn", } m["mqt"] = { "Mok", 13018559, "mkh-pal", } m["mqu"] = { "Mandari", 3285426, "sdv-bri", } m["mqv"] = { "Mosimo", 11732478, "ngf-mad", "Latn", } m["mqw"] = { "Murupi", 11732486, "ngf-mad", "Latn", } m["mqx"] = { "Mamuju", 6746004, "poz-ssw", "Latn", } m["mqy"] = { "Manggarai", 3285748, "poz-cet", "Latn", } m["mqz"] = { "Malasanga", 14916889, "poz-ocw", "Latn", } m["mra"] = { "Mlabri", 3073465, "mkh", } m["mrb"] = { "Sungwadia", 3293299, "poz-vnn", "Latn", } m["mrc"] = { "Maricopa", 56386, "nai-yuc", "Latn", } m["mrd"] = { "Western Magar", 22303263, "sit-gma", "Deva", } m["mre"] = { "Martha's Vineyard Sign Language", 33494, "sgn", "Latn, Sgnw", } m["mrf"] = { "Elseng", 3915667, "paa-brd", "Latn", } m["mrg"] = { "Mising", 3316328, "sit-tan", "Latn, Beng, Deva", ancestors = "adi", } m["mrh"] = { "Mara Chin", 4175893, "tbq-kuk", "Latn", } m["mrj"] = { "Western Mari", 1776032, "chm", "Cyrl", translit = "chm-translit", sort_key = "mrj-sortkey", } m["mrk"] = { "Hmwaveke", 5873712, "poz-cln", "Latn", } m["mrl"] = { "Mortlockese", 3324598, "poz-mic", "Latn", } m["mrm"] = { "Mwerlap", 3331115, "poz-vnn", "Latn", } m["mrn"] = { "Cheke Holo", 2962165, "poz-ocw", "Latn", } m["mro"] = { "Mru", 1951521, "sit-mru", "Latn, Mroo", } m["mrp"] = { "Morouas", 6913299, "poz-vnn", "Latn", } m["mrq"] = { "North Marquesan", 2603808, "poz-pep", "Latn", } m["mrr"] = { "Hill Maria", 27602, "dra-mdy", "Deva", } m["mrs"] = { "Maragus", 6754640, "poz-vnc", "Latn", } m["mrt"] = { "Margi", 56241, "cdc-cbm", "Latn", } m["mru"] = { "Mono (Cameroon)", 11031964, "alv-mbm", "Latn", } 
m["mrv"] = { "Mangarevan", 36237, "poz-pep", "Latn", } m["mrw"] = { "Maranao", 33800, "phi", "Latn, Arab", } m["mrx"] = { "Dineor", 5278044, "paa-tkw", "Latn", } m["mry"] = { "Karaga Mandaya", 6747925, "phi", } m["mrz"] = { "Marind", 6763970, } m["msb"] = { "Masbatenyo", 33948, "phi", "Latn", } m["msc"] = { "Sankaran Maninka", 11155812, "dmn-mnk", } m["msd"] = { "Yucatec Maya Sign Language", 34281, "sgn", "Latn", -- when documented } m["mse"] = { "Musey", 56328, "cdc-mas", } m["msf"] = { "Mekwei", 4544752, "paa-nim", "Latn", } m["msg"] = { "Moraid", 6909020, } m["msi"] = { "Sabah Malay", 10867404, "crp", "Latn, Arab", } m["msj"] = { "Ma", 6720909, "nic-mbc", "Latn", } m["msk"] = { "Mansaka", 12952800, "phi", "Latn", } m["msl"] = { "Molof", 4300950, } m["msm"] = { "Agusan Manobo", 12953696, "mno", "Latn", } m["msn"] = { "Vurës", 3563857, "poz-vnn", "Latn", } m["mso"] = { "Mombum", 6897079, } m["msp"] = { "Maritsauá", 6765915, "tup", "Latn", } m["msq"] = { "Caac", 2932212, "poz-cln", "Latn", } m["msr"] = { "Mongolian Sign Language", 3915499, "sgn", } m["mss"] = { "West Masela", 12952816, "poz-tim", } m["msu"] = { "Musom", 6943041, "poz-ocw", "Latn", } m["msv"] = { "Maslam", 3502273, } m["msw"] = { "Mansoanka", 35814, } m["msx"] = { "Moresada", 11732475, } m["msy"] = { "Aruamu", 3501809, "paa", "Latn", } m["msz"] = { "Momare", 6897030, } m["mta"] = { "Cotabato Manobo", 12953698, "mno", "Latn", } m["mtb"] = { "Anyin Morofo", 3502338, "alv-ctn", "Latn", ancestors = "any", } m["mtc"] = { "Munit", 11732482, "ngf-mad", } m["mtd"] = { "Mualang", 3073458, "poz-mly", "Latn", } m["mte"] = { "Alu", 33503, "poz-ocw", "Latn", } m["mtf"] = { "Murik (New Guinea)", 7050035, "paa-lsp", "Latn", } m["mtg"] = { "Una", 5580728, } m["mth"] = { "Munggui", 6936018, "poz-hce", "Latn", } m["mti"] = { "Maiwa (New Guinea)", 6737223, "ngf", "Latn", } m["mtj"] = { "Moskona", 11288953, } m["mtk"] = { "Mbe'", 10964025, "nic-nka", "Latn", } m["mtl"] = { "Montol", 3440457, "cdc-wst", "Latn", } 
m["mtm"] = { "Mator", 20669419, "syd", "Cyrl", } m["mtn"] = { "Matagalpa", 3490756, "nai-min", } m["mto"] = { "Totontepec Mixe", 7828400, "nai-miz", "Latn", } m["mtp"] = { "Wichí Lhamtés Nocten", 5908756, "sai-wic", "Latn", } m["mtq"] = { "Muong", 3236789, "mkh-vie", "Latn", sort_key = "vi-sortkey", } m["mtr"] = { "Mewari", 2992857, "raj", "Deva", translit = "hi-translit", -- for now } m["mts"] = { "Yora", 3572572, "sai-pan", "Latn", } m["mtt"] = { "Mota", 3325052, "poz-vnn", "Latn", } m["mtu"] = { "Tututepec Mixtec", 7857069, "omq-mxt", "Latn", } m["mtv"] = { "Asaro'o", 3503684, "ngf-fin", "Latn", } m["mtw"] = { "Magahat", 6729600, "phi", } m["mtx"] = { "Tidaá Mixtec", 7800805, "omq-mxt", "Latn", } m["mty"] = { "Nabi", 6956858, "qfa-tor", "Latn", } m["mua"] = { "Mundang", 36032, "alv-mbm", } m["mub"] = { "Mubi", 3440518, "cdc-est", "Latn", } m["muc"] = { "Mbu'", 35868, "nic-beb", "Latn", } m["mud"] = { "Mednyj Aleut", 1977419, "qfa-mix", ancestors = "ale, ru" } m["mue"] = { "Media Lengua", 36066, "qfa-mix", "Latn", ancestors = "es, qu", } m["mug"] = { "Musgu", 3123545, "cdc-cbm", "Latn", } m["muh"] = { "Mündü", 35981, "nic-nke", "Latn", } m["mui"] = { "Musi", 615660, "poz-mly", "Latn", } m["muj"] = { "Mabire", 3440437, } m["mul"] = { "Translingual", 7834564, "qfa-not", "All", -- NOTE: The following sort keys are used in process_page() in [[Module:headword/page]], which generates -- the default sort key for the page (corresponding to {{DEFAULTSORT:...}}) by generating a sort key for -- the pagename using `makeSortKey()` called on language object "mul". Currently this just handles -- Japanese sort keys. -- -- FIXME: This should be smarter and use the language of the page if there's only one. sort_key = { Hani = "Hani-sortkey", Jpan = "Jpan-sortkey", Hrkt = "Hira-sortkey", -- Sort all kana as Hira. Hira = "Hira-sortkey", Kana = "Hira-sortkey", }, standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. 
c.punc, } m["mum"] = { "Maiwala", 12952764, "poz-ocw", "Latn", } m["muo"] = { "Nyong", 36373, "alv-lek", } m["mup"] = { "Malvi", 33413, "raj", "Deva", translit = "hi-translit" } m["muq"] = { "Eastern Xiangxi Miao", 27431376, "hmn", } m["mur"] = { "Murle", 56727, "sdv", } m["mus"] = { "Creek", 523014, "nai-mus", "Latn", } m["mut"] = { "Western Muria", 12952886, "dra-mur", } m["muu"] = { "Yaaku", 34222, "cus-eas", } m["muv"] = { "Muthuvan", 3327420, "dra-tam", } m["mux"] = { "Bo-Ung", 15831607, } m["muy"] = { "Muyang", 3502301, "cdc-cbm", "Latn", } m["muz"] = { "Mursi", 36013, "sdv", } m["mva"] = { "Manam", 6746851, "poz-ocw", "Latn", } m["mvb"] = { "Mattole", 20824, "ath-pco", "Latn", } m["mvd"] = { "Mamboru", 578815, "poz", "Latn", } m["mvg"] = { "Yucuañe Mixtec", 25562736, "omq-mxt", "Latn", } m["mvh"] = { "Mire", 3441359, } m["mvi"] = { "Miyako", 36218, "jpx-sry", "Jpan", translit = s["jpx-translit"], display_text = s["jpx-displaytext"], entry_name = s["jpx-entryname"], sort_key = s["jpx-sortkey"], } m["mvk"] = { "Mekmek", 6810592, "paa-yua", } m["mvl"] = { "Mbara (Australia)", 6799620, "aus-pam", } m["mvm"] = { "Muya", 2422759, "sit-qia", } m["mvn"] = { "Minaveha", 6863278, "poz-ocw", "Latn", } m["mvo"] = { "Marovo", 3294683, "poz-ocw", "Latn", } m["mvp"] = { "Duri", 3915414, "poz-ssw", "Latn", } m["mvq"] = { "Moere", 11732458, "ngf-mad", } m["mvr"] = { "Marau", 6755069, "poz-hce", "Latn", } m["mvs"] = { "Massep", 3502895, "paa-tkw", } m["mvt"] = { "Mpotovoro", 6928305, "poz-vnc", "Latn", } m["mvu"] = { "Marfa", 713633, } m["mvv"] = { "Tagal Murut", 7675300, "poz-san", "Latn", } m["mvw"] = { "Machinga", 12952754, "bnt-rvm", } m["mvx"] = { "Meoswar", 6817777, "poz-hce", "Latn", } m["mvy"] = { "Indus Kohistani", 33399, "inc-koh", "Arab", } m["mvz"] = { "Mesqan", 6821677, "sem-eth", } m["mwa"] = { "Mwatebu", 14916896, "poz-ocw", "Latn", } m["mwb"] = { "Juwal", 6319103, "qfa-tor", "Latn", } m["mwc"] = { "Are", 29277, "poz-ocw", "Latn", } m["mwe"] = { "Mwera", 
6944725, "bnt-rvm", "Latn", } m["mwf"] = { "Murrinh-Patha", 2980398, "aus-dal", "Latn", } m["mwg"] = { "Aiklep", 3399652, "poz-ocw", "Latn", } m["mwh"] = { "Mouk-Aria", 3325498, "poz-ocw", "Latn", } m["mwi"] = { "Labo", 2157452, "poz-vnc", "Latn", } m["mwk"] = { "Kita Maninkakan", 3015523, "dmn-wmn", } m["mwl"] = { "Mirandese", 13330, "roa-asl", "Latn", } m["mwm"] = { "Sar", 56850, "csu-sar", "Latn", } m["mwn"] = { "Nyamwanga", 6944666, "bnt-mwi", "Latn", } m["mwo"] = { "Sungwadaga", 3276435, "poz-vnn", "Latn", } m["mwp"] = { "Kala Lagaw Ya", 2591262, "aus-pam", "Latn", } m["mwq"] = { "Mün Chin", 331340, "tbq-kuk", } m["mwr"] = { "Marwari", 56312, "raj", "Deva, Mahj", translit = { Deva = "hi-translit", -- for now Mahj = "Mahj-translit", }, } m["mws"] = { "Mwimbi-Muthambi", 15632357, "bnt-kka", "Latn", } m["mwt"] = { "Moken", 18648701, "poz", } m["mwu"] = { "Mittu", 6883573, "csu-bbk", "Latn", } m["mwv"] = { "Mentawai", 13365, "poz-nws", "Latn", } m["mww"] = { "White Hmong", 3138829, "hmn", "Latn, Hmng, Hmnp", } m["mwz"] = { "Moingi", 11011905, } m["mxa"] = { "Northwest Oaxaca Mixtec", 12953739, "omq-mxt", "Latn", } m["mxb"] = { "Tezoatlán Mixtec", 3317286, "omq-mxt", "Latn", } m["mxd"] = { "Modang", 6888037, "poz", "Latn", } m["mxe"] = { "Mele-Fila", 3305008, "poz-pnp", "Latn", } m["mxf"] = { "Malgbe", 3502224, } m["mxg"] = { "Mbangala", 6799612, "bnt-yak", } m["mxh"] = { "Mvuba", 6944591, "csu-mle", "Latn", } m["mxi"] = { "Mozarabic", 317044, "roa-ibe", "Arab, Hebr, Latn", translit = "mxi-translit", display_text = { Hebr = "Hebr-common", }, entry_name = { Arab = "ar-entryname", Hebr = "Hebr-common", }, sort_key = { Hebr = "Hebr-common", }, } m["mxj"] = { "Miju", 56332, "sit-mdz", "Latn, Deva", } m["mxk"] = { "Monumbo", 6906792, "qfa-tor", } m["mxl"] = { "Maxi Gbe", 35770, "alv-gbe", } m["mxm"] = { "Meramera", 6817936, "poz-ocw", "Latn", } m["mxn"] = { "Moi (Indonesia)", 11732459, "paa", } m["mxo"] = { "Mbowe", 10962309, "bnt-kav", } m["mxp"] = { "Tlahuitoltepec 
Mixe", 7810697, } m["mxq"] = { "Juquila Mixe", 25559721, } m["mxr"] = { "Murik (Malaysia)", 3328150, nil, "Latn", } m["mxs"] = { "Huitepec Mixtec", 12953729, "omq-mxt", "Latn", } m["mxt"] = { "Jamiltepec Mixtec", 12953730, "omq-mxt", "Latn", } m["mxu"] = { "Mada (Cameroon)", 3441206, "cdc-cbm", "Latn", } m["mxv"] = { "Metlatónoc Mixtec", 36363, "omq-mxt", "Latn", } m["mxw"] = { "Namo", 12952923, } m["mxx"] = { "Mahou", 11004334, "dmn-mnk", "Latn, Nkoo", } m["mxy"] = { "Southeastern Nochixtlán Mixtec", 7070684, "omq-mxt", "Latn", } m["mxz"] = { "Central Masela", 42575433, "poz-tim", "Latn", } m["myb"] = { "Mbay", 3033565, "csu-sar", "Latn", } m["myc"] = { "Mayeka", 11129517, "bnt-boa", } m["mye"] = { "Myene", 35832, "bnt-tso", "Latn", } m["myf"] = { "Bambassi", 56540, "omv-mao", "Latn", } m["myg"] = { "Manta", 35799, "nic-mom", "Latn", } m["myh"] = { "Makah", 3280640, "wak", "Latn", } m["myj"] = { "Mangayat", 35988, "nic-ser", } m["myk"] = { "Mamara Senoufo", 36187, "alv-sma", "Latn", } m["myl"] = { "Moma", 6897018, "poz", "Latn", } m["mym"] = { "Me'en", 3408516, "sdv", } m["myo"] = { "Anfillo", 34928, "omv-gon", } m["myp"] = { "Pirahã", 33825, "sai-mur", "Latn", } m["myr"] = { "Muniche", 3915654, } m["mys"] = { "Mesmes", 3508617, "sem-eth", } m["myu"] = { "Mundurukú", 746723, "tup", "Latn", } m["myv"] = { "Erzya", 29952, "urj-mdv", "Cyrl", translit = "myv-translit", override_translit = true, } m["myw"] = { "Muyuw", 3502878, "poz-ocw", "Latn", } m["myx"] = { "Masaba", 12952814, "bnt-msl", "Latn", } m["myy"] = { "Macuna", 3275059, "sai-tuc", "Latn", } m["myz"] = { "Classical Mandaic", 25559314, "sem-ase", "Mand", translit = { Mand = "Mand-translit", }, entry_name = { Mand = "Mand-entryname", } } m["mza"] = { "Santa María Zacatepec Mixtec", 8063756, "omq-mxt", "Latn", } m["mzb"] = { "Northern Saharan Berber", 11156769, "ber", "Arab, Latn, Tfng", } m["mzc"] = { "Madagascar Sign Language", 12715020, "sgn", } m["mzd"] = { "Malimba", 35806, "bnt-saw", } m["mze"] = { 
"Morawa", 6909384, } m["mzg"] = { "Monastic Sign Language", 3217333, "sgn", } m["mzh"] = { "Wichí Lhamtés Güisnay", 7998197, "sai-wic", "Latn", } m["mzi"] = { "Ixcatlán Mazatec", 6101049, "omq-maz", "Latn", } m["mzj"] = { "Manya", 11006832, "dmn-mnk", } m["mzk"] = { "Nigeria Mambila", 11004163, "nic-mmb", "Latn", } m["mzl"] = { "Mazatlán Mixe", 25559728, } m["mzm"] = { "Mumuye", 36021, "alv-mum", "Latn", } m["mzn"] = { "Mazanderani", 13356, "ira-msh", "mzn-Arab", } m["mzo"] = { "Matipuhy", 6787588, "sai-kui", "Latn", } m["mzp"] = { "Movima", 1659701, "qfa-iso", "Latn", } m["mzq"] = { "Mori Atas", 3324070, "poz-btk", "Latn", } m["mzr"] = { "Marúbo", 3296011, "sai-pan", "Latn", } m["mzs"] = { "Macanese", 35785, "crp", "Latn", ancestors = "pt", sort_key = {Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.cedilla}}, } m["mzt"] = { "Mintil", 6869641, "mkh-asl", } m["mzu"] = { "Inapang", 6013569, "paa", "Latn", } m["mzv"] = { "Manza", 36038, "gba-eas", } m["mzw"] = { "Deg", 35183, "nic-gnw", "Latn", } m["mzx"] = { "Mawayana", 6794377, "awd", } m["mzy"] = { "Mozambican Sign Language", 6927809, "sgn", } m["mzz"] = { "Maiadomu", 6735234, "poz-ocw", "Latn", } return require("Module:languages").finalizeData(m, "language") 6t3zj8qk724h265jjcektcc5wiix9ml