Wiktionary siwiktionary https://si.wiktionary.org/wiki/%E0%B7%80%E0%B7%92%E0%B6%9A%E0%B7%8A%E0%B7%82%E0%B6%B1%E0%B6%BB%E0%B7%92:%E0%B6%B8%E0%B7%94%E0%B6%BD%E0%B7%8A_%E0%B6%B4%E0%B7%92%E0%B6%A7%E0%B7%94%E0%B7%80 MediaWiki 1.46.0-wmf.24 case-sensitive මාධ්‍යය විශේෂ සාකච්ඡාව පරිශීලක පරිශීලක සාකච්ඡාව වික්ෂනරි වික්ෂනරි සාකච්ඡාව ගොනුව ගොනුව සාකච්ඡාව මාධ්‍යවිකි මාධ්‍යවිකි සාකච්ඡාව සැකිල්ල සැකිලි සාකච්ඡාව උදවු උදවු සාකච්ඡාව ප්‍රවර්ගය ප්‍රවර්ග සාකච්ඡාව TimedText TimedText talk Module Module talk Event Event talk Module:documentation 828 5004 231492 222216 2026-04-15T08:34:52Z en>Surjection 0 Changed protection settings for "[[Module:documentation]]" ([Edit=Allow only template editors and administrators] (indefinite) [Move=Allow only template editors and administrators] (indefinite)) 231492 Scribunto text/plain local export = {} local array_module = "Module:array" local debug_track_module = "Module:debug/track" local frame_module = "Module:frame" local fun_is_callable_module = "Module:fun/isCallable" local languages_module = "Module:languages" local links_module = "Module:links" local load_module = "Module:load" local module_categorization_module = "Module:module categorization" local number_list_show_module = "Module:number list/show" local pages_module = "Module:pages" local parameters_module = "Module:parameters" local scripts_module = "Module:scripts" local string_endswith_module = "Module:string/endswith" local string_gline_module = "Module:string/gline" local string_insert_module = "Module:string/insert" local string_startswith_module = "Module:string/startswith" local string_utilities_module = "Module:string utilities" local template_parser_module = "Module:template parser" local title_exists_module = "Module:title/exists" local title_new_title_module = "Module:title/newTitle" local concat = table.concat local error = error local full_url = mw.uri.fullUrl local get_current_title = mw.title.getCurrentTitle local insert = table.insert local ipairs = ipairs local 
list_to_text = mw.text.listToText
local new_message = mw.message.new
local pcall = pcall
local require = require
local tonumber = tonumber
local tostring = tostring
local type = type
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

--[==[
Lazy loaders. Each stub below requires its module on the first call only,
rebinds its own local name to the real function (so later calls bypass the
stub), and forwards the arguments of that first call.
]==]
local function Array(...)
	local loaded = require(array_module)
	Array = loaded
	return loaded(...)
end

local function categorize_module(...)
	local loaded = require(module_categorization_module).categorize
	categorize_module = loaded
	return loaded(...)
end

local function debug_track(...)
	local loaded = require(debug_track_module)
	debug_track = loaded
	return loaded(...)
end

local function endswith(...)
	local loaded = require(string_endswith_module)
	endswith = loaded
	return loaded(...)
end

local function expand_template(...)
	local loaded = require(frame_module).expandTemplate
	expand_template = loaded
	return loaded(...)
end

local function find_templates(...)
	local loaded = require(template_parser_module).find_templates
	find_templates = loaded
	return loaded(...)
end

local function full_link(...)
	local loaded = require(links_module).full_link
	full_link = loaded
	return loaded(...)
end

local function get_lang(...)
	local loaded = require(languages_module).getByCode
	get_lang = loaded
	return loaded(...)
end

local function get_pagetype(...)
	local loaded = require(pages_module).get_pagetype
	get_pagetype = loaded
	return loaded(...)
end

local function get_script(...)
	local loaded = require(scripts_module).getByCode
	get_script = loaded
	return loaded(...)
end

local function gline(...)
	local loaded = require(string_gline_module)
	gline = loaded
	return loaded(...)
end

local function is_callable(...)
	local loaded = require(fun_is_callable_module)
	is_callable = loaded
	return loaded(...)
end

local function is_documentation(...)
	local loaded = require(pages_module).is_documentation
	is_documentation = loaded
	return loaded(...)
end

local function is_sandbox(...)
	local loaded = require(pages_module).is_sandbox
	is_sandbox = loaded
	return loaded(...)
end

local function new_title(...)
	local loaded = require(title_new_title_module)
	new_title = loaded
	return loaded(...)
end

local function number_list_show_table(...)
	local loaded = require(number_list_show_module).table
	number_list_show_table = loaded
	return loaded(...)
end

local function preprocess(...)
	local loaded = require(frame_module).preprocess
	preprocess = loaded
	return loaded(...)
end

local function process_params(...)
	local loaded = require(parameters_module).process
	process_params = loaded
	return loaded(...)
end

local function safe_load_data(...)
	local loaded = require(load_module).safe_load_data
	safe_load_data = loaded
	return loaded(...)
end

local function split(...)
	local loaded = require(string_utilities_module).split
	split = loaded
	return loaded(...)
end

local function startswith(...)
	local loaded = require(string_startswith_module)
	startswith = loaded
	return loaded(...)
end

local function string_insert(...)
	local loaded = require(string_insert_module)
	string_insert = loaded
	return loaded(...)
end

local function title_exists(...)
	local loaded = require(title_exists_module)
	title_exists = loaded
	return loaded(...)
end

local function ugsub(...)
	local loaded = require(string_utilities_module).gsub
	ugsub = loaded
	return loaded(...)
end

local function umatch(...)
	local loaded = require(string_utilities_module).match
	umatch = loaded
	return loaded(...)
end

-- Maps the lowercase skin name found in a user script/stylesheet page name
-- to the string appended to the fallback documentation name.
local skins = {
	common = "",
	vector = "Vector",
	monobook = "Monobook",
	cologneblue = "Cologne Blue",
	modern = "Modern",
}

-- Records a tracking hit under "documentation/<page>"; always returns true.
local function track(page)
	local tracking_key = "documentation/" .. page
	debug_track(tracking_key)
	return true
end

-- Builds an external wikitext link, labelled `text`, to Special:ComparePages
-- for the two given page names.
local function compare_pages(page1, page2, text)
	local url = full_url("Special:ComparePages", { page1 = page1, page2 = page2 })
	return "[" .. tostring(url) .. " " .. text .. "]"
end

-- Avoid transcluding [[Module:languages/cache]] everywhere.
local lang_cache = setmetatable({}, {
	-- Nothing is ever stored in the proxy itself, so every lookup lands here
	-- and is forwarded to [[Module:languages/cache]], which is only required
	-- once a language is actually requested.
	__index = function(self, k)
		return require("Module:languages/cache")[k]
	end
})

-- Returns a formatted link to `word`, using the `zh` entry of `lang_cache`.
local function zh_link(word)
	return full_link { lang = lang_cache.zh, term = word }
end

--[==[
`process` handler for the [[Module:languages/data]] shards.

`division` is the capture from the title pattern (e.g. "2", "3/a",
"exceptional", optionally followed by "/extra"). Inserts the matching
category (with a sort key derived from `division`) into `cats` and returns a
table naming the documentation template to expand.
]==]
local function make_languages_data_documentation(title, cats, division)
	local doc_template, module_cat
	if endswith(division, "/extra") then
		-- Strip the 6-character "/extra" suffix.
		division = division:sub(1, -7)
		doc_template = "language extradata documentation"
		module_cat = "Language extra data modules"
	else
		doc_template = "language data documentation"
		module_cat = "Language data modules"
	end

	local sort_key
	if division == "exceptional" then
		sort_key = "x"
	else
		-- Parentheses discard gsub's second return value (the match count).
		sort_key = (division:gsub("/", ""))
	end

	cats:insert(module_cat .. "|" .. sort_key)
	return { title = doc_template }
end

-- What each kind of [[Module:Unicode data]] shard contains, keyed by the
-- subpage name between "Unicode data/" and the three-hex-digit shard suffix.
local unicode_data_text_types = {
	["names"] = "names",
	["images"] = "titles of images",
	["emoji images"] = "emoji-style images",
}

--[==[
`process` handler for [[Module:Unicode data]] submodules.

Only titles of the form "Module:Unicode data/<names|images|emoji images>/XXX"
(XXX = three hex digits) are documented; each such shard covers the code
points U+XXX000 to U+XXXFFF. Returns the documentation wikitext, or nothing
for any other title. `cats` is not modified.
]==]
local function make_Unicode_data_documentation(title, cats)
	local subpage, first_three_of_code_point =
		title.fullText:match("^Module:Unicode data/([^/]+)/(%x%x%x)$")
	local text_type = subpage and unicode_data_text_types[subpage]
	if not text_type then
		return
	end

	local low = tonumber(first_three_of_code_point .. "000", 16)
	local high = tonumber(first_three_of_code_point .. "FFF", 16)

	-- `text_type` is passed as an argument rather than spliced into the
	-- format string, so it can never be misread as a format directive.
	local text = string.format(
		"This data module contains the %s of " ..
		"[[Appendix:Unicode|Unicode]] code points within the range U+%04X to U+%04X.",
		text_type, low, high)

	if subpage == "images" and safe_load_data("Module:Unicode data/emoji images/" .. first_three_of_code_point) then
		-- Only mention the emoji shard if it actually loads.
		text = text .. " This list includes the text variants of emojis. For the list of emoji variants of those characters, see [[Module:Unicode data/emoji images/" .. first_three_of_code_point .. "]]."
	elseif subpage == "emoji images" then
		text = text .. " For text-style images, see [[Module:Unicode data/images/" .. first_three_of_code_point .. "]]."
	end
	return text
end

--[==[
Adds the standard categories for a language-specific data module to `cats`:
`overall_data_module_cat` (sorted by language name), "<langname> modules" and
"<langname> data modules".

Returns the language object and the language name, or nothing if `langcode`
has no entry in `lang_cache`.
]==]
local function insert_lang_data_module_cats(cats, langcode, overall_data_module_cat)
	local lang = lang_cache[langcode]
	if not lang then
		return
	end

	local langname
	if lang._fullCode then
		-- NOTE(review): presumably `_fullCode` marks a variety whose parent
		-- language should supply the name — confirm against [[Module:languages]].
		langname = lang_cache[lang._fullCode]:getCanonicalName()
	else
		langname = lang:getCanonicalName()
	end

	cats:insert(overall_data_module_cat .. "|" .. langname)
	cats:insert(langname .. " modules")
	cats:insert(langname .. " data modules")
	return lang, langname
end

--[=[
This provides categories and documentation for various data modules, so that
[[Category:Uncategorized modules]] isn't unnecessarily cluttered. It is a list
of tables, each of which has the following possible fields:

`regex` (required): A Lua pattern to match the module's title. If it matches,
the data in this entry will be used. Any captures in the pattern can be
referenced in the `cat` field using %1 for the first capture, %2 for the
second, etc. (often used for creating the sortkey for the category). In
addition, the captures are passed to the `process` function as the third and
subsequent parameters.

`process` (optional): This may be a function or a string. If it is a function,
it is called as follows: `process(TITLE, CATS, CAPTURE1, CAPTURE2, ...)` where:
* TITLE is a title object describing the module's title; see [https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Title_objects].
* CATS is an array object (see [[Module:array]]) of categories that the module will be added to.
* CAPTURE1, CAPTURE2, ... contain any captures in the `regex` field.
The return value of `process` should either be a string (which will be used as
the module's documentation), or a table specifying the name of a template to
expand to get the documentation, along with the arguments to that template.
In the latter format, the template name (bare, without the "Template:" prefix) should be in the `title` field, and any arguments should be in `args`; in this case, the template name will be listed above the generated documentation as the source of the documentation, along with an edit button to edit the template's contents. If, however, the return value of the `process` function is a string, any template invocations will be expanded using frame:preprocess(), and [[Module:documentation]] will be listed as the source of the documentation.
If `process` itself is a string rather than a function, it should name a submodule under [[Module:documentation/functions/]] which returns a function, of the same type as described above. This submodule will be specified as the source of the documentation (unless it returns a table naming a template to expand to get the documentation, as described above).
If `process` is omitted entirely, the module will have no documentation.

`cat` (optional): A string naming the category into which the module should be placed, or a list of such strings. Captures specified in `regex` may be referenced in this string using %1 for the first capture, %2 for the second, etc. It is also possible to add categories in the `process` function by inserting them into the passed-in CATS array (the second parameter).
]=]

-- Documentation text shared by every term-level dialect synonyms data module
-- ([[Module:dialect synonyms/LANG/TERM]] and [[Module:dialect synonyms/LANG/TERM/ID]]).
local dial_syn_term_structure_doc =
	"==== Term/sense module structure ====\n" ..
	"* <code>export.title</code> — optional; custom table title (e.g. \"Realization of 'strong R' between vowels\"). Overrides the language default.\n" ..
	"* <code>export.meaning</code> — optional; meaning/gloss (alternative to <code>gloss</code>).\n" ..
	"* <code>export.gloss</code> — optional; short meaning for the table.\n" ..
	"* <code>export.note</code> — optional; single note key or string, or list of note keys.\n" ..
	"* <code>export.notes</code> — optional; list of note keys.\n" ..
	"* <code>export.source</code> / <code>export.sources</code> — optional; source keys.\n" ..
	"* <code>export.last_column</code> — optional; label for the data column (default \"Words\"; e.g. \"Realization\").\n" ..
	"* <code>export.syns</code> — required; table mapping variety/location names (keys from the language data module) to a list of term entries. Each entry can be a string or a table (e.g. <code>{ ipa = \"[ɽ]\" }</code> or <code>{ term = \"word\" }</code>).\n\n" ..
	"Example (custom title and data column, IPA realizations):\n" ..
	"<pre>\nlocal export = {}\n\nexport.title = \"Realization of 'strong R' between vowels\"\n" ..
	"export.meaning = \"\"\nexport.note = \"realization of 'strong R' between vowels\"\n" ..
	"export.last_column = \"Realization\"\n\nexport.syns = {\n\t[\"ALERS-158\"] = { { ipa = \"[ɽ]\" } },\n\t[\"ALERS-175\"] = { { ipa = \"[x]\" } },\n}\n\nreturn export\n</pre>\n\n"

-- `process` handler shared by both term-level dialect synonyms patterns.
-- `id` is nil when the title has no ID subpage; `id = nil` in the table
-- constructor below is then the same as omitting the key.
local function make_dial_syn_term_documentation(title, cats, lang_code, term, id)
	local lang = lang_cache[lang_code]
	if lang then
		local langname = lang:getCanonicalName()
		cats:insert("Dialect synonyms data modules|" .. langname)
		cats:insert(langname .. " dialect synonyms data modules|" .. term)
		return ("%s\n\n%s"):format(
			dial_syn_term_structure_doc,
			expand_template({ title = 'dial syn', args = { lang_code, term, id = id } }))
	end
end

-- Entries are tried in order and the first matching `regex` wins, so more
-- specific patterns must precede the catch-all patterns for the same prefix.
local module_regex = {
	{
		regex = "^Module:languages/data/(3/%l/extra)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/data/(3/%l)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/data/(2/extra)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/data/(2)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/data/(exceptional/extra)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/data/(exceptional)$",
		process = make_languages_data_documentation,
	},
	{
		regex = "^Module:languages/.+$",
		cat = "Language and script modules",
	},
	{
		regex = "^Module:scripts/.+$",
		cat = "Language and script modules",
	},
	{
		regex = "^Module:data tables/data..?.?.?$",
		cat = "Reference module sharded data tables",
	},
	{
		regex = "^Module:zh/data/dial%-pron/.+$",
		cat = "Chinese dialectal pronunciation data modules",
		process = "zh dial or syn",
	},
	{
		regex = "^Module:zh/data/dial%-syn/.+$",
		cat = "Chinese dialect synonyms data modules",
		process = "zh dial or syn",
	},
	{
		regex = "^Module:zh/data/glyph%-data/.+$",
		cat = "Chinese historical character forms data modules",
		process = function(title, cats)
			local character = title.fullText:match("^Module:zh/data/glyph%-data/(.+)")
			if character then
				return ("This module contains data on historical forms of the Chinese character %s.")
					:format(zh_link(character))
			end
		end,
	},
	{
		regex = "^Module:zh/data/ltc%-pron/(.+)$",
		cat = "Middle Chinese pronunciation data modules|%1",
		process = "zh data",
	},
	{
		regex = "^Module:zh/data/och%-pron%-BS/(.+)$",
		cat = "Old Chinese (Baxter-Sagart) pronunciation data modules|%1",
		process = "zh data",
	},
	{
		regex = "^Module:zh/data/och%-pron%-ZS/(.+)$",
		cat = "Old Chinese (Zhengzhang) pronunciation data modules|%1",
		process = "zh data",
	},
	{ -- capture rest of zh/data submodules
		regex = "^Module:zh/data/(.+)$",
		cat = "Chinese data modules|%1",
	},
	{
		regex = "^Module:mul/guoxue%-data/cjk%-?(.*)$",
		process = "guoxue-data",
	},
	{
		regex = "^Module:Unicode data/(.+)$",
		cat = "Unicode data modules|%1",
		process = make_Unicode_data_documentation,
	},
	{
		regex = "^Module:number list/data/(.+)$",
		process = function(title, cats, lang_code)
			local lang = insert_lang_data_module_cats(cats, lang_code, "Number data modules")
			if lang then
				return ("This module contains data on various types of numbers in %s.\n%s")
					:format(lang:makeCategoryLink(), number_list_show_table() or "")
			end
		end,
	},
	{
		regex = "^Module:accel/(.+)$",
		process = function(title, cats)
			local lang_code = title.subpageText
			local lang = lang_cache[lang_code]
			if lang then
				cats:insert(lang:getCanonicalName() .. " modules|accel")
				cats:insert(("Accel submodules|%s"):format(lang:getCanonicalName()))
				return ("This module contains new entry creation rules for %s; see [[WT:ACCEL]] for an overview, and [[Module:accel]] for information on creating new rules.")
					:format(lang:makeCategoryLink())
			end
		end,
	},
	{
		regex = "^Module:inc%-ash/dial/data/(.+)$",
		cat = "Ashokan Prakrit modules|%1",
		process = function(title, cats)
			local word = title.fullText:match("^Module:inc%-ash/dial/data/(.+)$")
			if word then
				local lang = lang_cache["inc-ash"]
				return ("This module contains data on the pronunciation of %s in dialects of %s.")
					:format(full_link({ term = word, lang = lang }, "term"), lang:makeCategoryLink())
			end
		end,
	},
	{
		regex = "^.+%-translit$",
		process = "translit",
	},
	{
		regex = "^Module:form of/lang%-data/(.+)$",
		process = function(title, cats, lang_code)
			local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Language-specific form-of modules")
			if lang then
				-- FIXME, display more info.
				return "This module contains language-specific form-of data (tags, shortcuts, base lemma params. etc.) for " .. langname .. "."
			end
		end
	},
	{
		regex = "^Module:labels/data/lang/(.+)$",
		process = function(title, cats, lang_code)
			local lang = insert_lang_data_module_cats(cats, lang_code, "Language-specific label data modules")
			if lang then
				return {
					title = "label language-specific data documentation",
					args = { [1] = lang_code },
				}
			end
		end
	},
	{
		regex = "^Module:category tree/lang/(.+)$",
		process = function(title, cats, lang_code)
			local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Category tree data modules/lang")
			if lang then
				return "This module handles generating the descriptions and categorization for " .. langname .. " category pages " ..
					"of the format \"" .. langname .. " LABEL\" where LABEL can be any text. Examples are " ..
					"[[:Category:Bulgarian conjugation 2.1 verbs]] and [[:Category:Russian velar-stem neuter-form nouns]]. " ..
					"This module is part of the category tree system, which is a general framework for generating the " ..
					"descriptions and categorization of category pages.\n\n" ..
					"For more information, see [[Module:category tree/lang/documentation]].\n\n" ..
					"'''NOTE:''' If you add a new language-specific module, you must add the language code to the " ..
					"list at the top of [[Module:category tree/lang]] in order for the module to be recognized."
			end
		end
	},
	{
		regex = "^Module:category tree/topic/(.+)$",
		process = function(title, cats, submodule)
			cats:insert("Category tree data modules/topic| ")
			return {
				title = "topic cat data submodule documentation"
			}
		end
	},
	{
		regex = "^Module:category tree/(.+)$",
		process = function(title, cats, submodule)
			cats:insert("Category tree data modules| ")
			return {
				title = "category tree data submodule documentation"
			}
		end
	},
	{
		regex = "^Module:ja/data/(.+)$",
		cat = "Japanese data modules|%1",
	},
	{
		regex = "^Module:fi%-dialects/data/feature/Kettunen1940 ([0-9]+)$",
		cat = "Finnish dialectal data atlas modules|%1",
		process = function(title, cats, shard)
			return "This module contains shard " .. shard .. " of the online version of Lauri Kettunen's 1940 work " ..
				"''Suomen murteet III A. Murrekartasto'' (\"Finnish dialects III A: Dialect atlas\"). " ..
				"It was imported and converted from urn:nbn:fi:csc-kata20151130145346403821, published by the " ..
				"''Kotimaisten kielten keskus'' under the CC BY 4.0 license."
		end
	},
	{
		regex = "^Module:fi%-dialects/data/feature/(.+)",
		cat = "Finnish dialectal data modules|%1",
	},
	{
		regex = "^Module:fi%-dialects/data/word/(.+)",
		cat = "Finnish dialectal data modules|%1",
	},
	{
		regex = "^Module:Swadesh/data/([%l-]+)$",
		process = function(title, cats, lang_code)
			local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules")
			if lang then
				return "This module contains the [[Swadesh list]] of basic vocabulary in " .. langname .. "."
			end
		end
	},
	{
		regex = "^Module:Swadesh/data/([%l-]+)/([^/]*)$",
		process = function(title, cats, lang_code, variety)
			local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules")
			if lang then
				-- No trailing space here: every format string below already
				-- puts a space after the first %s.
				local prefix = "This module contains the [[Swadesh list]] of basic vocabulary in the"
				-- `variety` is tried first as a language code (etymology-only
				-- codes allowed), then as a script code, and is otherwise
				-- shown verbatim.
				local etym_lang = get_lang(variety, nil, "allow etym")
				if etym_lang then
					return ("%s %s variety of %s."):format(prefix, etym_lang:getCanonicalName(), langname)
				end
				local script = get_script(variety)
				if script then
					return ("%s %s %s script."):format(prefix, langname, script:getCanonicalName())
				end
				return ("%s %s variety of %s."):format(prefix, variety, langname)
			end
		end
	},
	{
		regex = "^Module:typing%-aids",
		process = function(title, cats)
			local data_suffix = title.fullText:match("^Module:typing%-aids/data/(.+)$")
			local sortkey
			if data_suffix then
				if data_suffix:find "^[%l-]+$" then
					local lang = get_lang(data_suffix)
					if lang then
						sortkey = lang:getCanonicalName()
						cats:insert(sortkey .. " data modules")
					end
				elseif data_suffix:find "^%u%l%l%l$" then
					local script = get_script(data_suffix)
					if script then
						sortkey = script:getCanonicalName()
						cats:insert(script:getCategoryName())
					end
				end
				cats:insert("Character insertion data modules|" .. (sortkey or data_suffix))
			end
		end,
	},
	{
		regex = "^Module:R:([%l-]+):(.+)$",
		process = function(title, cats, lang_code, refname)
			local lang = lang_cache[lang_code]
			if lang then
				cats:insert(lang:getCanonicalName() .. " modules|" .. refname)
				cats:insert(("Reference modules|%s"):format(lang:getCanonicalName()))
				return "This module implements the reference template {{temp|R:" .. lang_code .. ":" .. refname .. "}}."
			end
		end,
	},
	{
		regex = "^Module:Quotations/([%l-]+)/?(.*)",
		process = "Quotation",
	},
	{
		regex = "^Module:affix/lang%-data/([%l-]+)",
		process = "affix lang-data",
	},
	{
		regex = "^Module:dialect synonyms/([%l-]+)$",
		process = function(title, cats, lang_code)
			local lang = lang_cache[lang_code]
			if lang then
				local langname = lang:getCanonicalName()
				cats:insert("Dialect synonyms data modules|" .. langname)
				cats:insert(langname .. " dialect synonyms data modules| ")
				return "This module contains data on specific varieties of " .. langname .. ", for use by " ..
					"{{tl|dialect synonyms}}. The actual synonyms themselves are contained in submodules.\n\n" ..
					"==== Language data module structure ====\n" ..
					"* <code>export.title</code> — optional; table title template (e.g. \"Regional synonyms of %s\").\n" ..
					"* <code>export.columns</code> — optional; list of column headers for location hierarchy (e.g. {\"Dialect group\", \"Dialect\", \"Location\"}).\n" ..
					"* <code>export.notes</code> — optional; table of note keys to text.\n" ..
					"* <code>export.sources</code> — optional; table of source keys to text.\n" ..
					"* <code>export.note_aliases</code> — optional; alias map for notes.\n" ..
					"* <code>export.varieties</code> — required; nested table of variety nodes. Each node must have <code>name</code>; array part holds children. Node keys can include <code>text_display</code>, <code>color</code>, <code>code</code>, <code>wikidata</code>, <code>lat</code>, <code>long</code>, and language-specific keys (e.g. <code>persian</code>, <code>armenian</code>, <code>chinese</code>).\n\n" ..
					expand_template({ title = 'dial syn', args = { lang_code, ["demo mode"] = "y" } })
			end
		end,
	},
	{
		regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)$",
		process = make_dial_syn_term_documentation,
	},
	{
		regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)/([^/]+)$",
		process = make_dial_syn_term_documentation,
	},
	{
		regex = "^Module:bibliography/data/([%l-]+)$",
		process = function(title, cats, lang_code)
			if lang_code == "preload" then
				return 'Used as a base model for other languages when the button "create new language submodule" is clicked.'
			end
			-- An explicit `bib_page` exported by the data module takes
			-- precedence over the language name.
			local page = require(title.fullText).bib_page
			if not page then
				-- Guard against codes with no language entry instead of
				-- indexing nil.
				local lang = lang_cache[lang_code]
				page = lang and lang:getCanonicalName()
				if page then
					cats:insert(page .. " modules")
				end
			end
			cats:insert("Reference modules")
			if not page then
				-- Nothing to link to; the module is still categorized above.
				return nil
			end
			return "This module holds bibliographical data for " .. page .. ". For the formatted bibliography see '''[[Appendix:Bibliography/" .. page .. "]]'''."
		end,
	},
}

function export.show(frame)
	local boolean_default_false = { type = "boolean", default = false }
	local args = process_params(frame.args, {
		["hr"] = true,
		["for"] = true,
		["from"] = true,
		["allowondoc"] = boolean_default_false, -- Don't throw an error if used on a documentation subpage.
		["notsubpage"] = boolean_default_false,
		["nodoc"] = boolean_default_false,
		["nolinks"] = boolean_default_false, -- suppress all "Useful links"
		["nosandbox"] = boolean_default_false, -- supress sandbox
	})

	local output = Array('\n<div class="documentation" style="display:block; clear:both">\n')
	local cats = Array()
	local nodoc = args.nodoc

	if (not args.hr) or (args.hr == "above") then
		output:insert("----\n")
	end

	local title = args["for"] and new_title(args["for"]) or get_current_title()
	local doc_title = args.from ~= "-" and new_title(args.from or title.fullText ..
'/documentation') or nil local contentModel = title.contentModel local pagetype, is_script_or_stylesheet = get_pagetype(title) local preload, fallback_docs, doc_content, old_doc_title, user_name, skin_name, needs_doc local doc_content_source = "Module:documentation" local auto_generated_cat_source local cats_auto_generated = false if not args.allowondoc and is_documentation(title) then -- TODO: merge with {{documentation subpage}}, and choose behaviour based on the page type. error("This template should not be used on a documentation page. Please use [[Template:documentation subpage]].") elseif is_sandbox(title) then local sandbox_ns = title.nsText preload = ("Template:documentation/preload%s%sSandbox"):format( sandbox_ns == "Module" and sandbox_ns or "Template", title.rootText:match("^[Uu]ser:(.+)") and "User" or "" ) elseif pagetype:match("%f[%w]gadget%f[%W]") then preload = "Template:documentation/preloadGadget" elseif pagetype:match("%f[%w]script%f[%W]") then -- .js if title.nsText == "MediaWiki" then preload = "Template:documentation/preloadMediaWikiJavaScript" else preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end end is_script_or_stylesheet = true elseif pagetype:match("%f[%w]stylesheet%f[%W]") then -- .css preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end is_script_or_stylesheet = true elseif contentModel == "Scribunto" then -- Exclude pages in Module: which aren't Scribunto. 
preload = "Template:documentation/preloadModule" elseif pagetype:match("%f[%w]template%f[%W]") or pagetype:match("%f[%w]project%f[%W]") then preload = "Template:documentation/preloadTemplate" end if doc_title and doc_title.isRedirect then old_doc_title = doc_title doc_title = doc_title.redirectTarget end output:insert("<dl class=\"plainlinks\" style=\"font-size: smaller;\">") local function get_module_doc_and_cats(categories_only) cats_auto_generated = true local automatic_cats = nil if user_name then fallback_docs = "documentation/fallback/user module" automatic_cats = { "User sandbox modules" } else for _, data in ipairs(module_regex) do local captures = { umatch(title.fullText, data.regex) } if #captures > 0 then local cat, process_function if is_callable(data.process) then process_function = data.process elseif type(data.process) == "string" then doc_content_source = "Module:documentation/functions/" .. data.process process_function = require(doc_content_source) end if process_function then doc_content = process_function(title, cats, unpack(captures)) end if type(doc_content) == "table" then doc_content_source = doc_content.title and "Template:" .. 
doc_content.title or doc_content_source doc_content = expand_template(doc_content) elseif doc_content ~= nil then doc_content = preprocess(doc_content) end cat = data.cat if cat then if type(cat) == "string" then cat = { cat } end for _, c in ipairs(cat) do insert(cats, (ugsub(title.fullText, data.regex, c))) end end break end end end if title.subpageText == "templates" then cats:insert("Template interface modules") end if automatic_cats then for _, c in ipairs(automatic_cats) do cats:insert(c) end end if #cats == 0 then local auto_cats = categorize_module(frame, "return raw", "noerror") if #auto_cats > 0 then auto_generated_cat_source = "Module:module categorization" end for _, category in ipairs(auto_cats) do cats:insert(category) end end -- meaning module is not in user’s sandbox or one of many datamodule boring series needs_doc = not categories_only and not (automatic_cats or doc_content or fallback_docs) end -- Override automatic documentation, if present. if doc_title and doc_title.exists then local cats_auto_generated_text = "" if contentModel == "Scribunto" then local doc_page_content = doc_title.content -- Track then do nothing if there are uses of includeonly. The -- pattern is slightly too permissive, but any false-positives are -- obvious typos that should be corrected. if doc_page_content:lower():match("</?includeonly%f[%s/>][^>]*>") then track("module-includeonly") else -- Check for uses of {{module cat}}. find_templates treats the -- input as transcluded by default (i.e. it parses the wikitext -- which will be transcluded through to the module page). local module_cat for template in find_templates(doc_page_content) do if template:get_name() == "module cat" then module_cat = true break end end if not module_cat then get_module_doc_and_cats("categories only") auto_generated_cat_source = auto_generated_cat_source or doc_content_source cats_auto_generated_text = " Categories were auto-generated by [[" .. auto_generated_cat_source .. "]]. <sup>[[" .. 
new_title(auto_generated_cat_source):fullUrl { action = "edit" } .. " edit]]</sup>" end end end output:insert( "<dd><i style=\"font-size: larger;\">The following " .. "[[Help:Documenting templates and modules|documentation]] is located at [[" .. doc_title.fullText .. "]]. " .. "<sup>[[" .. doc_title:fullUrl { action = "edit" } .. " edit]]</sup>" .. cats_auto_generated_text .. "</i></dd>") else if contentModel == "Scribunto" then get_module_doc_and_cats(false) elseif title.nsText == "Template" then --cats:insert("Uncategorized templates") needs_doc = not (fallback_docs or nodoc) elseif user_name and is_script_or_stylesheet then skin_name = skins[title.text:sub(#title.rootText + 1):match("^/(%l+)%.[jc]ss?$")] if skin_name then fallback_docs = "documentation/fallback/user " .. contentModel end end if doc_content then output:insert( "<dd><i style=\"font-size: larger;\">The following " .. "[[Help:Documenting templates and modules|documentation]] is " .. "generated by [[" .. doc_content_source .. "]]. <sup>[[" .. new_title(doc_content_source):fullUrl { action = "edit" } .. " edit]]</sup> </i></dd>") elseif not nodoc then if doc_title then output:insert( "<dd><i style=\"font-size: larger;\">This " .. pagetype .. " lacks a [[Help:Documenting templates and modules|documentation subpage]]. " .. (fallback_docs and "You may " or "Please ") .. "[" .. doc_title:fullUrl { action = "edit", preload = preload } .. " create it].</i></dd>\n") else output:insert( "<dd><i style=\"font-size: larger; color: var(--wikt-palette-red-9,#FF0000);\">Unable to auto-generate " .. "documentation for this " .. pagetype .. 
".</i></dd>\n") end end end if startswith(title.fullText, "MediaWiki:Gadget-") then local is_gadget = false for line in gline(new_title("MediaWiki:Gadgets-definition").content) do local gadget, items = line:match("^%*%s*(%a[%w_-]*)%[.-%]|(.+)$") if not gadget then gadget, items = line:match("^%*%s*(%a[%w_-]*)|(.+)$") end if gadget then items = Array(split(items, "|")) for i, item in ipairs(items) do if title.fullText == ("MediaWiki:Gadget-" .. item) then is_gadget = true output:insert("<dd> ''This script is a part of the <code>") output:insert(gadget) output:insert("</code> gadget ([") output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(" edit definitions])'' <dl>") output:insert("<dd> ''Description ([") output:insert(tostring(full_url("MediaWiki:Gadget-" .. gadget, { action = "edit" }))) output:insert(" edit])'': ") output:insert(preprocess(new_message('Gadget-' .. gadget):plain())) output:insert(" </dd>") items:remove(i) if #items > 0 then for j, item in ipairs(items) do items[j] = '[[MediaWiki:Gadget-' .. item .. '|' .. item .. 
']]' end output:insert("<dd> ''Other parts'': ") output:insert(list_to_text(items)) output:insert("</dd>") end output:insert("</dl></dd>") break end end end end if not is_gadget then output:insert("<dd> ''This script is not a part of any [") output:insert(tostring(full_url("Special:Gadgets", { uselang = "en" }))) output:insert(' gadget] ([') output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(' edit definitions]).</dd>') -- else -- cats:insert("Wiktionary gadgets") end end if old_doc_title then output:insert("<dd> ''Redirected from'' [") output:insert(old_doc_title:fullUrl { redirect = "no" }) output:insert(" ") output:insert(old_doc_title.fullText) output:insert("] ([") output:insert(old_doc_title:fullUrl { action = "edit" }) output:insert(" edit]).</dd>\n") end if not args.nolinks then local links = Array() if title.isSubpage and not args.notsubpage then links:insert("[[:" .. title.nsText .. ":" .. title.rootText .. "|root page]]") links:insert("[[Special:PrefixIndex/" .. title.nsText .. ":" .. title.rootText .. "/|root page’s subpages]]") else links:insert("[[Special:PrefixIndex/" .. title.fullText .. "/|subpage list]]") end links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidetrans = true, hideredirs = true })) .. " links]") if contentModel ~= "Scribunto" then links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hidetrans = true })) .. " redirects]") end if is_script_or_stylesheet then if user_name then links:insert("[[Special:MyPage" .. title.text:sub(#title.rootText + 1) .. "|your own]]") end else links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hideredirs = true })) .. " transclusions]") end if contentModel == "Scribunto" then local is_testcases = title.isSubpage and title.subpageText == "testcases" local without_subpage = title.nsText .. ":" .. 
title.baseText if is_testcases then links:insert("[[:" .. without_subpage .. "|tested module]]") else links:insert("[[" .. title.fullText .. "/testcases|testcases]]") end if user_name then links:insert("[[User:" .. user_name .. "|user page]]") links:insert("[[User talk:" .. user_name .. "|user talk page]]") links:insert("[[Special:PrefixIndex/User:" .. user_name .. "/|userspace]]") -- If sandbox module, add a link to the module that this is a sandbox of. -- Exclude user sandbox modules like [[User:Dine2016/sandbox]]. elseif title.text:find("^sandbox%d*/") or title.text:find("/sandbox%d*%f[/%z]") then cats:insert("Sandbox modules") -- Sandbox modules don’t really need documentation. needs_doc = false -- Don't track user sandbox modules. local text_title = new_title(title.text) if not (text_title and text_title.nsText == "User") then local diff local sandbox_of = title.text:match("^(.*)/sandbox%d*%f[/%z]") if sandbox_of then track("sandbox to be moved") else sandbox_of = title.text:match("^sandbox%d*/(.*)$") end if not sandbox_of then error(("Internal error: Something wrong, couldn't extract sandbox-of module from title '%s'") :format(title.text)) end sandbox_of = title.nsText .. ":" .. sandbox_of if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "diff") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) end -- If not a sandbox module, add link to sandbox module. -- Sometimes there are multiple sandboxes for a single module: -- [[Module:sandbox/sa-pronunc]], [[Module:sandbox2/sa-pronunc]]. else local sandbox_title local user_prefix, user_rest = title.text:match("^(User:.-/)(.*)$") if not user_prefix then user_prefix = "" user_rest = title.text end sandbox_title = title.nsText .. ":" .. user_prefix .. "sandbox/" .. user_rest local sandbox_link = "[[:" .. sandbox_title .. "|sandbox]]" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "diff") .. ")" end links:insert(sandbox_link .. (diff or "")) end end if title.nsText == "Template" then -- Error search: all(any namespace), hastemplate (show pages using the template), insource (show source code), incategory (any/specific error) -- [[mw:Help:CirrusSearch]], [[w:Help:Searching/Regex]] -- apparently same with/without: &profile=advanced&fulltext=1 local errorq = 'searchengineselect=mediawiki&search=all: hastemplate:\"' .. title.rootText .. '\" insource:\"' .. title.rootText .. '\" incategory:' local eincategory = "Pages_with_module_errors|ParserFunction_errors|DisplayTitle_errors|Pages_with_ISBN_errors|Pages_with_ISSN_errors|Pages_with_reference_errors|Pages_with_syntax_highlighting_errors|Pages_with_TemplateStyles_errors" links:insert( '[' .. tostring(full_url('Special:Search', errorq .. eincategory)) .. ' errors]' .. ' (' .. '[' .. tostring(full_url('Special:Search', errorq .. 'ParserFunction_errors')) .. ' parser]' .. '/' .. '[' .. tostring(full_url('Special:Search', errorq .. 'Pages_with_module_errors')) .. ' module]' .. ')' ) if title.isSubpage and title.text:find("/sandbox%d*%f[/%z]") then -- This is a sandbox template. -- At the moment there are no user sandbox templates with subpage -- “/sandbox”. cats:insert("Sandbox templates") -- Sandbox templates don’t really need documentation. needs_doc = false -- Will behave badly if “/sandbox” occurs twice in title! local sandbox_of = title.fullText:gsub("/sandbox%d*%f[/%z]", "") local diff if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "diff") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) -- This is a template that can have a sandbox. elseif not args.nosandbox then -- unless we tell it not to local sandbox_title = title.fullText .. "/sandbox" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "diff") .. ")" end links:insert("[[:" .. sandbox_title .. "|sandbox]]" .. (diff or "")) end end if #links > 0 then output:insert("<dd> ''Useful links'': " .. links:concat(" • ") .. "</dd>") end end output:insert("</dl>\n") -- Show error from [[Module:category tree/topic cat/data]] on its submodules' -- documentation to, for instance, warn about duplicate labels. if startswith(title.fullText, "Module:category tree/topic/") then local ok, err = pcall(require, "Module:category tree/topic/data") if not ok then output:insert('<span class="error">' .. err .. '</span>\n\n') end end if doc_title and doc_title.exists then -- Override automatic documentation, if present. doc_content = expand_template { title = doc_title.fullText } elseif not doc_content and fallback_docs then doc_content = expand_template { title = fallback_docs, args = { ['user'] = user_name, ['page'] = title.fullText, ['skin name'] = skin_name, }, } end if doc_content then output:insert(doc_content) end output:insert(('\n<%s style="clear: both;" />'):format(args.hr == "below" and "hr" or "br")) if cats_auto_generated and not cats[1] and (not doc_content or not doc_content:find("%[%[Category:")) then if contentModel == "Scribunto" then cats:insert("Uncategorized modules") -- elseif title.nsText == "Template" then -- cats:insert("Uncategorized templates") end end if needs_doc then cats:insert("Templates and modules needing documentation") end for _, cat in ipairs(cats) do output:insert("[[Category:" .. cat .. "]]") end output:insert("</div>\n") return output:concat() end function export.module_auto_doc_table() local parts = {} local function ins(text) insert(parts, text) end ins('{|class="wikitable"') ins("! Regex !! Category !! 
Handling modules")
	for _, spec in ipairs(module_regex) do
		local cat_text
		local cats = spec.cat
		if cats then
			local cat_parts = {}
			if type(cats) == "string" then
				cats = { cats }
			end
			for _, cat in ipairs(cats) do
				insert(cat_parts, ("<code>%s</code>"):format((cat:gsub("|", "&#124;"))))
			end
			cat_text = concat(cat_parts, ", ")
		else
			cat_text = "''(unspecified)''"
		end
		ins("|-")
		ins(("| <code>%s</code> || %s || %s"):format(spec.regex, cat_text,
			is_callable(spec.process) and "''(handled internally)''" or
			type(spec.process) == "string" and ("[[Module:documentation/functions/%s]]"):format(spec.process) or
			"''(no documentation generator)''"))
	end
	ins("|}")
	return concat(parts, "\n")
end

-- Shared implementation of the `*ModuleLangList` entry points below. Each of
-- them produces a sentence listing the languages that use the module being
-- documented, followed by maintenance categories.
--
-- `frame` is the invoking frame; `frame.args[1]`, if given, is used as the
-- page name, otherwise the current title is consulted. `spec` holds the four
-- things that differ between the entry points:
--   * `data_module`: name of a module that, once required, is called with the
--     page name and returns the list of language objects using that module;
--   * `suffix`: literal text the page name must end in (after a hyphen) for a
--     leading language code to be extracted from it;
--   * `cat_prefix`: leading words of the two maintenance category names;
--   * `purpose`: verb phrase inserted after "used to" in the returned sentence.
--
-- Returns the concatenated categories alone when no languages remain to be
-- listed, otherwise the sentence followed by the categories.
local function module_lang_list(frame, spec)
	local pagename, subpage

	if frame.args[1] then
		pagename = frame.args[1]
	else
		local title = get_current_title()
		subpage = title.subpageText
		pagename = title.text
		-- On a subpage, fall back to the root page's name.
		if subpage ~= pagename then
			pagename = title.rootText
		end
	end

	-- `subpage` stays nil when the page name was passed explicitly, in which
	-- case the page is never treated as a documentation subpage.
	local on_doc_subpage = subpage == "documentation"

	local languageObjects = require(spec.data_module)(pagename)
	local codeInPagename = pagename:match("^([%l-]+)%-.*" .. spec.suffix .. "$")
	local categories = Array()

	local codeInPagenameInList = false

	if codeInPagename then
		if languageObjects[1] and not on_doc_subpage then
			local agreement = languageObjects[2] and "s" or ""
			categories:insert("[[Category:" .. spec.cat_prefix .. " used by " ..
				#languageObjects .. " language" .. agreement .. "]]")
		end

		-- Drop the language named in the page title from the list.
		-- NOTE(review): `result` is true for every language *other* than the
		-- one in the page title, so despite its name the flag becomes true as
		-- soon as any other language is present. Behaviour kept as is.
		languageObjects = Array(languageObjects)
			:filter(
				function(lang)
					local result = lang:getCode() ~= codeInPagename
					codeInPagenameInList = codeInPagenameInList or result
					return result
				end)
	end

	if not on_doc_subpage then
		-- A four-letter capitalized chunk followed by a hyphen is looked up as
		-- a script code; known scripts contribute their category.
		for script_code in pagename:gmatch("%f[^-%z]%u%l%l%l%f[-]") do
			local script = get_script(script_code)
			if script then
				categories:insert("[[Category:" .. script:getCategoryName() .. "]]")
			end
		end
	end

	if not on_doc_subpage and not title_exists("Module:" .. pagename .. "/testcases") then
		categories:insert("[[Category:" .. spec.cat_prefix .. " without a testcases subpage]]")
	end

	if not languageObjects[1] then
		return categories:concat()
	end

	local langs = Array(languageObjects)
		:sort(
			function(lang1, lang2)
				return lang1:getCode() < lang2:getCode()
			end)
		-- This will not error because languageObjects is not empty.
		:map(languageObjects[1].makeCategoryLink)
		:serialCommaJoin()

	return "It is " .. (codeInPagenameInList and "also" or "") ..
		" used to " .. spec.purpose .. " " .. langs .. "." .. categories:concat()
end

-- Used by {{translit module documentation}}.
function export.translitModuleLangList(frame)
	return module_lang_list(frame, {
		data_module = "Module:languages/byTranslitModule",
		suffix = "translit",
		cat_prefix = "Transliteration modules",
		purpose = "transliterate",
	})
end

-- Used by {{strip diacritics module documentation}}.
function export.stripDiacriticsModuleLangList(frame)
	return module_lang_list(frame, {
		data_module = "Module:languages/byStripDiacriticsModule",
		suffix = "stripdiacritics",
		cat_prefix = "Diacritic-stripping modules",
		purpose = "strip diacritics for",
	})
end

-- Used by {{sortkey module documentation}}.
function export.sortkeyModuleLangList(frame)
	return module_lang_list(frame, {
		data_module = "Module:languages/bySortkeyModule",
		suffix = "sortkey",
		cat_prefix = "Sortkey-generating modules",
		purpose = "sort",
	})
end

return export
6dc60l37z33a4ot8ok9usxuuu2fyk3g 231493 231492 2026-04-15T16:54:21Z Lee 19 [[:en:Module:documentation]] වෙතින් එක් සංශෝධනයක් 231492 Scribunto text/plain
local export = {}

local array_module = "Module:array"
local debug_track_module = "Module:debug/track"
local frame_module = "Module:frame"
local fun_is_callable_module = "Module:fun/isCallable"
local languages_module = "Module:languages"
local links_module = "Module:links"
local load_module = "Module:load"
local module_categorization_module = "Module:module categorization"
local number_list_show_module = "Module:number list/show"
local pages_module = "Module:pages"
local parameters_module = "Module:parameters"
local scripts_module = "Module:scripts"
local string_endswith_module = "Module:string/endswith"
local string_gline_module = "Module:string/gline"
local string_insert_module = "Module:string/insert"
local string_startswith_module = "Module:string/startswith"
local string_utilities_module = "Module:string utilities"
local template_parser_module = "Module:template parser"
local title_exists_module = "Module:title/exists"
local title_new_title_module = "Module:title/newTitle"

local concat = table.concat
local error = error
local full_url = mw.uri.fullUrl
local get_current_title = mw.title.getCurrentTitle
local insert = table.insert
local ipairs = ipairs
local list_to_text = mw.text.listToText
local new_message = mw.message.new
local pcall = pcall
local require = require
local
tonumber = tonumber
local tostring = tostring
local type = type
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Lazy loaders. Each function below requires its module on the first call,
-- overwrites its own local name with the real function, and forwards the
-- arguments; later calls go straight to the loaded function. A module is
-- therefore only required if one of its functions is actually used.
local function Array(...)
	Array = require(array_module)
	return Array(...)
end

local function categorize_module(...)
	categorize_module = require(module_categorization_module).categorize
	return categorize_module(...)
end

local function debug_track(...)
	debug_track = require(debug_track_module)
	return debug_track(...)
end

local function endswith(...)
	endswith = require(string_endswith_module)
	return endswith(...)
end

local function expand_template(...)
	expand_template = require(frame_module).expandTemplate
	return expand_template(...)
end

local function find_templates(...)
	find_templates = require(template_parser_module).find_templates
	return find_templates(...)
end

local function full_link(...)
	full_link = require(links_module).full_link
	return full_link(...)
end

local function get_lang(...)
	get_lang = require(languages_module).getByCode
	return get_lang(...)
end

local function get_pagetype(...)
	get_pagetype = require(pages_module).get_pagetype
	return get_pagetype(...)
end

local function get_script(...)
	get_script = require(scripts_module).getByCode
	return get_script(...)
end

local function gline(...)
	gline = require(string_gline_module)
	return gline(...)
end

local function is_callable(...)
	is_callable = require(fun_is_callable_module)
	return is_callable(...)
end

local function is_documentation(...)
	is_documentation = require(pages_module).is_documentation
	return is_documentation(...)
end

local function is_sandbox(...)
	is_sandbox = require(pages_module).is_sandbox
	return is_sandbox(...)
end

local function new_title(...)
	new_title = require(title_new_title_module)
	return new_title(...)
end

local function number_list_show_table(...)
	number_list_show_table = require(number_list_show_module).table
	return number_list_show_table(...)
end

local function preprocess(...)
	preprocess = require(frame_module).preprocess
	return preprocess(...)
end

local function process_params(...)
	process_params = require(parameters_module).process
	return process_params(...)
end

local function safe_load_data(...)
	safe_load_data = require(load_module).safe_load_data
	return safe_load_data(...)
end

local function split(...)
	split = require(string_utilities_module).split
	return split(...)
end

local function startswith(...)
	startswith = require(string_startswith_module)
	return startswith(...)
end

local function string_insert(...)
	string_insert = require(string_insert_module)
	return string_insert(...)
end

local function title_exists(...)
	title_exists = require(title_exists_module)
	return title_exists(...)
end

local function ugsub(...)
	ugsub = require(string_utilities_module).gsub
	return ugsub(...)
end

local function umatch(...)
	umatch = require(string_utilities_module).match
	return umatch(...)
end

-- Maps a lowercase skin identifier to a display name ("common" maps to the
-- empty string).
local skins = {
	["common"] = "",
	["vector"] = "Vector",
	["monobook"] = "Monobook",
	["cologneblue"] = "Cologne Blue",
	["modern"] = "Modern",
}

-- Records the tracking key "documentation/<page>" via the debug-track module.
-- Always returns true.
local function track(page)
	debug_track("documentation/" .. page)
	return true
end

-- Returns an external-link wikitext string labelled `text` that points to
-- Special:ComparePages for `page1` and `page2`.
local function compare_pages(page1, page2, text)
	return "[" ..
		tostring(
			full_url("Special:ComparePages", { page1 = page1, page2 = page2 }))
		.. " " .. text .. "]"
end

-- Avoid transcluding [[Module:languages/cache]] everywhere.
-- Nothing is stored in this table itself: every lookup is forwarded to the
-- cache module, which is only required once a key is first requested.
local lang_cache = setmetatable({}, {
	__index = function(self, k)
		return require("Module:languages/cache")[k]
	end
})

-- Returns a full link to `word` using the language object cached under "zh".
local function zh_link(word)
	return full_link {
		lang = lang_cache.zh,
		term = word
	}
end

-- `process` handler for the language data modules. `division` is the part of
-- the title captured after "Module:languages/data/". A trailing "/extra" is
-- removed from it and selects the extra-data template and category. Inserts
-- "<category>|<sort key>" into `cats` and returns a table naming the
-- documentation template to expand. `title` is not used.
local function make_languages_data_documentation(title, cats, division)
	local doc_template, module_cat
	if endswith(division, "/extra") then
		-- Drop the six characters of "/extra".
		division = division:sub(1, -7)
		doc_template = "language extradata documentation"
		module_cat = "Language extra data modules"
	else
		doc_template = "language data documentation"
		module_cat = "Language data modules"
	end
	local sort_key
	if division == "exceptional" then
		sort_key = "x"
	else
		-- Single assignment keeps only gsub's first result (the string).
		sort_key = division:gsub("/", "")
	end
	cats:insert(module_cat .. "|" .. sort_key)
	return {
		title = doc_template
	}
end

-- `process` handler for "Module:Unicode data/..." pages. Only titles of the
-- form "Module:Unicode data/<subpage>/<three hex digits>" whose subpage is
-- "names", "images" or "emoji images" get generated text; for anything else
-- nothing is returned. The three hex digits are the leading digits of the
-- code points covered, so the range described runs from XXX000 to XXXFFF.
-- `cats` is not used.
local function make_Unicode_data_documentation(title, cats)
	local subpage, first_three_of_code_point
		= title.fullText:match("^Module:Unicode data/([^/]+)/(%x%x%x)$")
	if subpage == "names" or subpage == "images" or subpage == "emoji images" then
		local low, high =
			tonumber(first_three_of_code_point .. "000", 16),
			tonumber(first_three_of_code_point .. "FFF", 16)
		local text, text_type
		if subpage == "names" then
			-- Previously "titles of images", a copy of the "images" branch
			-- below, which mislabelled the names shards.
			text_type = "names"
		elseif subpage == "images" then
			text_type = "titles of images"
		elseif subpage == "emoji images" then
			text_type = "emoji-style images"
		end
		text = string.format(
			"This data module contains the " .. text_type .. " of " ..
			"[[Appendix:Unicode|Unicode]] code points within the range U+%04X to U+%04X.",
			low, high)
		-- Cross-link the text-style and emoji-style image shards. The note on
		-- an "images" page is only added when the matching emoji shard loads.
		if subpage == "images" and safe_load_data("Module:Unicode data/emoji images/" .. first_three_of_code_point) then
			text = text .. " This list includes the text variants of emojis. For the list of emoji variants of those characters, see [[Module:Unicode data/emoji images/" .. first_three_of_code_point .. "]]."
		elseif subpage == "emoji images" then
			text = text .. " For text-style images, see [[Module:Unicode data/images/" .. first_three_of_code_point .. "]]."
		end
		return text
	end
end

-- Looks up `langcode` in the language cache and, if a language is found,
-- inserts three categories into `cats`: `overall_data_module_cat` sorted by
-- the language name, "<name> modules" and "<name> data modules". When the
-- language object has a `_fullCode`, the canonical name of the language
-- cached under that code is used instead of the object's own.
-- Returns the language object and the name used, or nothing if the code is
-- not found.
local function insert_lang_data_module_cats(cats, langcode, overall_data_module_cat)
	local lang = lang_cache[langcode]
	if lang then
		local langname
		if lang._fullCode then
			langname = lang_cache[lang._fullCode]:getCanonicalName()
		else
			langname = lang:getCanonicalName()
		end
		cats:insert(overall_data_module_cat .. "|" .. langname)
		cats:insert(langname .. " modules")
		cats:insert(langname .. " data modules")
		return lang, langname
	end
end

--[=[
This provides categories and documentation for various data modules, so that
[[Category:Uncategorized modules]] isn't unnecessarily cluttered.
It is a list of tables, each of which has the following possible fields:

`regex` (required): A Lua pattern to match the module's title. If it matches, the data in this entry will be used.
Any captures in the pattern can be referenced in the `cat` field using %1 for the first capture, %2 for the second,
etc. (often used for creating the sortkey for the category). In addition, the captures are passed to the `process`
function as the third and subsequent parameters.

`process` (optional): This may be a function or a string. If it is a function, it is called as follows:
  `process(TITLE, CATS, CAPTURE1, CAPTURE2, ...)`
where:
* TITLE is a title object describing the module's title; see
  [https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Title_objects].
* CATS is an array object (see [[Module:array]]) of categories that the module will be added to.
* CAPTURE1, CAPTURE2, ... contain any captures in the `regex` field.
The return value of `process` should be either a string (which will be used as the module's documentation), or a
table specifying the name of a template to expand to get the documentation, along with the arguments to that
template. In the latter format, the template name (bare, without the "Template:" prefix) should be in the `title`
field, and any arguments should be in `args`; in this case, the template name will be listed above the generated
documentation as the source of the documentation, along with an edit button to edit the template's contents. If,
however, the return value of the `process` function is a string, any template invocations will be expanded using
frame:preprocess(), and [[Module:documentation]] will be listed as the source of the documentation. If `process`
itself is a string rather than a function, it should name a submodule under [[Module:documentation/functions/]]
which returns a function, of the same type as described above.
This submodule will be specified as the source of the documentation (unless it returns a table naming a template to expand to get the documentation, as described above). If `process` is omitted entirely, the module will have no documentation. `cat` (optional): A string naming the category into which the module should be placed, or a list of such strings. Captures specified in `regex` may be referenced in this string using %1 for the first capture, %2 for the second, etc. It is also possible to add categories in the `process` function by inserting them into the passed-in CATS array (the second parameter). ]=] local module_regex = { { regex = "^Module:languages/data/(3/%l/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(3/%l)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(2/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(2)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(exceptional/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(exceptional)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/.+$", cat = "Language and script modules", }, { regex = "^Module:scripts/.+$", cat = "Language and script modules", }, { regex = "^Module:data tables/data..?.?.?$", cat = "Reference module sharded data tables", }, { regex = "^Module:zh/data/dial%-pron/.+$", cat = "Chinese dialectal pronunciation data modules", process = "zh dial or syn", }, { regex = "^Module:zh/data/dial%-syn/.+$", cat = "Chinese dialect synonyms data modules", process = "zh dial or syn", }, { regex = "^Module:zh/data/glyph%-data/.+$", cat = "Chinese historical character forms data modules", process = function(title, cats) local character = title.fullText:match("^Module:zh/data/glyph%-data/(.+)") if character then return ("This module contains data on historical forms of the 
Chinese character %s.") :format(zh_link(character)) end end, }, { regex = "^Module:zh/data/ltc%-pron/(.+)$", cat = "Middle Chinese pronunciation data modules|%1", process = "zh data", }, { regex = "^Module:zh/data/och%-pron%-BS/(.+)$", cat = "Old Chinese (Baxter-Sagart) pronunciation data modules|%1", process = "zh data", }, { regex = "^Module:zh/data/och%-pron%-ZS/(.+)$", cat = "Old Chinese (Zhengzhang) pronunciation data modules|%1", process = "zh data", }, { -- capture rest of zh/data submodules regex = "^Module:zh/data/(.+)$", cat = "Chinese data modules|%1", }, { regex = "^Module:mul/guoxue%-data/cjk%-?(.*)$", process = "guoxue-data", }, { regex = "^Module:Unicode data/(.+)$", cat = "Unicode data modules|%1", process = make_Unicode_data_documentation, }, { regex = "^Module:number list/data/(.+)$", process = function(title, cats, lang_code) local lang = insert_lang_data_module_cats(cats, lang_code, "Number data modules") if lang then return ("This module contains data on various types of numbers in %s.\n%s") :format(lang:makeCategoryLink(), number_list_show_table() or "") end end, }, { regex = "^Module:accel/(.+)$", process = function(title, cats) local lang_code = title.subpageText local lang = lang_cache[lang_code] if lang then cats:insert(lang:getCanonicalName() .. 
" modules|accel") cats:insert(("Accel submodules|%s"):format(lang:getCanonicalName())) return ("This module contains new entry creation rules for %s; see [[WT:ACCEL]] for an overview, and [[Module:accel]] for information on creating new rules.") :format(lang:makeCategoryLink()) end end, }, { regex = "^Module:inc%-ash/dial/data/(.+)$", cat = "Ashokan Prakrit modules|%1", process = function(title, cats) local word = title.fullText:match("^Module:inc%-ash/dial/data/(.+)$") if word then local lang = lang_cache["inc-ash"] return ("This module contains data on the pronunciation of %s in dialects of %s.") :format(full_link({ term = word, lang = lang }, "term"), lang:makeCategoryLink()) end end, }, { regex = "^.+%-translit$", process = "translit", }, { regex = "^Module:form of/lang%-data/(.+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Language-specific form-of modules") if lang then -- FIXME, display more info. return "This module contains language-specific form-of data (tags, shortcuts, base lemma params. etc.) for " .. langname .. "." end end }, { regex = "^Module:labels/data/lang/(.+)$", process = function(title, cats, lang_code) local lang = insert_lang_data_module_cats(cats, lang_code, "Language-specific label data modules") if lang then return { title = "label language-specific data documentation", args = { [1] = lang_code }, } end end }, { regex = "^Module:category tree/lang/(.+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Category tree data modules/lang") if lang then return "This module handles generating the descriptions and categorization for " .. langname .. " category pages " .. "of the format \"" .. langname .. " LABEL\" where LABEL can be any text. Examples are " .. "[[:Category:Bulgarian conjugation 2.1 verbs]] and [[:Category:Russian velar-stem neuter-form nouns]]. " .. 
"This module is part of the category tree system, which is a general framework for generating the " .. "descriptions and categorization of category pages.\n\n" .. "For more information, see [[Module:category tree/lang/documentation]].\n\n" .. "'''NOTE:''' If you add a new language-specific module, you must add the language code to the " .. "list at the top of [[Module:category tree/lang]] in order for the module to be recognized." end end }, { regex = "^Module:category tree/topic/(.+)$", process = function(title, cats, submodule) cats:insert("Category tree data modules/topic| ") return { title = "topic cat data submodule documentation" } end }, { regex = "^Module:category tree/(.+)$", process = function(title, cats, submodule) cats:insert("Category tree data modules| ") return { title = "category tree data submodule documentation" } end }, { regex = "^Module:ja/data/(.+)$", cat = "Japanese data modules|%1", }, { regex = "^Module:fi%-dialects/data/feature/Kettunen1940 ([0-9]+)$", cat = "Finnish dialectal data atlas modules|%1", process = function(title, cats, shard) return "This module contains shard " .. shard .. " of the online version of Lauri Kettunen's 1940 work " .. "''Suomen murteet III A. Murrekartasto'' (\"Finnish dialects III A: Dialect atlas\"). " .. "It was imported and converted from urn:nbn:fi:csc-kata20151130145346403821, published by the " .. "''Kotimaisten kielten keskus'' under the CC BY 4.0 license." end }, { regex = "^Module:fi%-dialects/data/feature/(.+)", cat = "Finnish dialectal data modules|%1", }, { regex = "^Module:fi%-dialects/data/word/(.+)", cat = "Finnish dialectal data modules|%1", }, { regex = "^Module:Swadesh/data/([%l-]+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules") if lang then return "This module contains the [[Swadesh list]] of basic vocabulary in " .. langname .. "." 
end end }, { regex = "^Module:Swadesh/data/([%l-]+)/([^/]*)$", process = function(title, cats, lang_code, variety) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules") if lang then local prefix = "This module contains the [[Swadesh list]] of basic vocabulary in the " local etym_lang = get_lang(variety, nil, "allow etym") if etym_lang then return ("%s %s variety of %s."):format(prefix, etym_lang:getCanonicalName(), langname) end local script = get_script(variety) if script then return ("%s %s %s script."):format(prefix, langname, script:getCanonicalName()) end return ("%s %s variety of %s."):format(prefix, variety, langname) end end }, { regex = "^Module:typing%-aids", process = function(title, cats) local data_suffix = title.fullText:match("^Module:typing%-aids/data/(.+)$") local sortkey if data_suffix then if data_suffix:find "^[%l-]+$" then local lang = get_lang(data_suffix) if lang then sortkey = lang:getCanonicalName() cats:insert(sortkey .. " data modules") end elseif data_suffix:find "^%u%l%l%l$" then local script = get_script(data_suffix) if script then sortkey = script:getCanonicalName() cats:insert(script:getCategoryName()) end end cats:insert("Character insertion data modules|" .. (sortkey or data_suffix)) end end, }, { regex = "^Module:R:([%l-]+):(.+)$", process = function(title, cats, lang_code, refname) local lang = lang_cache[lang_code] if lang then cats:insert(lang:getCanonicalName() .. " modules|" .. refname) cats:insert(("Reference modules|%s"):format(lang:getCanonicalName())) return "This module implements the reference template {{temp|R:" .. lang_code .. ":" .. refname .. "}}." 
end end, }, { regex = "^Module:Quotations/([%l-]+)/?(.*)", process = "Quotation", }, { regex = "^Module:affix/lang%-data/([%l-]+)", process = "affix lang-data", }, { regex = "^Module:dialect synonyms/([%l-]+)$", process = function(title, cats, lang_code) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules| ") return "This module contains data on specific varieties of " .. langname .. ", for use by " .. "{{tl|dialect synonyms}}. The actual synonyms themselves are contained in submodules.\n\n" .. "==== Language data module structure ====\n" .. "* <code>export.title</code> — optional; table title template (e.g. \"Regional synonyms of %s\").\n" .. "* <code>export.columns</code> — optional; list of column headers for location hierarchy (e.g. {\"Dialect group\", \"Dialect\", \"Location\"}).\n" .. "* <code>export.notes</code> — optional; table of note keys to text.\n" .. "* <code>export.sources</code> — optional; table of source keys to text.\n" .. "* <code>export.note_aliases</code> — optional; alias map for notes.\n" .. "* <code>export.varieties</code> — required; nested table of variety nodes. Each node must have <code>name</code>; array part holds children. Node keys can include <code>text_display</code>, <code>color</code>, <code>code</code>, <code>wikidata</code>, <code>lat</code>, <code>long</code>, and language-specific keys (e.g. <code>persian</code>, <code>armenian</code>, <code>chinese</code>).\n\n" .. expand_template({ title = 'dial syn', args = { lang_code, ["demo mode"] = "y" } }) end end, }, { regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)$", process = function(title, cats, lang_code, term) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules|" .. 
term) return ("%s\n\n%s"):format( "==== Term/sense module structure ====\n" .. "* <code>export.title</code> — optional; custom table title (e.g. \"Realization of 'strong R' between vowels\"). Overrides the language default.\n" .. "* <code>export.meaning</code> — optional; meaning/gloss (alternative to <code>gloss</code>).\n" .. "* <code>export.gloss</code> — optional; short meaning for the table.\n" .. "* <code>export.note</code> — optional; single note key or string, or list of note keys.\n" .. "* <code>export.notes</code> — optional; list of note keys.\n" .. "* <code>export.source</code> / <code>export.sources</code> — optional; source keys.\n" .. "* <code>export.last_column</code> — optional; label for the data column (default \"Words\"; e.g. \"Realization\").\n" .. "* <code>export.syns</code> — required; table mapping variety/location names (keys from the language data module) to a list of term entries. Each entry can be a string or a table (e.g. <code>{ ipa = \"[ɽ]\" }</code> or <code>{ term = \"word\" }</code>).\n\n" .. "Example (custom title and data column, IPA realizations):\n" .. "<pre>\nlocal export = {}\n\nexport.title = \"Realization of 'strong R' between vowels\"\n" .. "export.meaning = \"\"\nexport.note = \"realization of 'strong R' between vowels\"\n" .. "export.last_column = \"Realization\"\n\nexport.syns = {\n\t[\"ALERS-158\"] = { { ipa = \"[ɽ]\" } },\n\t[\"ALERS-175\"] = { { ipa = \"[x]\" } },\n}\n\nreturn export\n</pre>\n\n", expand_template({ title = 'dial syn', args = { lang_code, term } })) end end, }, { regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)/([^/]+)$", process = function(title, cats, lang_code, term, id) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules|" .. term) return ("%s\n\n%s"):format( "==== Term/sense module structure ====\n" .. 
"* <code>export.title</code> — optional; custom table title (e.g. \"Realization of 'strong R' between vowels\"). Overrides the language default.\n" .. "* <code>export.meaning</code> — optional; meaning/gloss (alternative to <code>gloss</code>).\n" .. "* <code>export.gloss</code> — optional; short meaning for the table.\n" .. "* <code>export.note</code> — optional; single note key or string, or list of note keys.\n" .. "* <code>export.notes</code> — optional; list of note keys.\n" .. "* <code>export.source</code> / <code>export.sources</code> — optional; source keys.\n" .. "* <code>export.last_column</code> — optional; label for the data column (default \"Words\"; e.g. \"Realization\").\n" .. "* <code>export.syns</code> — required; table mapping variety/location names (keys from the language data module) to a list of term entries. Each entry can be a string or a table (e.g. <code>{ ipa = \"[ɽ]\" }</code> or <code>{ term = \"word\" }</code>).\n\n" .. "Example (custom title and data column, IPA realizations):\n" .. "<pre>\nlocal export = {}\n\nexport.title = \"Realization of 'strong R' between vowels\"\n" .. "export.meaning = \"\"\nexport.note = \"realization of 'strong R' between vowels\"\n" .. "export.last_column = \"Realization\"\n\nexport.syns = {\n\t[\"ALERS-158\"] = { { ipa = \"[ɽ]\" } },\n\t[\"ALERS-175\"] = { { ipa = \"[x]\" } },\n}\n\nreturn export\n</pre>\n\n", expand_template({ title = 'dial syn', args = { lang_code, term, id = id } })) end end, }, { regex = "^Module:bibliography/data/([%l-]+)$", process = function(title, cats, lang_code) if lang_code == "preload" then return 'Used as a base model for other languages when the button "create new language submodule" is clicked.' end local page = require(title.fullText).bib_page if not page then page = lang_cache[lang_code]:getCanonicalName() if page then cats:insert(page .. " modules") end end cats:insert("Reference modules") return "This module holds bibliographical data for " .. page .. ". 
For the formatted bibliography see '''[[Appendix:Bibliography/" .. page .. "]]'''." end, }, } function export.show(frame) local boolean_default_false = { type = "boolean", default = false } local args = process_params(frame.args, { ["hr"] = true, ["for"] = true, ["from"] = true, ["allowondoc"] = boolean_default_false, -- Don't throw an error if used on a documentation subpage. ["notsubpage"] = boolean_default_false, ["nodoc"] = boolean_default_false, ["nolinks"] = boolean_default_false, -- suppress all "Useful links" ["nosandbox"] = boolean_default_false, -- supress sandbox }) local output = Array('\n<div class="documentation" style="display:block; clear:both">\n') local cats = Array() local nodoc = args.nodoc if (not args.hr) or (args.hr == "above") then output:insert("----\n") end local title = args["for"] and new_title(args["for"]) or get_current_title() local doc_title = args.from ~= "-" and new_title(args.from or title.fullText .. '/documentation') or nil local contentModel = title.contentModel local pagetype, is_script_or_stylesheet = get_pagetype(title) local preload, fallback_docs, doc_content, old_doc_title, user_name, skin_name, needs_doc local doc_content_source = "Module:documentation" local auto_generated_cat_source local cats_auto_generated = false if not args.allowondoc and is_documentation(title) then -- TODO: merge with {{documentation subpage}}, and choose behaviour based on the page type. error("This template should not be used on a documentation page. 
Please use [[Template:documentation subpage]].") elseif is_sandbox(title) then local sandbox_ns = title.nsText preload = ("Template:documentation/preload%s%sSandbox"):format( sandbox_ns == "Module" and sandbox_ns or "Template", title.rootText:match("^[Uu]ser:(.+)") and "User" or "" ) elseif pagetype:match("%f[%w]gadget%f[%W]") then preload = "Template:documentation/preloadGadget" elseif pagetype:match("%f[%w]script%f[%W]") then -- .js if title.nsText == "MediaWiki" then preload = "Template:documentation/preloadMediaWikiJavaScript" else preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end end is_script_or_stylesheet = true elseif pagetype:match("%f[%w]stylesheet%f[%W]") then -- .css preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end is_script_or_stylesheet = true elseif contentModel == "Scribunto" then -- Exclude pages in Module: which aren't Scribunto. preload = "Template:documentation/preloadModule" elseif pagetype:match("%f[%w]template%f[%W]") or pagetype:match("%f[%w]project%f[%W]") then preload = "Template:documentation/preloadTemplate" end if doc_title and doc_title.isRedirect then old_doc_title = doc_title doc_title = doc_title.redirectTarget end output:insert("<dl class=\"plainlinks\" style=\"font-size: smaller;\">") local function get_module_doc_and_cats(categories_only) cats_auto_generated = true local automatic_cats = nil if user_name then fallback_docs = "documentation/fallback/user module" automatic_cats = { "User sandbox modules" } else for _, data in ipairs(module_regex) do local captures = { umatch(title.fullText, data.regex) } if #captures > 0 then local cat, process_function if is_callable(data.process) then process_function = data.process elseif type(data.process) == "string" then doc_content_source = "Module:documentation/functions/" .. 
data.process process_function = require(doc_content_source) end if process_function then doc_content = process_function(title, cats, unpack(captures)) end if type(doc_content) == "table" then doc_content_source = doc_content.title and "Template:" .. doc_content.title or doc_content_source doc_content = expand_template(doc_content) elseif doc_content ~= nil then doc_content = preprocess(doc_content) end cat = data.cat if cat then if type(cat) == "string" then cat = { cat } end for _, c in ipairs(cat) do insert(cats, (ugsub(title.fullText, data.regex, c))) end end break end end end if title.subpageText == "templates" then cats:insert("Template interface modules") end if automatic_cats then for _, c in ipairs(automatic_cats) do cats:insert(c) end end if #cats == 0 then local auto_cats = categorize_module(frame, "return raw", "noerror") if #auto_cats > 0 then auto_generated_cat_source = "Module:module categorization" end for _, category in ipairs(auto_cats) do cats:insert(category) end end -- meaning module is not in user’s sandbox or one of many datamodule boring series needs_doc = not categories_only and not (automatic_cats or doc_content or fallback_docs) end -- Override automatic documentation, if present. if doc_title and doc_title.exists then local cats_auto_generated_text = "" if contentModel == "Scribunto" then local doc_page_content = doc_title.content -- Track then do nothing if there are uses of includeonly. The -- pattern is slightly too permissive, but any false-positives are -- obvious typos that should be corrected. if doc_page_content:lower():match("</?includeonly%f[%s/>][^>]*>") then track("module-includeonly") else -- Check for uses of {{module cat}}. find_templates treats the -- input as transcluded by default (i.e. it parses the wikitext -- which will be transcluded through to the module page). 
local module_cat for template in find_templates(doc_page_content) do if template:get_name() == "module cat" then module_cat = true break end end if not module_cat then get_module_doc_and_cats("categories only") auto_generated_cat_source = auto_generated_cat_source or doc_content_source cats_auto_generated_text = " Categories were auto-generated by [[" .. auto_generated_cat_source .. "]]. <sup>[[" .. new_title(auto_generated_cat_source):fullUrl { action = "edit" } .. " edit]]</sup>" end end end output:insert( "<dd><i style=\"font-size: larger;\">The following " .. "[[Help:Documenting templates and modules|documentation]] is located at [[" .. doc_title.fullText .. "]]. " .. "<sup>[[" .. doc_title:fullUrl { action = "edit" } .. " edit]]</sup>" .. cats_auto_generated_text .. "</i></dd>") else if contentModel == "Scribunto" then get_module_doc_and_cats(false) elseif title.nsText == "Template" then --cats:insert("Uncategorized templates") needs_doc = not (fallback_docs or nodoc) elseif user_name and is_script_or_stylesheet then skin_name = skins[title.text:sub(#title.rootText + 1):match("^/(%l+)%.[jc]ss?$")] if skin_name then fallback_docs = "documentation/fallback/user " .. contentModel end end if doc_content then output:insert( "<dd><i style=\"font-size: larger;\">The following " .. "[[Help:Documenting templates and modules|documentation]] is " .. "generated by [[" .. doc_content_source .. "]]. <sup>[[" .. new_title(doc_content_source):fullUrl { action = "edit" } .. " edit]]</sup> </i></dd>") elseif not nodoc then if doc_title then output:insert( "<dd><i style=\"font-size: larger;\">This " .. pagetype .. " lacks a [[Help:Documenting templates and modules|documentation subpage]]. " .. (fallback_docs and "You may " or "Please ") .. "[" .. doc_title:fullUrl { action = "edit", preload = preload } .. " create it].</i></dd>\n") else output:insert( "<dd><i style=\"font-size: larger; color: var(--wikt-palette-red-9,#FF0000);\">Unable to auto-generate " .. 
"documentation for this " .. pagetype .. ".</i></dd>\n") end end end if startswith(title.fullText, "MediaWiki:Gadget-") then local is_gadget = false for line in gline(new_title("MediaWiki:Gadgets-definition").content) do local gadget, items = line:match("^%*%s*(%a[%w_-]*)%[.-%]|(.+)$") if not gadget then gadget, items = line:match("^%*%s*(%a[%w_-]*)|(.+)$") end if gadget then items = Array(split(items, "|")) for i, item in ipairs(items) do if title.fullText == ("MediaWiki:Gadget-" .. item) then is_gadget = true output:insert("<dd> ''This script is a part of the <code>") output:insert(gadget) output:insert("</code> gadget ([") output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(" edit definitions])'' <dl>") output:insert("<dd> ''Description ([") output:insert(tostring(full_url("MediaWiki:Gadget-" .. gadget, { action = "edit" }))) output:insert(" edit])'': ") output:insert(preprocess(new_message('Gadget-' .. gadget):plain())) output:insert(" </dd>") items:remove(i) if #items > 0 then for j, item in ipairs(items) do items[j] = '[[MediaWiki:Gadget-' .. item .. '|' .. item .. 
']]' end output:insert("<dd> ''Other parts'': ") output:insert(list_to_text(items)) output:insert("</dd>") end output:insert("</dl></dd>") break end end end end if not is_gadget then output:insert("<dd> ''This script is not a part of any [") output:insert(tostring(full_url("Special:Gadgets", { uselang = "en" }))) output:insert(' gadget] ([') output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(' edit definitions]).</dd>') -- else -- cats:insert("Wiktionary gadgets") end end if old_doc_title then output:insert("<dd> ''Redirected from'' [") output:insert(old_doc_title:fullUrl { redirect = "no" }) output:insert(" ") output:insert(old_doc_title.fullText) output:insert("] ([") output:insert(old_doc_title:fullUrl { action = "edit" }) output:insert(" edit]).</dd>\n") end if not args.nolinks then local links = Array() if title.isSubpage and not args.notsubpage then links:insert("[[:" .. title.nsText .. ":" .. title.rootText .. "|root page]]") links:insert("[[Special:PrefixIndex/" .. title.nsText .. ":" .. title.rootText .. "/|root page’s subpages]]") else links:insert("[[Special:PrefixIndex/" .. title.fullText .. "/|subpage list]]") end links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidetrans = true, hideredirs = true })) .. " links]") if contentModel ~= "Scribunto" then links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hidetrans = true })) .. " redirects]") end if is_script_or_stylesheet then if user_name then links:insert("[[Special:MyPage" .. title.text:sub(#title.rootText + 1) .. "|your own]]") end else links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hideredirs = true })) .. " transclusions]") end if contentModel == "Scribunto" then local is_testcases = title.isSubpage and title.subpageText == "testcases" local without_subpage = title.nsText .. ":" .. 
title.baseText if is_testcases then links:insert("[[:" .. without_subpage .. "|tested module]]") else links:insert("[[" .. title.fullText .. "/testcases|testcases]]") end if user_name then links:insert("[[User:" .. user_name .. "|user page]]") links:insert("[[User talk:" .. user_name .. "|user talk page]]") links:insert("[[Special:PrefixIndex/User:" .. user_name .. "/|userspace]]") -- If sandbox module, add a link to the module that this is a sandbox of. -- Exclude user sandbox modules like [[User:Dine2016/sandbox]]. elseif title.text:find("^sandbox%d*/") or title.text:find("/sandbox%d*%f[/%z]") then cats:insert("Sandbox modules") -- Sandbox modules don’t really need documentation. needs_doc = false -- Don't track user sandbox modules. local text_title = new_title(title.text) if not (text_title and text_title.nsText == "User") then local diff local sandbox_of = title.text:match("^(.*)/sandbox%d*%f[/%z]") if sandbox_of then track("sandbox to be moved") else sandbox_of = title.text:match("^sandbox%d*/(.*)$") end if not sandbox_of then error(("Internal error: Something wrong, couldn't extract sandbox-of module from title '%s'") :format(title.text)) end sandbox_of = title.nsText .. ":" .. sandbox_of if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "diff") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) end -- If not a sandbox module, add link to sandbox module. -- Sometimes there are multiple sandboxes for a single module: -- [[Module:sandbox/sa-pronunc]], [[Module:sandbox2/sa-pronunc]]. else local sandbox_title local user_prefix, user_rest = title.text:match("^(User:.-/)(.*)$") if not user_prefix then user_prefix = "" user_rest = title.text end sandbox_title = title.nsText .. ":" .. user_prefix .. "sandbox/" .. user_rest local sandbox_link = "[[:" .. sandbox_title .. "|sandbox]]" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "diff") .. ")" end links:insert(sandbox_link .. (diff or "")) end end if title.nsText == "Template" then -- Error search: all(any namespace), hastemplate (show pages using the template), insource (show source code), incategory (any/specific error) -- [[mw:Help:CirrusSearch]], [[w:Help:Searching/Regex]] -- apparently same with/without: &profile=advanced&fulltext=1 local errorq = 'searchengineselect=mediawiki&search=all: hastemplate:\"' .. title.rootText .. '\" insource:\"' .. title.rootText .. '\" incategory:' local eincategory = "Pages_with_module_errors|ParserFunction_errors|DisplayTitle_errors|Pages_with_ISBN_errors|Pages_with_ISSN_errors|Pages_with_reference_errors|Pages_with_syntax_highlighting_errors|Pages_with_TemplateStyles_errors" links:insert( '[' .. tostring(full_url('Special:Search', errorq .. eincategory)) .. ' errors]' .. ' (' .. '[' .. tostring(full_url('Special:Search', errorq .. 'ParserFunction_errors')) .. ' parser]' .. '/' .. '[' .. tostring(full_url('Special:Search', errorq .. 'Pages_with_module_errors')) .. ' module]' .. ')' ) if title.isSubpage and title.text:find("/sandbox%d*%f[/%z]") then -- This is a sandbox template. -- At the moment there are no user sandbox templates with subpage -- “/sandbox”. cats:insert("Sandbox templates") -- Sandbox templates don’t really need documentation. needs_doc = false -- Will behave badly if “/sandbox” occurs twice in title! local sandbox_of = title.fullText:gsub("/sandbox%d*%f[/%z]", "") local diff if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "diff") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) -- This is a template that can have a sandbox. elseif not args.nosandbox then -- unless we tell it not to local sandbox_title = title.fullText .. "/sandbox" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "diff") .. ")" end links:insert("[[:" .. sandbox_title .. "|sandbox]]" .. (diff or "")) end end if #links > 0 then output:insert("<dd> ''Useful links'': " .. links:concat(" • ") .. "</dd>") end end output:insert("</dl>\n") -- Show error from [[Module:category tree/topic cat/data]] on its submodules' -- documentation to, for instance, warn about duplicate labels. if startswith(title.fullText, "Module:category tree/topic/") then local ok, err = pcall(require, "Module:category tree/topic/data") if not ok then output:insert('<span class="error">' .. err .. '</span>\n\n') end end if doc_title and doc_title.exists then -- Override automatic documentation, if present. doc_content = expand_template { title = doc_title.fullText } elseif not doc_content and fallback_docs then doc_content = expand_template { title = fallback_docs, args = { ['user'] = user_name, ['page'] = title.fullText, ['skin name'] = skin_name, }, } end if doc_content then output:insert(doc_content) end output:insert(('\n<%s style="clear: both;" />'):format(args.hr == "below" and "hr" or "br")) if cats_auto_generated and not cats[1] and (not doc_content or not doc_content:find("%[%[Category:")) then if contentModel == "Scribunto" then cats:insert("Uncategorized modules") -- elseif title.nsText == "Template" then -- cats:insert("Uncategorized templates") end end if needs_doc then cats:insert("Templates and modules needing documentation") end for _, cat in ipairs(cats) do output:insert("[[Category:" .. cat .. "]]") end output:insert("</div>\n") return output:concat() end function export.module_auto_doc_table() local parts = {} local function ins(text) insert(parts, text) end ins('{|class="wikitable"') ins("! Regex !! Category !! 
Handling modules") for _, spec in ipairs(module_regex) do local cat_text local cats = spec.cat if cats then local cat_parts = {} if type(cats) == "string" then cats = { cats } end for _, cat in ipairs(cats) do insert(cat_parts, ("<code>%s</code>"):format((cat:gsub("|", "&#124;")))) end cat_text = concat(cat_parts, ", ") else cat_text = "''(unspecified)''" end ins("|-") ins(("| <code>%s</code> || %s || %s"):format(spec.regex, cat_text, is_callable(spec.process) and "''(handled internally)''" or type(spec.process) == "string" and ("[[Module:documentation/functions/%s]]"):format(spec.process) or "''(no documentation generator)''")) end ins("|}") return concat(parts, "\n") end -- Used by {{translit module documentation}}. function export.translitModuleLangList(frame) local pagename, subpage if frame.args[1] then pagename = frame.args[1] else local title = get_current_title() subpage = title.subpageText pagename = title.text if subpage ~= pagename then pagename = title.rootText end end local translitModule = pagename local languageObjects = require("Module:languages/byTranslitModule")(translitModule) local codeInPagename = pagename:match("^([%l-]+)%-.*translit$") local categories = Array() local codeInPagenameInList = false if codeInPagename then if languageObjects[1] and subpage ~= "documentation" then local agreement = languageObjects[2] and "s" or "" categories:insert("[[Category:Transliteration modules used by " .. #languageObjects .. " language" .. agreement .. "]]") end languageObjects = Array(languageObjects) :filter( function(lang) local result = lang:getCode() ~= codeInPagename codeInPagenameInList = codeInPagenameInList or result return result end) end if subpage ~= "documentation" then for script_code in pagename:gmatch("%f[^-%z]%u%l%l%l%f[-]") do local script = get_script(script_code) if script then categories:insert("[[Category:" .. script:getCategoryName() .. "]]") end end end if subpage ~= "documentation" and not title_exists("Module:" .. pagename .. 
"/testcases") then categories:insert("[[Category:Transliteration modules without a testcases subpage]]") end if not languageObjects[1] then return categories:concat() end local langs = Array(languageObjects) :sort( function(lang1, lang2) return lang1:getCode() < lang2:getCode() end) -- This will not error because languageObjects is not empty. :map(languageObjects[1].makeCategoryLink) :serialCommaJoin() return "It is " .. (codeInPagenameInList and "also" or "") .. " used to transliterate " .. langs .. "." .. categories:concat() end -- Used by {{strip diacritics module documentation}}. function export.stripDiacriticsModuleLangList(frame) local pagename, subpage if frame.args[1] then pagename = frame.args[1] else local title = get_current_title() subpage = title.subpageText pagename = title.text if subpage ~= pagename then pagename = title.rootText end end local stripDiacriticsModule = pagename local languageObjects = require("Module:languages/byStripDiacriticsModule")(stripDiacriticsModule) local codeInPagename = pagename:match("^([%l-]+)%-.*stripdiacritics$") local categories = Array() local codeInPagenameInList = false if codeInPagename then if languageObjects[1] and subpage ~= "documentation" then local agreement = languageObjects[2] and "s" or "" categories:insert("[[Category:Diacritic-stripping modules used by " .. #languageObjects .. " language" .. agreement .. "]]") end languageObjects = Array(languageObjects) :filter( function(lang) local result = lang:getCode() ~= codeInPagename codeInPagenameInList = codeInPagenameInList or result return result end) end if subpage ~= "documentation" then for script_code in pagename:gmatch("%f[^-%z]%u%l%l%l%f[-]") do local script = get_script(script_code) if script then categories:insert("[[Category:" .. script:getCategoryName() .. "]]") end end end if subpage ~= "documentation" and not title_exists("Module:" .. pagename .. 
"/testcases") then categories:insert("[[Category:Diacritic-stripping modules without a testcases subpage]]") end if not languageObjects[1] then return categories:concat() end local langs = Array(languageObjects) :sort( function(lang1, lang2) return lang1:getCode() < lang2:getCode() end) -- This will not error because languageObjects is not empty. :map(languageObjects[1].makeCategoryLink) :serialCommaJoin() return "It is " .. (codeInPagenameInList and "also" or "") .. " used to strip diacritics for " .. langs .. "." .. categories:concat() end -- Used by {{sortkey module documentation}}. function export.sortkeyModuleLangList(frame) local pagename, subpage if frame.args[1] then pagename = frame.args[1] else local title = get_current_title() subpage = title.subpageText pagename = title.text if subpage ~= pagename then pagename = title.rootText end end local sortkeyModule = pagename local languageObjects = require("Module:languages/bySortkeyModule")(sortkeyModule) local codeInPagename = pagename:match("^([%l-]+)%-.*sortkey$") local categories = Array() local codeInPagenameInList = false if codeInPagename then if languageObjects[1] and subpage ~= "documentation" then local agreement = languageObjects[2] and "s" or "" categories:insert("[[Category:Sortkey-generating modules used by " .. #languageObjects .. " language" .. agreement .. "]]") end languageObjects = Array(languageObjects) :filter( function(lang) local result = lang:getCode() ~= codeInPagename codeInPagenameInList = codeInPagenameInList or result return result end) end if subpage ~= "documentation" then for script_code in pagename:gmatch("%f[^-%z]%u%l%l%l%f[-]") do local script = get_script(script_code) if script then categories:insert("[[Category:" .. script:getCategoryName() .. "]]") end end end if subpage ~= "documentation" and not title_exists("Module:" .. pagename .. 
"/testcases") then categories:insert("[[Category:Sortkey-generating modules without a testcases subpage]]") end if not languageObjects[1] then return categories:concat() end local langs = Array(languageObjects) :sort( function(lang1, lang2) return lang1:getCode() < lang2:getCode() end) -- This will not error because languageObjects is not empty. :map(languageObjects[1].makeCategoryLink) :serialCommaJoin() return "It is " .. (codeInPagenameInList and "also" or "") .. " used to sort " .. langs .. "." .. categories:concat() end return export 6dc60l37z33a4ot8ok9usxuuu2fyk3g 231494 231493 2026-04-15T17:08:50Z Lee 19 පැරණි සංස්කරණයකින් ගත් කොටස්... 231494 Scribunto text/plain local m_sinhala = require("Module:sinhala") local export = {} local array_module = "Module:array" local debug_track_module = "Module:debug/track" local frame_module = "Module:frame" local fun_is_callable_module = "Module:fun/isCallable" local languages_module = "Module:languages" local links_module = "Module:links" local load_module = "Module:load" local module_categorization_module = "Module:module categorization" local number_list_show_module = "Module:number list/show" local pages_module = "Module:pages" local parameters_module = "Module:parameters" local scripts_module = "Module:scripts" local string_endswith_module = "Module:string/endswith" local string_gline_module = "Module:string/gline" local string_insert_module = "Module:string/insert" local string_startswith_module = "Module:string/startswith" local string_utilities_module = "Module:string utilities" local template_parser_module = "Module:template parser" local title_exists_module = "Module:title/exists" local title_new_title_module = "Module:title/newTitle" local concat = table.concat local error = error local full_url = mw.uri.fullUrl local get_current_title = mw.title.getCurrentTitle local insert = table.insert local ipairs = ipairs local list_to_text = mw.text.listToText local new_message = mw.message.new local pcall = pcall local 
require = require local tonumber = tonumber local tostring = tostring local type = type local unpack = unpack or table.unpack -- Lua 5.2 compatibility local function Array(...) Array = require(array_module) return Array(...) end local function categorize_module(...) categorize_module = require(module_categorization_module).categorize return categorize_module(...) end local function debug_track(...) debug_track = require(debug_track_module) return debug_track(...) end local function endswith(...) endswith = require(string_endswith_module) return endswith(...) end local function expand_template(...) expand_template = require(frame_module).expandTemplate return expand_template(...) end local function find_templates(...) find_templates = require(template_parser_module).find_templates return find_templates(...) end local function full_link(...) full_link = require(links_module).full_link return full_link(...) end local function get_lang(...) get_lang = require(languages_module).getByCode return get_lang(...) end local function get_pagetype(...) get_pagetype = require(pages_module).get_pagetype return get_pagetype(...) end local function get_script(...) get_script = require(scripts_module).getByCode return get_script(...) end local function gline(...) gline = require(string_gline_module) return gline(...) end local function is_callable(...) is_callable = require(fun_is_callable_module) return is_callable(...) end local function is_documentation(...) is_documentation = require(pages_module).is_documentation return is_documentation(...) end local function is_sandbox(...) is_sandbox = require(pages_module).is_sandbox return is_sandbox(...) end local function new_title(...) new_title = require(title_new_title_module) return new_title(...) end local function number_list_show_table(...) number_list_show_table = require(number_list_show_module).table return number_list_show_table(...) end local function preprocess(...) 
preprocess = require(frame_module).preprocess return preprocess(...) end local function process_params(...) process_params = require(parameters_module).process return process_params(...) end local function safe_load_data(...) safe_load_data = require(load_module).safe_load_data return safe_load_data(...) end local function split(...) split = require(string_utilities_module).split return split(...) end local function startswith(...) startswith = require(string_startswith_module) return startswith(...) end local function string_insert(...) string_insert = require(string_insert_module) return string_insert(...) end local function title_exists(...) title_exists = require(title_exists_module) return title_exists(...) end local function ugsub(...) ugsub = require(string_utilities_module).gsub return ugsub(...) end local function umatch(...) umatch = require(string_utilities_module).match return umatch(...) end local skins = { ["common"] = "", ["vector"] = "Vector", ["monobook"] = "Monobook", ["cologneblue"] = "Cologne Blue", ["modern"] = "Modern", } local function track(page) debug_track("documentation/" .. page) return true end local function compare_pages(page1, page2, text) return "[" .. tostring( full_url("Special:ComparePages", { page1 = page1, page2 = page2 })) .. " " .. text .. "]" end -- Avoid transcluding [[Module:languages/cache]] everywhere. 
-- Proxy table: every key lookup is forwarded to [[Module:languages/cache]],
-- which is only required at the moment a key is read. Nothing is stored in the
-- proxy itself.
local lang_cache = setmetatable({}, {
	__index = function(self, k)
		return require("Module:languages/cache")[k]
	end
})

-- Returns a formatted link to `word`, using the language object for code "zh".
local function zh_link(word)
	return full_link {
		lang = lang_cache.zh,
		term = word
	}
end

-- Documentation generator for the [[Module:languages/data/...]] pages.
--   title:    title object of the module (unused here).
--   cats:     category array; "<category>|<sort key>" is appended to it.
--   division: the capture from the matching pattern, e.g. "2", "3/a",
--             "exceptional", optionally followed by "/extra".
-- Returns a table naming the documentation template to expand.
local function make_languages_data_documentation(title, cats, division)
	local doc_template, module_cat
	if endswith(division, "/extra") then
		-- Drop the trailing "/extra" (6 characters) so the sort key matches the
		-- one used for the corresponding non-extra module.
		division = division:sub(1, -7)
		doc_template = "language extradata documentation"
		module_cat = "භාෂා අමතර දත්ත මොඩියුල"
	else
		doc_template = "language data documentation"
		module_cat = "භාෂා දත්ත මොඩියුල"
	end
	local sort_key
	if division == "exceptional" then
		sort_key = "x"
	else
		-- Only the first return value of gsub (the string) is kept.
		sort_key = division:gsub("/", "")
	end
	cats:insert(module_cat .. "|" .. sort_key)
	return {
		title = doc_template
	}
end

-- Documentation generator for [[Module:Unicode data/...]] pages.
-- Only titles of the form "Module:Unicode data/<subpage>/<XXX>" (XXX = three hex
-- digits) with <subpage> being "names", "images" or "emoji images" get a
-- description; for every other title nothing (nil) is returned.
--   title: title object of the module; only `fullText` is read.
--   cats:  category array (unused here).
local function make_Unicode_data_documentation(title, cats)
	local subpage, first_three_of_code_point
		= title.fullText:match("^Module:Unicode data/([^/]+)/(%x%x%x)$")
	if subpage == "names" or subpage == "images" or subpage == "emoji images" then
		-- The three hex digits select a block of 0x1000 code points.
		local low, high =
			tonumber(first_three_of_code_point .. "000", 16),
			tonumber(first_three_of_code_point .. "FFF", 16)
		local text, text_type
		if subpage == "names" then
			-- The "names" shards hold character names, not image titles.
			text_type = "names"
		elseif subpage == "images" then
			text_type = "titles of images"
		elseif subpage == "emoji images" then
			text_type = "emoji-style images"
		end
		text = string.format(
			"This data module contains the %s of " ..
			"[[Appendix:Unicode|Unicode]] code points within the range U+%04X to U+%04X.",
			text_type, low, high)
		if subpage == "images" and safe_load_data("Module:Unicode data/emoji images/" .. first_three_of_code_point) then
			text = text .. " This list includes the text variants of emojis. For the list of emoji variants of those characters, see [[Module:Unicode data/emoji images/" .. first_three_of_code_point .. "]]."
		elseif subpage == "emoji images" then
			text = text .. " For text-style images, see [[Module:Unicode data/images/" .. first_three_of_code_point .. "]]."
		end
		return text
	end
end

-- Adds the standard categories for a language-specific data module.
--   cats:                     category array to append to.
--   langcode:                 language code looked up through `lang_cache`.
--   overall_data_module_cat:  umbrella category; added with the language name as
--                             sort key.
-- Returns the language object and the canonical name that was used, or nothing
-- if the code is not recognised. When the language object has a `_fullCode`
-- field, the canonical name of that code is used instead of the object's own.
local function insert_lang_data_module_cats(cats, langcode, overall_data_module_cat)
	local lang = lang_cache[langcode]
	if lang then
		local langname
		if lang._fullCode then
			langname = lang_cache[lang._fullCode]:getCanonicalName()
		else
			langname = lang:getCanonicalName()
		end
		cats:insert(overall_data_module_cat .. "|" .. langname)
		-- NOTE(review): `m_sinhala` is not declared among this file's visible
		-- locals. If it is not provided as a global, indexing it would raise and
		-- break the documentation of every language data module, so fall back
		-- to the canonical name when the converter is unavailable. Confirm
		-- which module was intended and load it explicitly.
		local to_sinhala = m_sinhala and m_sinhala.sinhala
		local category_langname = to_sinhala and to_sinhala(langname) or langname
		cats:insert(category_langname .. " මොඩියුල")
		cats:insert(category_langname .. " දත්ත මොඩියුල")
		return lang, langname
	end
end

--[=[
This provides categories and documentation for various data modules, so that [[Category:Uncategorized modules]] isn't
unnecessarily cluttered. It is a list of tables, each of which has the following possible fields:

`regex` (required): A Lua pattern to match the module's title. If it matches, the data in this entry will be used.
Any captures in the pattern can be referenced in the `cat` field using %1 for the first capture, %2 for the
second, etc. (often used for creating the sortkey for the category). In addition, the captures are passed to the
`process` function as the third and subsequent parameters.

`process` (optional): This may be a function or a string. If it is a function, it is called as follows:
  `process(TITLE, CATS, CAPTURE1, CAPTURE2, ...)`
where:
  * TITLE is a title object describing the module's title; see
    [https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Title_objects].
  * CATS is an array object (see [[Module:array]]) of categories that the module will be added to.
  * CAPTURE1, CAPTURE2, ... contain any captures in the `regex` field.
The return value of `process` should either be a string (which will be used as the module's documentation), or a
table specifying the name of a template to expand to get the documentation, along with the arguments to that template.
In the latter format, the template name (bare, without the "Template:" prefix) should be in the `title` field, and any arguments should be in `args`; in this case, the template name will be listed above the generated documentation as the source of the documentation, along with an edit button to edit the template's contents. If, however, the return value of the `process` function is a string, any template invocations will be expanded using frame:preprocess(), and [[Module:documentation]] will be listed as the source of the documentation.

If `process` itself is a string rather than a function, it should name a submodule under [[Module:documentation/functions/]] which returns a function, of the same type as described above. This submodule will be specified as the source of the documentation (unless it returns a table naming a template to expand to get the documentation, as described above).

If `process` is omitted entirely, the module will have no documentation.

`cat` (optional): A string naming the category into which the module should be placed, or a list of such strings. Captures specified in `regex` may be referenced in this string using %1 for the first capture, %2 for the second, etc. It is also possible to add categories in the `process` function by inserting them into the passed-in CATS array (the second parameter).
]=] local module_regex = { { regex = "^Module:languages/data/(3/%l/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(3/%l)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(2/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(2)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(exceptional/extra)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/data/(exceptional)$", process = make_languages_data_documentation, }, { regex = "^Module:languages/.+$", cat = "භාෂා සහ අක්ෂරක්‍රම මොඩියුල", }, { regex = "^Module:scripts/.+$", cat = "භාෂා සහ අක්ෂරක්‍රම මොඩියුල", }, { regex = "^Module:data tables/data..?.?.?$", cat = "Reference module sharded data tables", }, { regex = "^Module:zh/data/dial%-pron/.+$", cat = "Chinese dialectal pronunciation data modules", process = "zh dial or syn", }, { regex = "^Module:zh/data/dial%-syn/.+$", cat = "Chinese dialect synonyms data modules", process = "zh dial or syn", }, { regex = "^Module:zh/data/glyph%-data/.+$", cat = "Chinese historical character forms data modules", process = function(title, cats) local character = title.fullText:match("^Module:zh/data/glyph%-data/(.+)") if character then return ("This module contains data on historical forms of the Chinese character %s.") :format(zh_link(character)) end end, }, { regex = "^Module:zh/data/ltc%-pron/(.+)$", cat = "Middle Chinese pronunciation data modules|%1", process = "zh data", }, { regex = "^Module:zh/data/och%-pron%-BS/(.+)$", cat = "Old Chinese (Baxter-Sagart) pronunciation data modules|%1", process = "zh data", }, { regex = "^Module:zh/data/och%-pron%-ZS/(.+)$", cat = "Old Chinese (Zhengzhang) pronunciation data modules|%1", process = "zh data", }, { -- capture rest of zh/data submodules regex = "^Module:zh/data/(.+)$", cat = "චීන දත්ත මොඩියුල|%1", }, { regex = 
"^Module:mul/guoxue%-data/cjk%-?(.*)$", process = "guoxue-data", }, { regex = "^Module:Unicode data/(.+)$", cat = "යුනිකෝඩ් දත්ත මොඩියුල|%1", process = make_Unicode_data_documentation, }, { regex = "^Module:number list/data/(.+)$", process = function(title, cats, lang_code) local lang = insert_lang_data_module_cats(cats, lang_code, "සංඛ්‍යා දත්ත මොඩියුල") if lang then return ("This module contains data on various types of numbers in %s.\n%s") :format(lang:makeCategoryLink(), number_list_show_table() or "") end end, }, { regex = "^Module:accel/(.+)$", process = function(title, cats) local lang_code = title.subpageText local lang = lang_cache[lang_code] if lang then cats:insert(lang:getCanonicalName() .. " මොඩියුල|accel") cats:insert(("Accel submodules|%s"):format(lang:getCanonicalName())) return ("This module contains new entry creation rules for %s; see [[WT:ACCEL]] for an overview, and [[Module:accel]] for information on creating new rules.") :format(lang:makeCategoryLink()) end end, }, { regex = "^Module:inc%-ash/dial/data/(.+)$", cat = "Ashokan Prakrit modules|%1", process = function(title, cats) local word = title.fullText:match("^Module:inc%-ash/dial/data/(.+)$") if word then local lang = lang_cache["inc-ash"] return ("This module contains data on the pronunciation of %s in dialects of %s.") :format(full_link({ term = word, lang = lang }, "term"), lang:makeCategoryLink()) end end, }, { regex = "^.+%-translit$", process = "translit", }, { regex = "^Module:form of/lang%-data/(.+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Language-specific form-of modules") if lang then -- FIXME, display more info. return "This module contains language-specific form-of data (tags, shortcuts, base lemma params. etc.) for " .. langname .. "." 
end end }, { regex = "^Module:labels/data/lang/(.+)$", process = function(title, cats, lang_code) local lang = insert_lang_data_module_cats(cats, lang_code, "Language-specific label data modules") if lang then return { title = "label language-specific data documentation", args = { [1] = lang_code }, } end end }, { regex = "^Module:category tree/lang/(.+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Category tree data modules/lang") if lang then return "This module handles generating the descriptions and categorization for " .. langname .. " category pages " .. "of the format \"" .. langname .. " LABEL\" where LABEL can be any text. Examples are " .. "[[:Category:Bulgarian conjugation 2.1 verbs]] and [[:Category:Russian velar-stem neuter-form nouns]]. " .. "This module is part of the category tree system, which is a general framework for generating the " .. "descriptions and categorization of category pages.\n\n" .. "For more information, see [[Module:category tree/lang/documentation]].\n\n" .. "'''NOTE:''' If you add a new language-specific module, you must add the language code to the " .. "list at the top of [[Module:category tree/lang]] in order for the module to be recognized." end end }, { regex = "^Module:category tree/topic/(.+)$", process = function(title, cats, submodule) cats:insert("Category tree data modules/topic| ") return { title = "topic cat data submodule documentation" } end }, { regex = "^Module:category tree/(.+)$", process = function(title, cats, submodule) cats:insert("Category tree data modules| ") return { title = "category tree data submodule documentation" } end }, { regex = "^Module:ja/data/(.+)$", cat = "ජපන් දත්ත මොඩියුල|%1", }, { regex = "^Module:fi%-dialects/data/feature/Kettunen1940 ([0-9]+)$", cat = "Finnish dialectal data atlas modules|%1", process = function(title, cats, shard) return "This module contains shard " .. shard .. 
" of the online version of Lauri Kettunen's 1940 work " .. "''Suomen murteet III A. Murrekartasto'' (\"Finnish dialects III A: Dialect atlas\"). " .. "It was imported and converted from urn:nbn:fi:csc-kata20151130145346403821, published by the " .. "''Kotimaisten kielten keskus'' under the CC BY 4.0 license." end }, { regex = "^Module:fi%-dialects/data/feature/(.+)", cat = "Finnish dialectal data modules|%1", }, { regex = "^Module:fi%-dialects/data/word/(.+)", cat = "Finnish dialectal data modules|%1", }, { regex = "^Module:Swadesh/data/([%l-]+)$", process = function(title, cats, lang_code) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules") if lang then return "This module contains the [[Swadesh list]] of basic vocabulary in " .. langname .. "." end end }, { regex = "^Module:Swadesh/data/([%l-]+)/([^/]*)$", process = function(title, cats, lang_code, variety) local lang, langname = insert_lang_data_module_cats(cats, lang_code, "Swadesh modules") if lang then local prefix = "This module contains the [[Swadesh list]] of basic vocabulary in the " local etym_lang = get_lang(variety, nil, "allow etym") if etym_lang then return ("%s %s variety of %s."):format(prefix, etym_lang:getCanonicalName(), langname) end local script = get_script(variety) if script then return ("%s %s %s script."):format(prefix, langname, script:getCanonicalName()) end return ("%s %s variety of %s."):format(prefix, variety, langname) end end }, { regex = "^Module:typing%-aids", process = function(title, cats) local data_suffix = title.fullText:match("^Module:typing%-aids/data/(.+)$") local sortkey if data_suffix then if data_suffix:find "^[%l-]+$" then local lang = get_lang(data_suffix) if lang then sortkey = lang:getCanonicalName() cats:insert(sortkey .. 
" දත්ත මොඩියුල") end elseif data_suffix:find "^%u%l%l%l$" then local script = get_script(data_suffix) if script then sortkey = script:getCanonicalName() cats:insert(script:getCategoryName()) end end cats:insert("Character insertion data modules|" .. (sortkey or data_suffix)) end end, }, { regex = "^Module:R:([%l-]+):(.+)$", process = function(title, cats, lang_code, refname) local lang = lang_cache[lang_code] if lang then cats:insert(lang:getCanonicalName() .. " මොඩියුල|" .. refname) cats:insert(("Reference modules|%s"):format(lang:getCanonicalName())) return "This module implements the reference template {{temp|R:" .. lang_code .. ":" .. refname .. "}}." end end, }, { regex = "^Module:Quotations/([%l-]+)/?(.*)", process = "Quotation", }, { regex = "^Module:affix/lang%-data/([%l-]+)", process = "affix lang-data", }, { regex = "^Module:dialect synonyms/([%l-]+)$", process = function(title, cats, lang_code) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules| ") return "This module contains data on specific varieties of " .. langname .. ", for use by " .. "{{tl|dialect synonyms}}. The actual synonyms themselves are contained in submodules.\n\n" .. "==== Language data module structure ====\n" .. "* <code>export.title</code> — optional; table title template (e.g. \"Regional synonyms of %s\").\n" .. "* <code>export.columns</code> — optional; list of column headers for location hierarchy (e.g. {\"Dialect group\", \"Dialect\", \"Location\"}).\n" .. "* <code>export.notes</code> — optional; table of note keys to text.\n" .. "* <code>export.sources</code> — optional; table of source keys to text.\n" .. "* <code>export.note_aliases</code> — optional; alias map for notes.\n" .. "* <code>export.varieties</code> — required; nested table of variety nodes. Each node must have <code>name</code>; array part holds children. 
Node keys can include <code>text_display</code>, <code>color</code>, <code>code</code>, <code>wikidata</code>, <code>lat</code>, <code>long</code>, and language-specific keys (e.g. <code>persian</code>, <code>armenian</code>, <code>chinese</code>).\n\n" .. expand_template({ title = 'dial syn', args = { lang_code, ["demo mode"] = "y" } }) end end, }, { regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)$", process = function(title, cats, lang_code, term) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules|" .. term) return ("%s\n\n%s"):format( "==== Term/sense module structure ====\n" .. "* <code>export.title</code> — optional; custom table title (e.g. \"Realization of 'strong R' between vowels\"). Overrides the language default.\n" .. "* <code>export.meaning</code> — optional; meaning/gloss (alternative to <code>gloss</code>).\n" .. "* <code>export.gloss</code> — optional; short meaning for the table.\n" .. "* <code>export.note</code> — optional; single note key or string, or list of note keys.\n" .. "* <code>export.notes</code> — optional; list of note keys.\n" .. "* <code>export.source</code> / <code>export.sources</code> — optional; source keys.\n" .. "* <code>export.last_column</code> — optional; label for the data column (default \"Words\"; e.g. \"Realization\").\n" .. "* <code>export.syns</code> — required; table mapping variety/location names (keys from the language data module) to a list of term entries. Each entry can be a string or a table (e.g. <code>{ ipa = \"[ɽ]\" }</code> or <code>{ term = \"word\" }</code>).\n\n" .. "Example (custom title and data column, IPA realizations):\n" .. "<pre>\nlocal export = {}\n\nexport.title = \"Realization of 'strong R' between vowels\"\n" .. "export.meaning = \"\"\nexport.note = \"realization of 'strong R' between vowels\"\n" .. 
"export.last_column = \"Realization\"\n\nexport.syns = {\n\t[\"ALERS-158\"] = { { ipa = \"[ɽ]\" } },\n\t[\"ALERS-175\"] = { { ipa = \"[x]\" } },\n}\n\nreturn export\n</pre>\n\n", expand_template({ title = 'dial syn', args = { lang_code, term } })) end end, }, { regex = "^Module:dialect synonyms/([%l-]+)/([^/]+)/([^/]+)$", process = function(title, cats, lang_code, term, id) local lang = lang_cache[lang_code] if lang then local langname = lang:getCanonicalName() cats:insert("Dialect synonyms data modules|" .. langname) cats:insert(langname .. " dialect synonyms data modules|" .. term) return ("%s\n\n%s"):format( "==== Term/sense module structure ====\n" .. "* <code>export.title</code> — optional; custom table title (e.g. \"Realization of 'strong R' between vowels\"). Overrides the language default.\n" .. "* <code>export.meaning</code> — optional; meaning/gloss (alternative to <code>gloss</code>).\n" .. "* <code>export.gloss</code> — optional; short meaning for the table.\n" .. "* <code>export.note</code> — optional; single note key or string, or list of note keys.\n" .. "* <code>export.notes</code> — optional; list of note keys.\n" .. "* <code>export.source</code> / <code>export.sources</code> — optional; source keys.\n" .. "* <code>export.last_column</code> — optional; label for the data column (default \"Words\"; e.g. \"Realization\").\n" .. "* <code>export.syns</code> — required; table mapping variety/location names (keys from the language data module) to a list of term entries. Each entry can be a string or a table (e.g. <code>{ ipa = \"[ɽ]\" }</code> or <code>{ term = \"word\" }</code>).\n\n" .. "Example (custom title and data column, IPA realizations):\n" .. "<pre>\nlocal export = {}\n\nexport.title = \"Realization of 'strong R' between vowels\"\n" .. "export.meaning = \"\"\nexport.note = \"realization of 'strong R' between vowels\"\n" .. 
"export.last_column = \"Realization\"\n\nexport.syns = {\n\t[\"ALERS-158\"] = { { ipa = \"[ɽ]\" } },\n\t[\"ALERS-175\"] = { { ipa = \"[x]\" } },\n}\n\nreturn export\n</pre>\n\n", expand_template({ title = 'dial syn', args = { lang_code, term, id = id } })) end end, }, { regex = "^Module:bibliography/data/([%l-]+)$", process = function(title, cats, lang_code) if lang_code == "preload" then return 'Used as a base model for other languages when the button "create new language submodule" is clicked.' end local page = require(title.fullText).bib_page if not page then page = lang_cache[lang_code]:getCanonicalName() if page then cats:insert(page .. " modules") end end cats:insert("Reference modules") return "This module holds bibliographical data for " .. page .. ". For the formatted bibliography see '''[[Appendix:Bibliography/" .. page .. "]]'''." end, }, } function export.show(frame) local boolean_default_false = { type = "boolean", default = false } local args = process_params(frame.args, { ["hr"] = true, ["for"] = true, ["from"] = true, ["allowondoc"] = boolean_default_false, -- Don't throw an error if used on a documentation subpage. ["notsubpage"] = boolean_default_false, ["nodoc"] = boolean_default_false, ["nolinks"] = boolean_default_false, -- suppress all "Useful links" ["nosandbox"] = boolean_default_false, -- supress sandbox }) local output = Array('\n<div class="documentation" style="display:block; clear:both">\n') local cats = Array() local nodoc = args.nodoc if (not args.hr) or (args.hr == "above") then output:insert("----\n") end local title = args["for"] and new_title(args["for"]) or get_current_title() local doc_title = args.from ~= "-" and new_title(args.from or title.fullText .. 
'/documentation') or nil local contentModel = title.contentModel local pagetype, is_script_or_stylesheet = get_pagetype(title) local preload, fallback_docs, doc_content, old_doc_title, user_name, skin_name, needs_doc local doc_content_source = "Module:documentation" local auto_generated_cat_source local cats_auto_generated = false if not args.allowondoc and is_documentation(title) then -- TODO: merge with {{documentation subpage}}, and choose behaviour based on the page type. error("This template should not be used on a documentation page. Please use [[Template:documentation subpage]].") elseif is_sandbox(title) then local sandbox_ns = title.nsText preload = ("Template:documentation/preload%s%sSandbox"):format( sandbox_ns == "Module" and sandbox_ns or "Template", title.rootText:match("^[Uu]ser:(.+)") and "User" or "" ) elseif pagetype:match("%f[%w]gadget%f[%W]") then preload = "Template:documentation/preloadGadget" elseif pagetype:match("%f[%w]script%f[%W]") then -- .js if title.nsText == "MediaWiki" then preload = "Template:documentation/preloadMediaWikiJavaScript" else preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end end is_script_or_stylesheet = true elseif pagetype:match("%f[%w]stylesheet%f[%W]") then -- .css preload = "Template:documentation/preloadTemplate" -- XXX if title.nsText == "User" then user_name = title.rootText end is_script_or_stylesheet = true elseif contentModel == "Scribunto" then -- Exclude pages in Module: which aren't Scribunto. 
preload = "Template:documentation/preloadModule" elseif pagetype:match("%f[%w]template%f[%W]") or pagetype:match("%f[%w]project%f[%W]") then preload = "Template:documentation/preloadTemplate" end if doc_title and doc_title.isRedirect then old_doc_title = doc_title doc_title = doc_title.redirectTarget end output:insert("<dl class=\"plainlinks\" style=\"font-size: smaller;\">") local function get_module_doc_and_cats(categories_only) cats_auto_generated = true local automatic_cats = nil if user_name then fallback_docs = "documentation/fallback/user module" automatic_cats = { "User sandbox modules" } else for _, data in ipairs(module_regex) do local captures = { umatch(title.fullText, data.regex) } if #captures > 0 then local cat, process_function if is_callable(data.process) then process_function = data.process elseif type(data.process) == "string" then doc_content_source = "Module:documentation/functions/" .. data.process process_function = require(doc_content_source) end if process_function then doc_content = process_function(title, cats, unpack(captures)) end if type(doc_content) == "table" then doc_content_source = doc_content.title and "Template:" .. 
doc_content.title or doc_content_source doc_content = expand_template(doc_content) elseif doc_content ~= nil then doc_content = preprocess(doc_content) end cat = data.cat if cat then if type(cat) == "string" then cat = { cat } end for _, c in ipairs(cat) do insert(cats, (ugsub(title.fullText, data.regex, c))) end end break end end end if title.subpageText == "templates" then cats:insert("Template interface modules") end if automatic_cats then for _, c in ipairs(automatic_cats) do cats:insert(c) end end if #cats == 0 then local auto_cats = categorize_module(frame, "return raw", "noerror") if #auto_cats > 0 then auto_generated_cat_source = "Module:module categorization" end for _, category in ipairs(auto_cats) do cats:insert(category) end end -- meaning module is not in user’s sandbox or one of many datamodule boring series needs_doc = not categories_only and not (automatic_cats or doc_content or fallback_docs) end -- Override automatic documentation, if present. if doc_title and doc_title.exists then local cats_auto_generated_text = "" if contentModel == "Scribunto" then local doc_page_content = doc_title.content -- Track then do nothing if there are uses of includeonly. The -- pattern is slightly too permissive, but any false-positives are -- obvious typos that should be corrected. if doc_page_content:lower():match("</?includeonly%f[%s/>][^>]*>") then track("module-includeonly") else -- Check for uses of {{module cat}}. find_templates treats the -- input as transcluded by default (i.e. it parses the wikitext -- which will be transcluded through to the module page). local module_cat for template in find_templates(doc_page_content) do if template:get_name() == "module cat" then module_cat = true break end end if not module_cat then get_module_doc_and_cats("categories only") auto_generated_cat_source = auto_generated_cat_source or doc_content_source cats_auto_generated_text = " Categories were auto-generated by [[" .. auto_generated_cat_source .. "]]. <sup>[[" .. 
new_title(auto_generated_cat_source):fullUrl { action = "edit" } .. " edit]]</sup>"
				end
			end
		end

		-- Banner for an existing documentation page: one link to the page, an
		-- edit link, and (for modules) the note about auto-generated
		-- categories. The page title is emitted exactly once, inside a
		-- balanced [[...]] link.
		output:insert(
			"<dd><i style=\"font-size: larger;\">පහත දැක්වෙන " ..
			"[[Help:Documenting templates and modules|උපදෙස්]], [[" ..
			doc_title.fullText .. "]] හි පිහිටා ඇත. " ..
			"<sup>[[" .. doc_title:fullUrl { action = "edit" } .. " සංස්කරණය]]</sup>" ..
			cats_auto_generated_text .. "</i></dd>")
	else
		-- No documentation page exists: work out what can be generated or
		-- offered instead, depending on the kind of page.
		if contentModel == "Scribunto" then
			get_module_doc_and_cats(false)
		elseif title.nsText == "සැකිල්ල" then
			--cats:insert("Uncategorized templates")
			needs_doc = not (fallback_docs or nodoc)
		elseif user_name and is_script_or_stylesheet then
			-- The part of the title after the root page is matched against
			-- "/<skin>.js" or "/<skin>.css"; unknown skins leave skin_name nil.
			skin_name = skins[title.text:sub(#title.rootText + 1):match("^/(%l+)%.[jc]ss?$")]
			if skin_name then
				fallback_docs = "documentation/fallback/user " .. contentModel
			end
		end

		if doc_content then
			-- Banner naming the generator of the automatic documentation.
			output:insert(
				"<dd><i style=\"font-size: larger;\">පහත දැක්වෙන " ..
				"[[Help:Documenting templates and modules|උපදෙස්]], " ..
				"[[" .. doc_content_source .. "]] මගින් ජනනය කොට ඇත. <sup>[[" ..
				new_title(doc_content_source):fullUrl { action = "edit" } ..
				" සංස්කරණය කරන්න]]</sup> </i></dd>")
		elseif not nodoc then
			if doc_title then
				-- Invite the reader to create the missing documentation page,
				-- preloading the template chosen earlier for this page type.
				output:insert(
					"<dd><i style=\"font-size: larger;\">මෙම " .. pagetype ..
					" සතුව [[Help:Documenting templates and modules|උපදෙස් උප පිටුවක්]] නොපවතියි. " ..
					(fallback_docs and "You may " or "Please ") ..
					"[" .. doc_title:fullUrl { action = "edit", preload = preload } ..
					" නිර්මාණය කරන්න].</i></dd>\n")
			else
				output:insert(
					"<dd><i style=\"font-size: larger; color: var(--wikt-palette-red-9,#FF0000);\">Unable to auto-generate " ..
					"documentation for this " .. pagetype ..
".</i></dd>\n") end end end if startswith(title.fullText, "MediaWiki:Gadget-") then local is_gadget = false for line in gline(new_title("MediaWiki:Gadgets-definition").content) do local gadget, items = line:match("^%*%s*(%a[%w_-]*)%[.-%]|(.+)$") if not gadget then gadget, items = line:match("^%*%s*(%a[%w_-]*)|(.+)$") end if gadget then items = Array(split(items, "|")) for i, item in ipairs(items) do if title.fullText == ("MediaWiki:Gadget-" .. item) then is_gadget = true output:insert("<dd> ''This script is a part of the <code>") output:insert(gadget) output:insert("</code> gadget ([") output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(" edit definitions])'' <dl>") output:insert("<dd> ''Description ([") output:insert(tostring(full_url("MediaWiki:Gadget-" .. gadget, { action = "edit" }))) output:insert(" සංස්කරණය])'': ") output:insert(preprocess(new_message('Gadget-' .. gadget):plain())) output:insert(" </dd>") items:remove(i) if #items > 0 then for j, item in ipairs(items) do items[j] = '[[MediaWiki:Gadget-' .. item .. '|' .. item .. 
']]' end output:insert("<dd> ''Other parts'': ") output:insert(list_to_text(items)) output:insert("</dd>") end output:insert("</dl></dd>") break end end end end if not is_gadget then output:insert("<dd> ''This script is not a part of any [") output:insert(tostring(full_url("Special:Gadgets", { uselang = "en" }))) output:insert(' gadget] ([') output:insert(tostring(full_url("MediaWiki:Gadgets-definition", { action = "edit" }))) output:insert(' edit definitions]).</dd>') -- else -- cats:insert("Wiktionary gadgets") end end if old_doc_title then output:insert("<dd> ''Redirected from'' [") output:insert(old_doc_title:fullUrl { redirect = "no" }) output:insert(" ") output:insert(old_doc_title.fullText) output:insert("] ([") output:insert(old_doc_title:fullUrl { action = "edit" }) output:insert(" සංස්කරණය]).</dd>\n") end if not args.nolinks then local links = Array() if title.isSubpage and not args.notsubpage then links:insert("[[:" .. title.nsText .. ":" .. title.rootText .. "|root page]]") links:insert("[[Special:PrefixIndex/" .. title.nsText .. ":" .. title.rootText .. "/|root page’s subpages]]") else links:insert("[[Special:PrefixIndex/" .. title.fullText .. "/|උප පිටු ලැයිස්තුව]]") end links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidetrans = true, hideredirs = true })) .. " සබැඳි]") if contentModel ~= "Scribunto" then links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hidetrans = true })) .. " යළි යොමු]") end if is_script_or_stylesheet then if user_name then links:insert("[[Special:MyPage" .. title.text:sub(#title.rootText + 1) .. "|your own]]") end else links:insert( "[" .. tostring(full_url("Special:WhatLinksHere/" .. title.fullText, { hidelinks = true, hideredirs = true })) .. " transclusions]") end if contentModel == "Scribunto" then local is_testcases = title.isSubpage and title.subpageText == "testcases" local without_subpage = title.nsText .. ":" .. 
title.baseText if is_testcases then links:insert("[[:" .. without_subpage .. "|tested module]]") else links:insert("[[" .. title.fullText .. "/testcases|testcases]]") end if user_name then links:insert("[[User:" .. user_name .. "|user page]]") links:insert("[[User talk:" .. user_name .. "|user talk page]]") links:insert("[[Special:PrefixIndex/User:" .. user_name .. "/|userspace]]") -- If sandbox module, add a link to the module that this is a sandbox of. -- Exclude user sandbox modules like [[User:Dine2016/sandbox]]. elseif title.text:find("^sandbox%d*/") or title.text:find("/sandbox%d*%f[/%z]") then cats:insert("වැලිපිලි මොඩියුල") -- Sandbox modules don’t really need documentation. needs_doc = false -- Don't track user sandbox modules. local text_title = new_title(title.text) if not (text_title and text_title.nsText == "User") then local diff local sandbox_of = title.text:match("^(.*)/sandbox%d*%f[/%z]") if sandbox_of then track("sandbox to be moved") else sandbox_of = title.text:match("^sandbox%d*/(.*)$") end if not sandbox_of then error(("Internal error: Something wrong, couldn't extract sandbox-of module from title '%s'") :format(title.text)) end sandbox_of = title.nsText .. ":" .. sandbox_of if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "වෙනස") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) end -- If not a sandbox module, add link to sandbox module. -- Sometimes there are multiple sandboxes for a single module: -- [[Module:sandbox/sa-pronunc]], [[Module:sandbox2/sa-pronunc]]. else local sandbox_title local user_prefix, user_rest = title.text:match("^(User:.-/)(.*)$") if not user_prefix then user_prefix = "" user_rest = title.text end sandbox_title = title.nsText .. ":" .. user_prefix .. "sandbox/" .. user_rest local sandbox_link = "[[:" .. sandbox_title .. "|sandbox]]" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "වෙනස") .. ")" end links:insert(sandbox_link .. (diff or "")) end end if title.nsText == "සැකිල්ල" then -- Error search: all(any namespace), hastemplate (show pages using the template), insource (show source code), incategory (any/specific error) -- [[mw:Help:CirrusSearch]], [[w:Help:Searching/Regex]] -- apparently same with/without: &profile=advanced&fulltext=1 local errorq = 'searchengineselect=mediawiki&search=all: hastemplate:\"' .. title.rootText .. '\" insource:\"' .. title.rootText .. '\" incategory:' local eincategory = "Pages_with_module_errors|ParserFunction_errors|DisplayTitle_errors|Pages_with_ISBN_errors|Pages_with_ISSN_errors|Pages_with_reference_errors|Pages_with_syntax_highlighting_errors|Pages_with_TemplateStyles_errors" links:insert( '[' .. tostring(full_url('Special:Search', errorq .. eincategory)) .. ' errors]' .. ' (' .. '[' .. tostring(full_url('Special:Search', errorq .. 'ParserFunction_errors')) .. ' parser]' .. '/' .. '[' .. tostring(full_url('Special:Search', errorq .. 'Pages_with_module_errors')) .. ' module]' .. ')' ) if title.isSubpage and title.text:find("/sandbox%d*%f[/%z]") then -- This is a sandbox template. -- At the moment there are no user sandbox templates with subpage -- “/sandbox”. cats:insert("වැලිපිලි සැකිලි") -- Sandbox templates don’t really need documentation. needs_doc = false -- Will behave badly if “/sandbox” occurs twice in title! local sandbox_of = title.fullText:gsub("/sandbox%d*%f[/%z]", "") local diff if title_exists(sandbox_of) then diff = " (" .. compare_pages(title.fullText, sandbox_of, "වෙනස") .. ")" else track("no sandbox of") end links:insert("[[:" .. sandbox_of .. "|sandbox of]]" .. (diff or "")) -- This is a template that can have a sandbox. elseif not args.nosandbox then -- unless we tell it not to local sandbox_title = title.fullText .. "/sandbox" local diff if title_exists(sandbox_title) then diff = " (" .. 
compare_pages(title.fullText, sandbox_title, "වෙනස") .. ")" end links:insert("[[:" .. sandbox_title .. "|sandbox]]" .. (diff or "")) end end if #links > 0 then output:insert("<dd> ''ප්‍රයෝජනවත් සබැඳි'': " .. links:concat(" • ") .. "</dd>") end end output:insert("</dl>\n") -- Show error from [[Module:category tree/topic cat/data]] on its submodules' -- documentation to, for instance, warn about duplicate labels. if startswith(title.fullText, "Module:category tree/topic/") then local ok, err = pcall(require, "Module:category tree/topic/data") if not ok then output:insert('<span class="error">' .. err .. '</span>\n\n') end end if doc_title and doc_title.exists then -- Override automatic documentation, if present. doc_content = expand_template { title = doc_title.fullText } elseif not doc_content and fallback_docs then doc_content = expand_template { title = fallback_docs, args = { ['user'] = user_name, ['page'] = title.fullText, ['skin name'] = skin_name, }, } end if doc_content then output:insert(doc_content) end output:insert(('\n<%s style="clear: both;" />'):format(args.hr == "below" and "hr" or "br")) if cats_auto_generated and not cats[1] and (not doc_content or not doc_content:find("%[%[Category:")) then if contentModel == "Scribunto" then cats:insert("ප්‍රවර්ගීකරණය නොවූ මොඩියුල") -- elseif title.nsText == "Template" then -- cats:insert("Uncategorized templates") end end if needs_doc then cats:insert("Templates and modules needing documentation") end for _, cat in ipairs(cats) do output:insert("[[Category:" .. cat .. "]]") end output:insert("</div>\n") return output:concat() end function export.module_auto_doc_table() local parts = {} local function ins(text) insert(parts, text) end ins('{|class="wikitable"') ins("! Regex !! Category !! 
Handling modules") for _, spec in ipairs(module_regex) do local cat_text local cats = spec.cat if cats then local cat_parts = {} if type(cats) == "string" then cats = { cats } end for _, cat in ipairs(cats) do insert(cat_parts, ("<code>%s</code>"):format((cat:gsub("|", "&#124;")))) end cat_text = concat(cat_parts, ", ") else cat_text = "''(unspecified)''" end ins("|-") ins(("| <code>%s</code> || %s || %s"):format(spec.regex, cat_text, is_callable(spec.process) and "''(handled internally)''" or type(spec.process) == "string" and ("[[Module:documentation/functions/%s]]"):format(spec.process) or "''(no documentation generator)''")) end ins("|}") return concat(parts, "\n") end

-- Shared implementation of the three `*ModuleLangList` entry points below, which previously were
-- copy-pasted variants of the same body.
--
-- `frame` is the invoking frame; `frame.args[1]`, if given, is used as the module pagename, otherwise the pagename is
-- taken from the current title (using the root page when the current page is a subpage).
--
-- `spec` is a table with the following string fields:
--   by_module                  : name of a module returning a function that maps a module pagename to the list of
--                                language objects using that module
--   pagename_pattern           : Lua pattern whose single capture is the language code embedded in the pagename
--   count_category             : format string (one %d, the number of languages) for the "used by N languages"
--                                category link
--   missing_testcases_category : category link added when "Module:<pagename>/testcases" does not exist
--   action                     : verb phrase for the summary sentence ("It is [also] used to <action> <langs>.")
--
-- Returns the summary sentence followed by the category links, or only the category links when no languages remain to
-- be listed.
local function module_lang_list(frame, spec)
	local pagename, subpage
	if frame.args[1] then
		pagename = frame.args[1]
	else
		local title = get_current_title()
		subpage = title.subpageText
		pagename = title.text
		if subpage ~= pagename then
			pagename = title.rootText
		end
	end
	-- `subpage` stays nil when the pagename came from frame.args[1], so such calls are never treated as
	-- documentation pages.
	local on_doc_page = subpage == "documentation"

	local language_objects = require(spec.by_module)(pagename)
	local code_in_pagename = pagename:match(spec.pagename_pattern)
	local categories = Array()
	-- Set when at least one language other than the one named in the pagename uses the module; this is what
	-- triggers the word "also" in the summary sentence.
	local has_other_langs = false

	if code_in_pagename then
		if language_objects[1] and not on_doc_page then
			-- The count is taken before the language named in the pagename is filtered out below.
			categories:insert(spec.count_category:format(#language_objects))
		end
		language_objects = Array(language_objects)
			:filter(
				function(lang)
					local is_other = lang:getCode() ~= code_in_pagename
					has_other_langs = has_other_langs or is_other
					return is_other
				end)
	end

	if not on_doc_page then
		-- A hyphen-delimited `Xxxx` segment of the pagename is looked up as a script code.
		for script_code in pagename:gmatch("%f[^-%z]%u%l%l%l%f[-]") do
			local script = get_script(script_code)
			if script then
				categories:insert("[[Category:" .. script:getCategoryName() .. "]]")
			end
		end
		if not title_exists("Module:" .. pagename .. "/testcases") then
			categories:insert(spec.missing_testcases_category)
		end
	end

	if not language_objects[1] then
		return categories:concat()
	end

	local langs = Array(language_objects)
		:sort(
			function(lang1, lang2)
				return lang1:getCode() < lang2:getCode()
			end)
		-- This will not error because language_objects is not empty.
		:map(language_objects[1].makeCategoryLink)
		:serialCommaJoin()

	-- The space after "also" belongs to it, so that no double space is emitted when "also" is absent.
	return "It is " .. (has_other_langs and "also " or "") .. "used to " .. spec.action .. " " .. langs .. "." ..
		categories:concat()
end

-- Used by {{translit module documentation}}.
function export.translitModuleLangList(frame)
	return module_lang_list(frame, {
		by_module = "Module:languages/byTranslitModule",
		pagename_pattern = "^([%l-]+)%-.*translit$",
		count_category = "[[ප්‍රවර්ගය:භාෂා %d ක් මගින් භාවිතා වන අක්ෂර පරිවර්තන මොඩියුල]]",
		missing_testcases_category = "[[Category:Transliteration modules without a testcases subpage]]",
		action = "transliterate",
	})
end

-- Used by {{strip diacritics module documentation}}.
function export.stripDiacriticsModuleLangList(frame)
	return module_lang_list(frame, {
		by_module = "Module:languages/byStripDiacriticsModule",
		pagename_pattern = "^([%l-]+)%-.*stripdiacritics$",
		-- No English plural "s" is appended to the Sinhala category name (consistent with the transliteration
		-- variant above).
		count_category = "[[ප්‍රවර්ගය:භාෂා %d ක් මගින් භාවිතා වන ප්‍රවේශ නාම-ජනක මොඩියුල]]",
		missing_testcases_category = "[[Category:Diacritic-stripping modules without a testcases subpage]]",
		action = "strip diacritics for",
	})
end

-- Used by {{sortkey module documentation}}.
function export.sortkeyModuleLangList(frame)
	return module_lang_list(frame, {
		by_module = "Module:languages/bySortkeyModule",
		pagename_pattern = "^([%l-]+)%-.*sortkey$",
		-- No English plural "s" is appended to the Sinhala category name (consistent with the transliteration
		-- variant above).
		count_category = "[[ප්‍රවර්ගය:භාෂා %d ක් මගින් භාවිතා වන සෝට්කී-ජනක මොඩියුල]]",
		missing_testcases_category = "[[Category:Sortkey-generating modules without a testcases subpage]]",
		action = "sort",
	})
end

return export
e3hwza93077wtb9e1tthhs9dg2xew04 Module:module categorization 828 6853 231497 221400 2025-12-21T05:45:54Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231497 Scribunto text/plain
local export = {}

local put_module = "Module:parse utilities"

local rsplit = mw.text.split
local rfind = mw.ustring.find
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

local keyword_to_module_type = {
	common = "Language-specific utility",
	utilities = "Language-specific utility",
	headword = "Headword-line",
	translit = "Transliteration",
	infl = "Inflection",
	inflection = "Inflection",
	decl = "Inflection",
	declension = "Inflection",
	adecl = "Inflection",
	conj = "Inflection",
	conjugation = "Inflection",
	noun = "Inflection",
	nouns = "Inflection",
	pronoun = "Inflection",
	pronouns = "Inflection",
	verb = "Inflection",
	verbs = "Inflection",
	adjective = "Inflection",
	adjectives = "Inflection",
	adj = "Inflection",
	nominal = "Inflection",
	nominals = "Inflection",
	pron = "Pronunciation",
	pronun = "Pronunciation",
	pronunc = "Pronunciation",
	pronunciation = "Pronunciation",
	IPA = "Pronunciation",
	stripdiacritics = "Diacritic-stripping",
	sortkey = "Sortkey-generating",
}

-- If a module type is here, we will generate a lang-specific module-type category such as
-- [[:Category:Pali inflection modules]].
local module_type_generates_lang_specific_cat = {
	Inflection = true,
	Data = true,
	Testcase = true,
}

-- Maps a module type to the name of a module that returns a function fetching all the languages that use a given
-- module for transliteration/diacritic-stripping/sortkey generation. The categorizer consults this table to pick up
-- additional languages beyond those named in the pagename.
local translit_languages_module = "Module:languages/byTranslitModule"
local languages_from_module_name = {
	["Transliteration"] = translit_languages_module,
	["Transliteration testcase"] = translit_languages_module,
	["Diacritic-stripping"] = "Module:languages/byStripDiacriticsModule",
	["Sortkey-generating"] = "Module:languages/bySortkeyModule",
}

-- Testcase module types that differ from the generic "Testcase", keyed by the base module type.
local special_testcase_types = {
	Pronunciation = "Pronunciation testcase",
	Transliteration = "Transliteration testcase",
}

-- List of {pattern, inferred type} pairs matched against the pagename. The inferred type is either a string, or a
-- function deriving the extra type from an already-known module type.
local module_type_patterns = {
	{"/data%f[-/%z]", "Data"},
	{"/testcases%f[-/%z]", function(typ)
		return special_testcase_types[typ] or "Testcase"
	end},
}

-- Split an argument on comma, but not comma followed by whitespace. The heavier parse-utilities splitter is only
-- loaded when the value actually contains a comma followed by whitespace.
local function split_on_comma(val)
	if not val:find(",%s") then
		return rsplit(val, ",")
	end
	return require(put_module).split_on_comma(val)
end

-- Resolve `code` to an object: "-" is passed through unchanged; otherwise try a language with that code, then a
-- language with code `code .. "-pro"`, then a script. Returns a falsy value if nothing matches.
local function get_lang_or_script(code)
	if code == "-" then
		return code
	end
	local m_languages = require("Module:languages")
	return m_languages.getByCode(code, nil, "allow etym")
		or m_languages.getByCode(code .. "-pro", nil, "allow etym")
		or require("Module:scripts").getByCode(code)
end

-- Key function for deduplicating objects: the object's code, or "-" for the "-" placeholder.
local function obj_code(obj)
	if obj ~= "-" then
		return obj:getCode()
	end
	return obj
end

-- Find the longest hyphen-joined prefix of `name` (always leaving at least one trailing part) that resolves via
-- get_lang_or_script(). Returns the object and the remainder of the name, or nil, nil if no prefix resolves.
local function infer_lang_or_script_code(name)
	local pieces = rsplit(name, "%-")
	local prefix_len = #pieces - 1
	while prefix_len >= 1 do
		local found = get_lang_or_script(table.concat(pieces, "-", 1, prefix_len))
		if found then
			return found, table.concat(pieces, "-", prefix_len + 1)
		end
		prefix_len = prefix_len - 1
	end
	return nil, nil
end

-- Repeatedly strip leading language/script codes off `name`. Returns the list of objects found and whatever is left
-- of the name once no further prefix resolves.
local function infer_lang_and_script_codes(name)
	local found = {}
	local obj, rest = infer_lang_or_script_code(name)
	while obj do
		-- Skip a non-initial 'to' as in e.g. [[Module:ks-Arab-to-Deva-translit]]; it's not Tongan.
		if #found == 0 or obj:getCode() ~= "to" then
			table.insert(found, obj)
		end
		name = rest
		obj, rest = infer_lang_or_script_code(name)
	end
	return found, name
end

--[==[
Main entry point. Can be called from Lua or another module. `return_raw` set to true makes function return a table of
categories with {"[[Category:"} and {"]]"} stripped away. It is used by [[Module:documentation]].
]==]
-- Parameters, as used by the body:
--   frame      : frame-like object. `frame.args[1]`, if set, is the pagename to categorize. If
--                `frame.args.is_template` is set, the parent frame's arguments are processed (1=, type= with alias
--                2=, pagename=, return_cats=).
--   return_raw : if truthy, return the list of categories as a table of "Name" or "Name|sortkey" strings.
--   noerror    : if truthy, return the empty value ({} or "") instead of throwing when the module type or the
--                languages/scripts cannot be inferred from the pagename. Other errors are still thrown.
-- Returns a comma-joined string if return_cats= is set, else the raw table if `return_raw`, else the concatenated
-- "[[Category:...]]" wikitext.
function export.categorize(frame, return_raw, noerror)
	local categories = {}
	-- Add a category (with optional sortkey) unless a category of the same name was already added; the first
	-- sortkey wins.
	local function insert_cat(cat, sortkey)
		for _, existing_cat in ipairs(categories) do
			if existing_cat.name == cat then
				return
			end
		end
		table.insert(categories, {name = cat, sort = sortkey})
	end

	local pagename
	if frame.args[1] then
		pagename = frame.args[1]
	end

	local args
	if frame.args.is_template then
		local params = {
			[1] = {}, -- comma-separated list of languages; by default, inferred from module name
			["type"] = {},
			[2] = {alias_of = "type"},
			["pagename"] = {}, -- for testing
			["return_cats"] = {type = "boolean"}, -- for testing
		}

		local parent_args = frame:getParent().args
		args = require("Module:parameters").process(parent_args, params)
	else
		args = {}
	end

	pagename = pagename or args.pagename
	local title
	if pagename then
		title = mw.title.new(pagename, 'Module')
	else
		title = mw.title.getCurrentTitle()
		-- Sometimes this function is called with a faked frame and a title with the namespace already chopped
		-- out, so the namespace test below is only done here, where the title comes from the current page.
		if title.nsText ~= "Module" then
			error(("This template should only be used in the Module namespace, not on page '%s'."):format(title.fullText))
		end
		pagename = title.fullText
	end
	local subpage = title.subpageText

	-- Value returned whenever there is nothing to categorize; its type matches the normal return type.
	local null_return_value = return_raw and {} or ""

	-- To ensure no categories are added on documentation pages.
	if subpage == "documentation" then
		return null_return_value
	end

	local root_pagename
	if subpage ~= pagename then
		root_pagename = title.rootText
	else
		root_pagename = pagename
	end
	-- Only the first result of gsub (the string) is kept by the assignment.
	root_pagename = root_pagename:gsub("^Module:", "")

	-- Take the module type(s) from type= if given, or infer from the pagename.
	local module_types
	if args.type then
		module_types = {}
		local module_type_specs = split_on_comma(args.type)
		for _, spec in ipairs(module_type_specs) do
			-- Each spec is either "type" or "type:sortkey"; underscores in the sortkey stand for spaces.
			local modtype, sortkey = spec:match("^(.-):(.*)$")
			modtype = modtype or spec
			sortkey = sortkey and sortkey:gsub("_", " ") or nil
			table.insert(module_types, {type = modtype, sort = sortkey})
		end
	else
		-- The keyword is the part of the root pagename after a hyphen or space, up to a slash or the end.
		local module_type_keyword = root_pagename:match("[-%a]+[- ]([^/]+)%f[/%z]")
		if not module_type_keyword then
			if noerror then
				return null_return_value
			else
				error(("Could not extract module type from root pagename '%s'"):format(root_pagename))
			end
		end
		local module_type = keyword_to_module_type[module_type_keyword]
		if not module_type then
			if noerror then
				return null_return_value
			else
				error(("Did not recognize inferred module-type keyword '%s' from root pagename '%s'"):format(
					module_type_keyword, root_pagename))
			end
		end
		module_types = {{type = module_type}}
	end

	-- Look for additional module type(s) inferred by pattern.
	for _, pattern_spec in ipairs(module_type_patterns) do
		local pattern, inferred_type = unpack(pattern_spec)
		if rfind(pagename, pattern) then
			-- Append `typ` unless an entry with the same `type` field is already present.
			local function insert_module_type(typ)
				require("Module:table").insertIfNot(module_types, typ, {key = function(obj) return obj.type end})
			end
			if type(inferred_type) == "string" then
				insert_module_type({type = inferred_type})
			else
				-- Collect the derived types first so that module_types is not modified while being iterated.
				local addl_types = {}
				for _, typ in ipairs(module_types) do
					table.insert(addl_types, {type = inferred_type(typ.type), sort = typ.sort})
				end
				for _, typ in ipairs(addl_types) do
					insert_module_type(typ)
				end
			end
		end
	end

	-- If 1= specified, take the languages/scripts directly from there. Otherwise, (a) try to extract one or more
	-- languages/scripts from the pagename (e.g. [[Module:uk-be-headword]] -> Ukrainian and Belarusian (languages);
	-- [[Module:bho-Kthi-translit]] -> Bhojpuri (language) and Kaithi (script); [[Module:Deva-Kthi-translit]] ->
	-- Devanagari and Kaithi (scripts)); and (b) if the specified or inferred module type(s) contain a type listed in
	-- languages_from_module_name[], use the function referenced there to extract additional languages (i.e. all the
	-- languages that use the module we are processing).
	local inferred_objs
	if args[1] then
		inferred_objs = {}
		for _, code in ipairs(rsplit(args[1], ",")) do
			-- We need to have an indicator of families because we allow bare family codes to stand for proto-languages.
			if code:find("^fam:") then
				code = code:gsub("^fam:", "")
				local family = require("Module:families").getByCode(code) or
					error(("Unrecognized family code '%s' in [[Module:module categorization]]"):format(code))
				local descendants = family:getDescendantCodes()
				for _, desc in ipairs(descendants) do
					local obj = get_lang_or_script(desc)
					if obj then -- make sure we skip families without proto-languages
						table.insert(inferred_objs, obj)
					end
				end
			else
				local obj = get_lang_or_script(code)
				if not obj then
					error(("Unrecognized language or script code '%s'"):format(code))
				end
				table.insert(inferred_objs, obj)
			end
		end
	else
		-- Only the first return value (the list of objects) is kept.
		inferred_objs = infer_lang_and_script_codes(root_pagename)
		for _, module_type in ipairs(module_types) do
			local languages_extractor = languages_from_module_name[module_type.type]
			if languages_extractor then
				local langs = require(languages_extractor)(root_pagename)
				if langs then
					for _, obj in ipairs(langs) do
						require("Module:table").insertIfNot(inferred_objs, obj, {key = obj_code})
					end
				end
			end
		end
		if #inferred_objs == 0 then
			if noerror then
				return null_return_value
			else
				error(("Could not infer any languages or scripts from root pagename '%s'"):format(root_pagename))
			end
		end
	end

	-- User and sandbox modules get a single fixed category; everything else gets one set of categories per
	-- (module type, language/script) combination.
	if pagename:find("^Module:User:") then
		insert_cat("User sandbox modules")
	elseif pagename:find("/sandbox") then
		insert_cat("Sandbox modules")
	else
		for _, module_type in ipairs(module_types) do
			for _, obj in ipairs(inferred_objs) do
				-- Add the overall "<type> modules" category, unless the type is the "-" placeholder. An explicit
				-- sortkey from type= takes precedence over the one passed in.
				local function insert_overall_module_type_cat(sortkey)
					if module_type.type ~= "-" then
						insert_cat(module_type.type .. " modules", module_type.sort or sortkey)
					end
				end
				if obj == "-" then
					insert_overall_module_type_cat()
				else
					if obj:hasType("script") and module_type.type ~= "-" then
						insert_cat(module_type.type .. " modules by script", obj:getCanonicalName())
					end
					-- Build "<name> <suffix>", using the full name for languages and the category name otherwise.
					local function construct_lang_or_sc_cat(obj, suffix)
						local prefix
						if obj:hasType("language") then
							prefix = obj:getFullName()
						else
							prefix = obj:getCategoryName()
						end
						return prefix .. " " .. suffix
					end
					insert_cat(construct_lang_or_sc_cat(obj, "modules"), module_type.type)
					insert_overall_module_type_cat(obj:getCanonicalName())
					if module_type_generates_lang_specific_cat[module_type.type] then
						insert_cat(construct_lang_or_sc_cat(obj, mw.getContentLanguage():lcfirst(module_type.type) .. " modules"))
					end
				end
			end
		end
	end

	-- Flatten each {name, sort} record into "name" or "name|sort", in place.
	for i, catspec in ipairs(categories) do
		if catspec.sort then
			categories[i] = ("%s|%s"):format(catspec.name, catspec.sort)
		else
			categories[i] = catspec.name
		end
	end

	if args.return_cats then
		return table.concat(categories, ",")
	elseif return_raw then
		return categories
	else
		for i, cat in ipairs(categories) do
			categories[i] = "[[Category:" .. cat .. "]]"
		end
		return table.concat(categories)
	end
end

--[==[Table used in the documentation to {{tl|module cat}}.]==]
-- Returns wikitext for a two-column table listing every keyword in `keyword_to_module_type` (sorted) next to the
-- module type it maps to.
function export.keyword_to_module_type_table()
	local parts = {}
	local function ins(text)
		table.insert(parts, text)
	end
	ins('{|class="wikitable"')
	ins("! Keyword !! Inferred module type")
	-- pairs() order is unspecified, so collect and sort the keys for a stable listing.
	local keywords = {}
	for k, v in pairs(keyword_to_module_type) do
		table.insert(keywords, k)
	end
	table.sort(keywords)
	for _, keyword in ipairs(keywords) do
		ins("|-")
		ins(("| <code>%s</code> || <code>%s</code>"):format(keyword, keyword_to_module_type[keyword]))
	end
	ins("|}")
	return table.concat(parts, "\n")
end

return export
49kgokr598jhf8kb74maya3v6r032gc 231498 231497 2026-04-15T17:10:33Z Lee 19 [[:en:Module:module_categorization]] වෙතින් එක් සංශෝධනයක් 231497 Scribunto text/plain
local export = {}

local put_module = "Module:parse utilities"

local rsplit = mw.text.split
local rfind = mw.ustring.find
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Maps the keyword found in a module's root pagename to the module type used in category names.
local keyword_to_module_type = {
	common = "Language-specific utility",
	utilities = "Language-specific utility",
	headword = "Headword-line",
	translit = "Transliteration",
	infl = "Inflection",
	inflection = "Inflection",
	decl = "Inflection",
	declension = "Inflection",
	adecl = "Inflection",
	conj = "Inflection",
	conjugation = "Inflection",
	noun = "Inflection",
	nouns = "Inflection",
	pronoun = "Inflection",
	pronouns = "Inflection",
	verb = "Inflection",
	verbs = "Inflection",
	adjective = "Inflection",
	adjectives = "Inflection",
	adj = "Inflection",
	nominal = "Inflection",
	nominals = "Inflection",
	pron = "Pronunciation",
	pronun = "Pronunciation",
	pronunc = "Pronunciation",
	pronunciation = "Pronunciation",
	IPA = "Pronunciation",
	stripdiacritics = "Diacritic-stripping",
	sortkey = "Sortkey-generating",
}

-- If a module type is here, we will generate a lang-specific module-type category such as
-- [[:Category:Pali inflection modules]].
local module_type_generates_lang_specific_cat = {
	["Inflection"] = true,
	["Data"] = true,
	["Testcase"] = true,
}

-- If a module type is here, we will generate a lang-specific module-type category such as
-- [[:Category:Pali inflection modules]].
-- The value is a module that returns a function that fetches all the
-- languages that use a given module for transliteration/diacritic-stripping/sortkey generation.
local translit_languages_module = "Module:languages/byTranslitModule"
local languages_from_module_name = {
	["Transliteration"] = translit_languages_module,
	["Transliteration testcase"] = translit_languages_module,
	["Diacritic-stripping"] = "Module:languages/byStripDiacriticsModule",
	["Sortkey-generating"] = "Module:languages/bySortkeyModule",
}

-- Testcase module types that differ from the generic "Testcase", keyed by the base module type.
local special_testcase_types = {
	Pronunciation = "Pronunciation testcase",
	Transliteration = "Transliteration testcase",
}

-- List of {pattern, inferred type} pairs matched against the pagename. The inferred type is either a string, or a
-- function deriving the extra type from an already-known module type.
local module_type_patterns = {
	{"/data%f[-/%z]", "Data"},
	{"/testcases%f[-/%z]", function(typ)
		return special_testcase_types[typ] or "Testcase"
	end},
}

-- Split an argument on comma, but not comma followed by whitespace. The heavier parse-utilities splitter is only
-- loaded when the value actually contains a comma followed by whitespace.
local function split_on_comma(val)
	if not val:find(",%s") then
		return rsplit(val, ",")
	end
	return require(put_module).split_on_comma(val)
end

-- Resolve `code` to an object: "-" is passed through unchanged; otherwise try a language with that code, then a
-- language with code `code .. "-pro"`, then a script. Returns a falsy value if nothing matches.
local function get_lang_or_script(code)
	if code == "-" then
		return code
	end
	local m_languages = require("Module:languages")
	return m_languages.getByCode(code, nil, "allow etym")
		or m_languages.getByCode(code .. "-pro", nil, "allow etym")
		or require("Module:scripts").getByCode(code)
end

-- Key function for deduplicating objects: the object's code, or "-" for the "-" placeholder.
local function obj_code(obj)
	if obj ~= "-" then
		return obj:getCode()
	end
	return obj
end

-- Find the longest hyphen-joined prefix of `name` (always leaving at least one trailing part) that resolves via
-- get_lang_or_script(). Returns the object and the remainder of the name, or nil, nil if no prefix resolves.
local function infer_lang_or_script_code(name)
	local pieces = rsplit(name, "%-")
	local prefix_len = #pieces - 1
	while prefix_len >= 1 do
		local found = get_lang_or_script(table.concat(pieces, "-", 1, prefix_len))
		if found then
			return found, table.concat(pieces, "-", prefix_len + 1)
		end
		prefix_len = prefix_len - 1
	end
	return nil, nil
end

-- Repeatedly strip leading language/script codes off `name`. Returns the list of objects found and whatever is left
-- of the name once no further prefix resolves.
local function infer_lang_and_script_codes(name)
	local found = {}
	local obj, rest = infer_lang_or_script_code(name)
	while obj do
		-- Skip a non-initial 'to' as in e.g. [[Module:ks-Arab-to-Deva-translit]]; it's not Tongan.
		if #found == 0 or obj:getCode() ~= "to" then
			table.insert(found, obj)
		end
		name = rest
		obj, rest = infer_lang_or_script_code(name)
	end
	return found, name
end

--[==[ Main entry point.
Can be called from Lua or another module. `return_raw` set to true makes function return a table of
categories with {"[[Category:"} and {"]]"} stripped away. It is used by [[Module:documentation]].
]==]
-- Parameters, as used by the body:
--   frame      : frame-like object. `frame.args[1]`, if set, is the pagename to categorize. If
--                `frame.args.is_template` is set, the parent frame's arguments are processed (1=, type= with alias
--                2=, pagename=, return_cats=).
--   return_raw : if truthy, return the list of categories as a table of "Name" or "Name|sortkey" strings.
--   noerror    : if truthy, return the empty value ({} or "") instead of throwing when the module type or the
--                languages/scripts cannot be inferred from the pagename. Other errors are still thrown.
-- Returns a comma-joined string if return_cats= is set, else the raw table if `return_raw`, else the concatenated
-- "[[Category:...]]" wikitext.
function export.categorize(frame, return_raw, noerror)
	local categories = {}
	-- Add a category (with optional sortkey) unless a category of the same name was already added; the first
	-- sortkey wins.
	local function insert_cat(cat, sortkey)
		for _, existing_cat in ipairs(categories) do
			if existing_cat.name == cat then
				return
			end
		end
		table.insert(categories, {name = cat, sort = sortkey})
	end

	local pagename
	if frame.args[1] then
		pagename = frame.args[1]
	end

	local args
	if frame.args.is_template then
		local params = {
			[1] = {}, -- comma-separated list of languages; by default, inferred from module name
			["type"] = {},
			[2] = {alias_of = "type"},
			["pagename"] = {}, -- for testing
			["return_cats"] = {type = "boolean"}, -- for testing
		}

		local parent_args = frame:getParent().args
		args = require("Module:parameters").process(parent_args, params)
	else
		args = {}
	end

	pagename = pagename or args.pagename
	local title
	if pagename then
		title = mw.title.new(pagename, 'Module')
	else
		title = mw.title.getCurrentTitle()
		-- Sometimes this function is called with a faked frame and a title with the namespace already chopped
		-- out, so the namespace test below is only done here, where the title comes from the current page.
		if title.nsText ~= "Module" then
			error(("This template should only be used in the Module namespace, not on page '%s'."):format(title.fullText))
		end
		pagename = title.fullText
	end
	local subpage = title.subpageText

	-- Value returned whenever there is nothing to categorize; its type matches the normal return type.
	local null_return_value = return_raw and {} or ""

	-- To ensure no categories are added on documentation pages.
	if subpage == "documentation" then
		return null_return_value
	end

	local root_pagename
	if subpage ~= pagename then
		root_pagename = title.rootText
	else
		root_pagename = pagename
	end
	-- Only the first result of gsub (the string) is kept by the assignment.
	root_pagename = root_pagename:gsub("^Module:", "")

	-- Take the module type(s) from type= if given, or infer from the pagename.
	local module_types
	if args.type then
		module_types = {}
		local module_type_specs = split_on_comma(args.type)
		for _, spec in ipairs(module_type_specs) do
			-- Each spec is either "type" or "type:sortkey"; underscores in the sortkey stand for spaces.
			local modtype, sortkey = spec:match("^(.-):(.*)$")
			modtype = modtype or spec
			sortkey = sortkey and sortkey:gsub("_", " ") or nil
			table.insert(module_types, {type = modtype, sort = sortkey})
		end
	else
		-- The keyword is the part of the root pagename after a hyphen or space, up to a slash or the end.
		local module_type_keyword = root_pagename:match("[-%a]+[- ]([^/]+)%f[/%z]")
		if not module_type_keyword then
			if noerror then
				return null_return_value
			else
				error(("Could not extract module type from root pagename '%s'"):format(root_pagename))
			end
		end
		local module_type = keyword_to_module_type[module_type_keyword]
		if not module_type then
			if noerror then
				return null_return_value
			else
				error(("Did not recognize inferred module-type keyword '%s' from root pagename '%s'"):format(
					module_type_keyword, root_pagename))
			end
		end
		module_types = {{type = module_type}}
	end

	-- Look for additional module type(s) inferred by pattern.
	for _, pattern_spec in ipairs(module_type_patterns) do
		local pattern, inferred_type = unpack(pattern_spec)
		if rfind(pagename, pattern) then
			-- Append `typ` unless an entry with the same `type` field is already present.
			local function insert_module_type(typ)
				require("Module:table").insertIfNot(module_types, typ, {key = function(obj) return obj.type end})
			end
			if type(inferred_type) == "string" then
				insert_module_type({type = inferred_type})
			else
				-- Collect the derived types first so that module_types is not modified while being iterated.
				local addl_types = {}
				for _, typ in ipairs(module_types) do
					table.insert(addl_types, {type = inferred_type(typ.type), sort = typ.sort})
				end
				for _, typ in ipairs(addl_types) do
					insert_module_type(typ)
				end
			end
		end
	end

	-- If 1= specified, take the languages/scripts directly from there. Otherwise, (a) try to extract one or more
	-- languages/scripts from the pagename (e.g. [[Module:uk-be-headword]] -> Ukrainian and Belarusian (languages);
	-- [[Module:bho-Kthi-translit]] -> Bhojpuri (language) and Kaithi (script); [[Module:Deva-Kthi-translit]] ->
	-- Devanagari and Kaithi (scripts)); and (b) if the specified or inferred module type(s) contain a type listed in
	-- languages_from_module_name[], use the function referenced there to extract additional languages (i.e. all the
	-- languages that use the module we are processing).
	local inferred_objs
	if args[1] then
		inferred_objs = {}
		for _, code in ipairs(rsplit(args[1], ",")) do
			-- We need to have an indicator of families because we allow bare family codes to stand for proto-languages.
			if code:find("^fam:") then
				code = code:gsub("^fam:", "")
				local family = require("Module:families").getByCode(code) or
					error(("Unrecognized family code '%s' in [[Module:module categorization]]"):format(code))
				local descendants = family:getDescendantCodes()
				for _, desc in ipairs(descendants) do
					local obj = get_lang_or_script(desc)
					if obj then -- make sure we skip families without proto-languages
						table.insert(inferred_objs, obj)
					end
				end
			else
				local obj = get_lang_or_script(code)
				if not obj then
					error(("Unrecognized language or script code '%s'"):format(code))
				end
				table.insert(inferred_objs, obj)
			end
		end
	else
		-- Only the first return value (the list of objects) is kept.
		inferred_objs = infer_lang_and_script_codes(root_pagename)
		for _, module_type in ipairs(module_types) do
			local languages_extractor = languages_from_module_name[module_type.type]
			if languages_extractor then
				local langs = require(languages_extractor)(root_pagename)
				if langs then
					for _, obj in ipairs(langs) do
						require("Module:table").insertIfNot(inferred_objs, obj, {key = obj_code})
					end
				end
			end
		end
		if #inferred_objs == 0 then
			if noerror then
				return null_return_value
			else
				error(("Could not infer any languages or scripts from root pagename '%s'"):format(root_pagename))
			end
		end
	end

	-- User and sandbox modules get a single fixed category; everything else gets one set of categories per
	-- (module type, language/script) combination.
	if pagename:find("^Module:User:") then
		insert_cat("User sandbox modules")
	elseif pagename:find("/sandbox") then
		insert_cat("Sandbox modules")
	else
		for _, module_type in ipairs(module_types) do
			for _, obj in ipairs(inferred_objs) do
				-- Add the overall "<type> modules" category, unless the type is the "-" placeholder. An explicit
				-- sortkey from type= takes precedence over the one passed in.
				local function insert_overall_module_type_cat(sortkey)
					if module_type.type ~= "-" then
						insert_cat(module_type.type .. " modules", module_type.sort or sortkey)
					end
				end
				if obj == "-" then
					insert_overall_module_type_cat()
				else
					if obj:hasType("script") and module_type.type ~= "-" then
						insert_cat(module_type.type .. " modules by script", obj:getCanonicalName())
					end
					-- Build "<name> <suffix>", using the full name for languages and the category name otherwise.
					local function construct_lang_or_sc_cat(obj, suffix)
						local prefix
						if obj:hasType("language") then
							prefix = obj:getFullName()
						else
							prefix = obj:getCategoryName()
						end
						return prefix .. " " .. suffix
					end
					insert_cat(construct_lang_or_sc_cat(obj, "modules"), module_type.type)
					insert_overall_module_type_cat(obj:getCanonicalName())
					if module_type_generates_lang_specific_cat[module_type.type] then
						insert_cat(construct_lang_or_sc_cat(obj, mw.getContentLanguage():lcfirst(module_type.type) .. " modules"))
					end
				end
			end
		end
	end

	-- Flatten each {name, sort} record into "name" or "name|sort", in place.
	for i, catspec in ipairs(categories) do
		if catspec.sort then
			categories[i] = ("%s|%s"):format(catspec.name, catspec.sort)
		else
			categories[i] = catspec.name
		end
	end

	if args.return_cats then
		return table.concat(categories, ",")
	elseif return_raw then
		return categories
	else
		for i, cat in ipairs(categories) do
			categories[i] = "[[Category:" .. cat .. "]]"
		end
		return table.concat(categories)
	end
end

--[==[Table used in the documentation to {{tl|module cat}}.]==]
-- Returns wikitext for a two-column table listing every keyword in `keyword_to_module_type` (sorted) next to the
-- module type it maps to.
function export.keyword_to_module_type_table()
	local parts = {}
	local function ins(text)
		table.insert(parts, text)
	end
	ins('{|class="wikitable"')
	ins("! Keyword !! Inferred module type")
	-- pairs() order is unspecified, so collect and sort the keys for a stable listing.
	local keywords = {}
	for k, v in pairs(keyword_to_module_type) do
		table.insert(keywords, k)
	end
	table.sort(keywords)
	for _, keyword in ipairs(keywords) do
		ins("|-")
		ins(("| <code>%s</code> || <code>%s</code>"):format(keyword, keyword_to_module_type[keyword]))
	end
	ins("|}")
	return table.concat(parts, "\n")
end

return export
49kgokr598jhf8kb74maya3v6r032gc 231499 231498 2026-04-15T17:12:20Z Lee 19 පැරණි සංස්කරණයකින් ගත් කොටස්... 231499 Scribunto text/plain
local m_sinhala = require("Module:sinhala")
local export = {}

local put_module = "Module:parse utilities"

local rsplit = mw.text.split
local rfind = mw.ustring.find
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Maps the keyword found in a module's root pagename to the module type used in category names.
local keyword_to_module_type = {
	common = "Language-specific utility",
	utilities = "Language-specific utility",
	headword = "Headword-line",
	translit = "Transliteration",
	infl = "Inflection",
	inflection = "Inflection",
	decl = "Inflection",
	declension = "Inflection",
	adecl = "Inflection",
	conj = "Inflection",
	conjugation = "Inflection",
	noun = "Inflection",
	nouns = "Inflection",
	pronoun = "Inflection",
	pronouns = "Inflection",
	verb = "Inflection",
	verbs = "Inflection",
	adjective = "Inflection",
	adjectives = "Inflection",
	adj = "Inflection",
	nominal = "Inflection",
	nominals = "Inflection",
	pron = "Pronunciation",
	pronun = "Pronunciation",
	pronunc = "Pronunciation",
	pronunciation = "Pronunciation",
	IPA = "Pronunciation",
	stripdiacritics = "Diacritic-stripping",
	sortkey = "Sortkey-generating",
}

-- If a module type is here, we will generate a lang-specific module-type category such as
-- [[:Category:Pali inflection modules]].
local module_type_generates_lang_specific_cat = {
	["Inflection"] = true,
	["Data"] = true,
	["Testcase"] = true,
}

-- If a module type is here, we will generate a lang-specific module-type category such as
-- [[:Category:Pali inflection modules]].
-- The value is a module that returns a function that fetches all the
-- languages that use a given module for transliteration/diacritic-stripping/sortkey generation.
local translit_languages_module = "Module:languages/byTranslitModule"
local languages_from_module_name = {
	["Transliteration"] = translit_languages_module,
	["Transliteration testcase"] = translit_languages_module,
	["Diacritic-stripping"] = "Module:languages/byStripDiacriticsModule",
	["Sortkey-generating"] = "Module:languages/bySortkeyModule",
}

-- Testcase module types that differ from the generic "Testcase", keyed by the base module type.
local special_testcase_types = {
	Pronunciation = "Pronunciation testcase",
	Transliteration = "Transliteration testcase",
}

-- List of {pattern, inferred type} pairs matched against the pagename. The inferred type is either a string, or a
-- function deriving the extra type from an already-known module type.
local module_type_patterns = {
	{"/data%f[-/%z]", "Data"},
	{"/testcases%f[-/%z]", function(typ)
		return special_testcase_types[typ] or "Testcase"
	end},
}

-- Split an argument on comma, but not comma followed by whitespace. The heavier parse-utilities splitter is only
-- loaded when the value actually contains a comma followed by whitespace.
local function split_on_comma(val)
	if not val:find(",%s") then
		return rsplit(val, ",")
	end
	return require(put_module).split_on_comma(val)
end

-- Resolve `code` to an object: "-" is passed through unchanged; otherwise try a language with that code, then a
-- language with code `code .. "-pro"`, then a script. Returns a falsy value if nothing matches.
local function get_lang_or_script(code)
	if code == "-" then
		return code
	end
	local m_languages = require("Module:languages")
	return m_languages.getByCode(code, nil, "allow etym")
		or m_languages.getByCode(code .. "-pro", nil, "allow etym")
		or require("Module:scripts").getByCode(code)
end

-- Key function for deduplicating objects: the object's code, or "-" for the "-" placeholder.
local function obj_code(obj)
	if obj ~= "-" then
		return obj:getCode()
	end
	return obj
end

-- Find the longest hyphen-joined prefix of `name` (always leaving at least one trailing part) that resolves via
-- get_lang_or_script(). Returns the object and the remainder of the name, or nil, nil if no prefix resolves.
local function infer_lang_or_script_code(name)
	local pieces = rsplit(name, "%-")
	local prefix_len = #pieces - 1
	while prefix_len >= 1 do
		local found = get_lang_or_script(table.concat(pieces, "-", 1, prefix_len))
		if found then
			return found, table.concat(pieces, "-", prefix_len + 1)
		end
		prefix_len = prefix_len - 1
	end
	return nil, nil
end

-- Repeatedly strip leading language/script codes off `name`. Returns the list of objects found and whatever is left
-- of the name once no further prefix resolves.
local function infer_lang_and_script_codes(name)
	local found = {}
	local obj, rest = infer_lang_or_script_code(name)
	while obj do
		-- Skip a non-initial 'to' as in e.g. [[Module:ks-Arab-to-Deva-translit]]; it's not Tongan.
		if #found == 0 or obj:getCode() ~= "to" then
			table.insert(found, obj)
		end
		name = rest
		obj, rest = infer_lang_or_script_code(name)
	end
	return found, name
end

--[==[ Main entry point.
Can be called from Lua or another module. `return_raw` set to true makes function return a table of categories with
{"[[Category:"} and {"]]"} stripped away. It is used by [[Module:documentation]].

Parameters:
* `frame`: frame (or frame-like table). `frame.args[1]`, if given, is the page name to categorize; if
  `frame.args.is_template` is set, the parent frame's arguments (1=, type=/2=, pagename=, return_cats=) are processed
  as well.
* `return_raw`: if true, return the list of categories (each optionally followed by `|sortkey`) as a table.
* `noerror`: if true, return an empty value (`{}` with `return_raw`, otherwise `""`) instead of throwing an error when
  the page name is invalid or the module type or languages/scripts cannot be inferred.

Without `return_raw`, the result is a string of concatenated category links, or a comma-separated list of category
names if return_cats= was given.
]==]
function export.categorize(frame, return_raw, noerror)
	local categories = {}
	-- Value returned whenever there is nothing to categorize.
	local null_return_value = return_raw and {} or ""

	-- Append a category unless one with the same name has already been added.
	local function insert_cat(cat, sortkey)
		for _, existing_cat in ipairs(categories) do
			if existing_cat.name == cat then
				return
			end
		end
		table.insert(categories, {name = cat, sort = sortkey})
	end

	local pagename
	if frame.args[1] then
		pagename = frame.args[1]
	end

	local args
	if frame.args.is_template then
		local params = {
			[1] = {}, -- comma-separated list of languages; by default, inferred from module name
			["type"] = {},
			[2] = {alias_of = "type"},
			["pagename"] = {}, -- for testing
			["return_cats"] = {type = "boolean"}, -- for testing
		}
		local parent_args = frame:getParent().args
		args = require("Module:parameters").process(parent_args, params)
	else
		args = {}
	end

	pagename = pagename or args.pagename

	local title
	if pagename then
		title = mw.title.new(pagename, 'Module')
		-- mw.title.new() returns nil for an invalid title; fail cleanly instead of indexing nil below.
		if not title then
			if noerror then
				return null_return_value
			end
			error(("Invalid page name '%s' passed to [[Module:module categorization]]"):format(pagename))
		end
	else
		title = mw.title.getCurrentTitle()
		-- NOTE: Sometimes this function is called with a faked frame and a title with the namespace already chopped
		-- out, so this test cannot be done in that case.
		if title.nsText ~= "Module" then
			error(("This template should only be used in the Module namespace, not on page '%s'."):format(title.fullText))
		end
		pagename = title.fullText
	end

	local subpage = title.subpageText

	-- To ensure no categories are added on documentation pages.
	if subpage == "documentation" then
		return null_return_value
	end

	local root_pagename
	if subpage ~= pagename then
		root_pagename = title.rootText
	else
		root_pagename = pagename
	end
	root_pagename = root_pagename:gsub("^Module:", "")

	-- Take the module type(s) from type= if given, or infer from the pagename.
	local module_types
	if args.type then
		module_types = {}
		local module_type_specs = split_on_comma(args.type)
		for _, spec in ipairs(module_type_specs) do
			-- Each spec is either `TYPE` or `TYPE:SORTKEY`; underscores in the sortkey stand for spaces.
			local modtype, sortkey = spec:match("^(.-):(.*)$")
			modtype = modtype or spec
			sortkey = sortkey and sortkey:gsub("_", " ") or nil
			table.insert(module_types, {type = modtype, sort = sortkey})
		end
	else
		local module_type_keyword = root_pagename:match("[-%a]+[- ]([^/]+)%f[/%z]")
		if not module_type_keyword then
			if noerror then
				return null_return_value
			else
				error(("Could not extract module type from root pagename '%s'"):format(root_pagename))
			end
		end
		local module_type = keyword_to_module_type[module_type_keyword]
		if not module_type then
			if noerror then
				return null_return_value
			else
				error(("Did not recognize inferred module-type keyword '%s' from root pagename '%s'"):format(
					module_type_keyword, root_pagename))
			end
		end
		module_types = {{type = module_type}}
	end

	-- Add a module type unless one with the same `type` field is already present.
	local function insert_module_type(typ)
		require("Module:table").insertIfNot(module_types, typ, {key = function(obj) return obj.type end})
	end

	-- Look for additional module type(s) inferred by pattern.
	for _, pattern_spec in ipairs(module_type_patterns) do
		local pattern, inferred_type = unpack(pattern_spec)
		if rfind(pagename, pattern) then
			if type(inferred_type) == "string" then
				insert_module_type({type = inferred_type})
			else
				-- Compute the derived types first so that module_types is not modified while iterating over it.
				local addl_types = {}
				for _, typ in ipairs(module_types) do
					table.insert(addl_types, {type = inferred_type(typ.type), sort = typ.sort})
				end
				for _, typ in ipairs(addl_types) do
					insert_module_type(typ)
				end
			end
		end
	end

	-- If 1= specified, take the languages/scripts directly from there. Otherwise, (a) try to extract one or more
	-- languages/scripts from the pagename (e.g. [[Module:uk-be-headword]] -> Ukrainian and Belarusian (languages);
	-- [[Module:bho-Kthi-translit]] -> Bhojpuri (language) and Kaithi (script); [[Module:Deva-Kthi-translit]] ->
	-- Devanagari and Kaithi (scripts)); and (b) if the specified or inferred module type(s) contain a type listed in
	-- languages_from_module_name[], use the function referenced there to extract additional languages (i.e. all the
	-- languages that use the module we are processing).
	local inferred_objs
	if args[1] then
		inferred_objs = {}
		for _, code in ipairs(rsplit(args[1], ",")) do
			-- We need to have an indicator of families because we allow bare family codes to stand for proto-languages.
			if code:find("^fam:") then
				code = code:gsub("^fam:", "")
				local family = require("Module:families").getByCode(code) or
					error(("Unrecognized family code '%s' in [[Module:module categorization]]"):format(code))
				local descendants = family:getDescendantCodes()
				for _, desc in ipairs(descendants) do
					local obj = get_lang_or_script(desc)
					if obj then -- make sure we skip families without proto-languages
						table.insert(inferred_objs, obj)
					end
				end
			else
				local obj = get_lang_or_script(code)
				if not obj then
					error(("Unrecognized language or script code '%s'"):format(code))
				end
				table.insert(inferred_objs, obj)
			end
		end
	else
		inferred_objs = infer_lang_and_script_codes(root_pagename)
		for _, module_type in ipairs(module_types) do
			local languages_extractor = languages_from_module_name[module_type.type]
			if languages_extractor then
				local langs = require(languages_extractor)(root_pagename)
				if langs then
					for _, obj in ipairs(langs) do
						require("Module:table").insertIfNot(inferred_objs, obj, {key = obj_code})
					end
				end
			end
		end
		if #inferred_objs == 0 then
			if noerror then
				return null_return_value
			else
				error(("Could not infer any languages or scripts from root pagename '%s'"):format(root_pagename))
			end
		end
	end

	-- Build the name of a per-language or per-script category ending in `suffix`.
	local function construct_lang_or_sc_cat(obj, suffix)
		local prefix
		local extra = ""
		if obj:hasType("language") then
			prefix = obj:getFullName()
		else
			prefix = obj:getCategoryName()
			extra = "පිළිබඳ "
		end
		return prefix .. " " .. extra .. suffix
	end

	if pagename:find("^Module:User:") then
		insert_cat("User sandbox modules")
	elseif pagename:find("/sandbox") then
		insert_cat("වැලිපිලි මොඩියුල")
	else
		for _, module_type in ipairs(module_types) do
			-- Add the language-independent category for this module type; the type "-" suppresses it.
			local function insert_overall_module_type_cat(sortkey)
				if module_type.type ~= "-" then
					insert_cat(m_sinhala.sinhala_adjective(module_type.type) .. " මොඩියුල", module_type.sort or sortkey)
				end
			end

			for _, obj in ipairs(inferred_objs) do
				if obj == "-" then
					-- "-" means no language or script: only the overall module-type category applies.
					insert_overall_module_type_cat()
				else
					if obj:hasType("script") and module_type.type ~= "-" then
						insert_cat("අක්ෂරක්‍රමය අනුව " .. m_sinhala.sinhala_adjective(module_type.type) .. " මොඩියුල",
							obj:getCanonicalName())
					end
					insert_cat(construct_lang_or_sc_cat(obj, "මොඩියුල"), module_type.type)
					insert_overall_module_type_cat(obj:getCanonicalName())
					if module_type_generates_lang_specific_cat[module_type.type] then
						insert_cat(construct_lang_or_sc_cat(obj,
							m_sinhala.sinhala_adjective(mw.getContentLanguage():lcfirst(module_type.type)) .. " මොඩියුල"))
					end
				end
			end
		end
	end

	-- Flatten each {name, sort} spec into "name" or "name|sort".
	for i, catspec in ipairs(categories) do
		if catspec.sort then
			categories[i] = ("%s|%s"):format(catspec.name, catspec.sort)
		else
			categories[i] = catspec.name
		end
	end

	if args.return_cats then
		return table.concat(categories, ",")
	elseif return_raw then
		return categories
	else
		for i, cat in ipairs(categories) do
			categories[i] = "[[Category:" .. cat .. "]]"
		end
		return table.concat(categories)
	end
end

--[==[Table used in the documentation to {{tl|module cat}}.]==]
function export.keyword_to_module_type_table()
	local parts = {}
	local function ins(text)
		table.insert(parts, text)
	end
	ins('{|class="wikitable"')
	ins("! Keyword !!
Inferred module type") local keywords = {} for k, v in pairs(keyword_to_module_type) do table.insert(keywords, k) end table.sort(keywords) for _, keyword in ipairs(keywords) do ins("|-") ins(("| <code>%s</code> || <code>%s</code>"):format(keyword, keyword_to_module_type[keyword])) end ins("|}") return table.concat(parts, "\n") end return export f9xcqejfjuocr1gx2879yvfqfrs5esd කයිපය 0 114235 231541 168363 2026-04-16T11:52:27Z WrdSrchSi 3305 + etymon|si + මූලාශ්‍ර 231541 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{etymon|si|id=කයිප අ|af|si>කයිප#නාම ප්‍රකෘතිය>ප්‍රකෘතිය|si>-අ>ප්‍රත්‍ය|tree=1}} ''කඩුල්ල ඉදිරියෙහි පය, ක.ඉ.ප. > [[කයිප#නාම ප්‍රකෘතිය|කයිප]] (ප්‍රකෘතිය) + [[-අ]] (ප්‍රත්‍යය) > කයිප + '''ය්''' ([[ව්‍යඤ්ජනාගමනය]]) + අ >'' [[කයිපය]] === නාම පදය === {{si-noun}} # ක්‍රිකට් ක්‍රීඩාවේ දැවී යන ආකාරයකි. :# [[LBW]], [[Leg before wicket]] === මූලාශ්‍ර === <references/> * "17. [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba1.pdf]", '''''පහදුව''''', — එඩිය, 4 වෙළුම, 1 කලබ, 17 පිටුව, 2506 බක් - 1963 අප්රේල් pa3uh4ou47khi5fi0wsjcqwkmrhh5fx විනිසනවා 0 114617 231464 215089 2026-04-15T12:54:32Z WrdSrchSi 3305 + head|si|verb forms|head= + infl of|si 231464 wikitext text/x-wiki ==සිංහල== ===ක්‍රියා පදය=== {{head|si|verb forms|head=විනිසනවා|අතීත සකර්මක|විනිසුවා?|අනතීත කර්ම කාරක|විනිසෙනවා|ප්‍රයුක්ත|විනිසවනවා}} # {{infl of|si|විනිසයි||භාව නාම ක්‍රියා}} <!-- විනිසයි - විනිසති - විනිසි යැ - විනිසි හ. --> [[ප්‍රවර්ගය: සිංහල භාව නාම ක්‍රියා පද]] 5r25hjofoz12s64l08hlhn0uubdlt3d ප්‍රවර්ගය:පාලි non-lemma forms 14 120918 231527 185194 2026-04-16T10:50:39Z Pinthura 2424 සේවා: මෘදු ප්‍රවර්ග යළියොමුවක් ලෙස වෙනස් කිරීම. 
231527 wikitext text/x-wiki {{category redirect|පාලි ලෙමා-නොවන ස්වරූප}} {{ස්වයංක්‍රීය සේවා ක්‍රියාව නිවැරදි බව තහවුරු කරන්න}} falncre65vnqznyg4sd5290xv4tkj8n නැඟෙනවා 0 127118 231463 216237 2026-04-15T12:46:40Z WrdSrchSi 3305 + head|si|verb forms|head= + infl of|si 231463 wikitext text/x-wiki ==සිංහල== ===ක්‍රියා පද=== {{head|si|verb forms|head=නැඟෙනවා}} # {{infl of|si|නැඟේ||භාව නාම ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල භාව නාම ක්‍රියා පද]] c6nlykstmhxb0jg8r2cdisgrl0hd1dd විදුතවරයි 0 129934 231467 204500 2026-04-15T13:18:55Z WrdSrchSi 3305 + වෙනත් ආකාර + etymon|si + IPA|si + head|si|verb (new) + definition (changed) + ආශ්‍රිත පද + මූලාශ්‍ර 231467 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} === නිරුක්තිය === {{etymon|si|id=විදුතවරා යි|af|si>විදුතවරා#ක්‍රියා පද>පදය|si>යි>පදය|tree=1}} ''[[විදුතවරා#ක්‍රියා පද|විදුතවරා]] (වෙනත් ආකාර) + [[යි]] >'' [[විදුතවරයි]] ===උච්චාරණය=== * {{IPA|si|/ˈʋidutaʋəraji/}} ===ක්‍රියා පද=== {{head|si|verb |අතීත-සකර්මක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරී |අනතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරේ |අතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරිණි |අනතීත-සකර්මක අන්‍ය පුරුෂ බහුවචන|විදුතවරති |අනතීත-සකර්මක උත්තම ඒකවචන|විදුතවරමි |අනතීත-සකර්මක උත්තම බහුවචන|විදුතවරමු |අනතීත-ප්‍රයුක්ත|විදුතවරවයි |අනතීත-භාව නාම ක්‍රියා|විදුතවරනවා |අනතීත-කාරක නාම ක්‍රියා|විදුතවරන්නේ |වර්තමාන-නිපාත|විදුතවරමින් }} # {{lb|si||සකර්මක}} [[electroplate#ක්‍රියා පද|electroplate]] කරයි. 
====ආශ්‍රිත පද==== {{top2}} * {{l|si|විදුතැවැරූ}} * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} {{bottom}} ==== පරිවර්තන ==== {{trans-top|විදුතවරයි}} * ඉංග්‍රීසි: {{t|en|electroplates}} {{trans-bottom}} === මූලාශ්‍ර === <references/> * "[[තවර#ධාතු ප්‍රකෘතිය|තවර]] [https://tipitaka.lk/library/963]", කුමාරණතුංග මුනිදාස, '''''ක්‍රියා විවරණය''''', — 141 පිටුව [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> 7x1lgxwe04nh03j20jl5wz2gt6dp5gn 231469 231467 2026-04-15T13:35:30Z WrdSrchSi 3305 /* මූලාශ්‍ර */ 1 ref. 231469 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} === නිරුක්තිය === {{etymon|si|id=විදුතවරා යි|af|si>විදුතවරා#ක්‍රියා පද>පදය|si>යි>පදය|tree=1}} ''[[විදුතවරා#ක්‍රියා පද|විදුතවරා]] (වෙනත් ආකාර) + [[යි]] >'' [[විදුතවරයි]] ===උච්චාරණය=== * {{IPA|si|/ˈʋidutaʋəraji/}} ===ක්‍රියා පද=== {{head|si|verb |අතීත-සකර්මක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරී |අනතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරේ |අතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරිණි |අනතීත-සකර්මක අන්‍ය පුරුෂ බහුවචන|විදුතවරති |අනතීත-සකර්මක උත්තම ඒකවචන|විදුතවරමි |අනතීත-සකර්මක උත්තම බහුවචන|විදුතවරමු |අනතීත-ප්‍රයුක්ත|විදුතවරවයි |අනතීත-භාව නාම ක්‍රියා|විදුතවරනවා |අනතීත-කාරක නාම ක්‍රියා|විදුතවරන්නේ |වර්තමාන-නිපාත|විදුතවරමින් }} # {{lb|si||සකර්මක}} [[electroplate#ක්‍රියා පද|electroplate]] කරයි. 
====ආශ්‍රිත පද==== {{top2}} * {{l|si|විදුතැවැරූ}} * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} {{bottom}} ==== පරිවර්තන ==== {{trans-top|විදුතවරයි}} * ඉංග්‍රීසි: {{t|en|electroplates}} {{trans-bottom}} === මූලාශ්‍ර === <references/> * "[[තවර#ධාතු ප්‍රකෘතිය|තවර]] [https://tipitaka.lk/library/963]", කුමාරණතුංග මුනිදාස, '''''ක්‍රියා විවරණය''''', — 141 පිටුව * "Electroplate (v) [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba2.pdf]", '''''පාරිභාෂික පටලැවිල්ල''''', — එඩිය, 4 වෙළුම, 2 කලබ, 51 පිටුව [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] <!-- === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> km8xyfyh6rvxry2po85l9al8ie1d7sn 231470 231469 2026-04-15T13:42:24Z WrdSrchSi 3305 + අමතර අවධානයට 231470 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} === නිරුක්තිය === {{etymon|si|id=විදුතවරා යි|af|si>විදුතවරා#ක්‍රියා පද>පදය|si>යි>පදය|tree=1}} ''[[විදුතවරා#ක්‍රියා පද|විදුතවරා]] (වෙනත් ආකාර) + [[යි]] >'' [[විදුතවරයි]] ===උච්චාරණය=== * {{IPA|si|/ˈʋidutaʋəraji/}} ===ක්‍රියා පද=== {{head|si|verb |අතීත-සකර්මක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරී |අනතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරේ |අතීත-කර්ම කාරක අන්‍ය පුරුෂ ඒකවචන|විදුතැවැරිණි |අනතීත-සකර්මක අන්‍ය පුරුෂ බහුවචන|විදුතවරති |අනතීත-සකර්මක උත්තම ඒකවචන|විදුතවරමි |අනතීත-සකර්මක උත්තම බහුවචන|විදුතවරමු |අනතීත-ප්‍රයුක්ත|විදුතවරවයි |අනතීත-භාව නාම ක්‍රියා|විදුතවරනවා |අනතීත-කාරක නාම ක්‍රියා|විදුතවරන්නේ |වර්තමාන-නිපාත|විදුතවරමින් }} # {{lb|si||සකර්මක}} [[electroplate#ක්‍රියා පද|electroplate]] කරයි. 
====ආශ්‍රිත පද==== {{top2}} * {{l|si|විදුතැවැරූ}} * {{l|si|විදුතවරා#ක්‍රියා පද|විදුතවරා}} {{bottom}} ==== පරිවර්තන ==== {{trans-top|විදුතවරයි}} * ඉංග්‍රීසි: {{t|en|electroplates}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|පැහැතවරයි#ක්‍රියා පද|පැහැතවරයි}} === මූලාශ්‍ර === <references/> * "[[තවර#ධාතු ප්‍රකෘතිය|තවර]] [https://tipitaka.lk/library/963]", කුමාරණතුංග මුනිදාස, '''''ක්‍රියා විවරණය''''', — 141 පිටුව * "Electroplate (v) [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba2.pdf]", '''''පාරිභාෂික පටලැවිල්ල''''', — එඩිය, 4 වෙළුම, 2 කලබ, 51 පිටුව [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] 32v8dg2qh3l68ml0xwnaimlvr2vqlk1 වුසූ 0 131236 231468 207764 2026-04-15T13:27:06Z WrdSrchSi 3305 + IPA|si + head|si|verb forms + alternative form of|si + ප්‍රවර්ගය 231468 wikitext text/x-wiki ==සිංහල== ===උච්චාරණය=== * {{IPA|si|/ˈʋusuː/}} ===ක්‍රියා පද=== {{head|si|verb forms}} # {{alternative form of|si|විසූ#ක්‍රියා පද}} [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] f3d28l0e76lfltetsjpsd5uppxnrwk6 විදුතවරනවා 0 135043 231465 216631 2026-04-15T12:57:05Z WrdSrchSi 3305 + head|si|verb forms|head= 231465 wikitext text/x-wiki == සිංහල == === ක්‍රියා පද === {{head|si|verb forms|head=විදුතවරනවා}} # {{infl of|si|විදුතවරයි||භාව නාම ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල භාව නාම ක්‍රියා පද]] aaq43yni2n36w37xzwsomaz5fyi0lig නැඟෙති 0 135651 231462 217906 2026-04-15T12:43:02Z WrdSrchSi 3305 + head|si|verb forms|head= (ක්‍රියා පද 1, ක්‍රියා පද 2) 231462 wikitext text/x-wiki ==සිංහල== ===උච්චාරණය=== * {{IPA|si|/ˈnæᵑɡeti/}} ===ක්‍රියා පද 1=== {{head|si|verb forms|head=නැඟෙති|අතීත-අකර්මක අත් පද අන්‍ය පුරුෂ බහුවචන|නැඟුණු#ක්‍රියා පද 1}} # {{lb|si|අකර්මක|අත් පද}} {{infl of|si|නැඟේ#ක්‍රියා පද 1||අනතීත|අන්‍ය පුරුෂ|බහුවචන|ආඛ්‍යාත ක්‍රියා}} ===ක්‍රියා පද 2=== {{head|si|verb forms|head=නැඟෙති|අතීත-කර්ම කාරක අන්‍ය පුරුෂ බහුවචන|නැඟුණු#ක්‍රියා පද 2|අනතීත-සකර්මක අන්‍ය පුරුෂ බහුවචන|නඟති#ක්‍රියා පද}} # {{infl of|si|නඟයි||අනතීත|කර්ම කාරක|අන්‍ය පුරුෂ|බහුවචන|ආඛ්‍යාත ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] 
2vrtwxo619qhv1livmv00vzpcfj8us4 හළ 0 137431 231531 221550 2026-04-16T11:06:57Z WrdSrchSi 3305 + IPA|si + alternative form of|si + ප්‍රවර්ගය + අමතර අවධානයට + /*මූලාශ්‍ර*/ ref#2 231531 wikitext text/x-wiki ==සිංහල== ===උච්චාරණය=== * {{IPA|si|/ˈɦaɭə/}} ===ක්‍රියා පද=== {{head|si|verb forms}} # {{alternative form of|si|හළි#ක්‍රියා පද}} # {{alternative form of|si|හළු#ක්‍රියා පද}} === අමතර අවධානයට === * {{l|si|හල#නාම පදය|හල}} === මූලාශ්‍ර === <references/> * "11. කිරි හළ ද; කිරි හල ද? [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba1.pdf]", '''''පහදුව''''', — එඩිය, 4 වෙළුම, 1 කලබ, 15 පිටුව, 2506 බක් - 1963 අප්රේල් * "[[හර#ධාතු ප්‍රකෘතිය|හර]] [https://tipitaka.lk/library/963]", කුමාරණතුංග මුනිදාස, ක්‍රියා විවරණය, — පිටුව 220 [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] nabjuo7ji0jfanjj0nr39lgu5gbrco1 හළි 0 142069 231523 231442 2026-04-16T10:44:10Z WrdSrchSi 3305 + වෙනත් ආකාර + IPA|si + infl of|si + ප්‍රවර්ගය 231523 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|හළ#ක්‍රියා පද|හළ}} ===උච්චාරණය=== * {{IPA|si|/ˈɦaɭi/}} ===ක්‍රියා පද=== {{head|si|verb forms}} # {{lb|si|අකර්මක}} {{infl of|si|හරී#ක්‍රියා පද||අතීත|අන්‍ය පුරුෂ|ඒකවචන|ආඛ්‍යාත ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] 96ukoil2k2z73rmeq6qqkio42iw3c0y 231529 231523 2026-04-16T10:52:08Z WrdSrchSi 3305 /* ක්‍රියා පද */ Fixed lb|si 231529 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|හළ#ක්‍රියා පද|හළ}} ===උච්චාරණය=== * {{IPA|si|/ˈɦaɭi/}} ===ක්‍රියා පද=== {{head|si|verb forms}} # {{lb|si|සකර්මක}} {{infl of|si|හරී#ක්‍රියා පද||අතීත|අන්‍ය පුරුෂ|ඒකවචන|ආඛ්‍යාත ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] c62rawqon92126m76jh6vdoe78ujlz9 හළු 0 142070 231530 231443 2026-04-16T10:53:01Z WrdSrchSi 3305 + වෙනත් ආකාර + IPA|si + head|si|verb forms|head= + infl of|si + ප්‍රවර්ගය 231530 wikitext text/x-wiki ==සිංහල== ===වෙනත් ආකාර=== * {{l|si|හළ#ක්‍රියා පද|හළ}} ===උච්චාරණය=== * {{IPA|si|/ˈɦaɭu/}} ===ක්‍රියා පද=== {{head|si|verb forms|head=හළු|අනතීත-සකර්මක අන්‍ය පුරුෂ බහුවචන|හරිති#ක්‍රියා පද}} # 
{{lb|si|සකර්මක}} {{infl of|si|හරී#ක්‍රියා පද||අතීත|අන්‍ය පුරුෂ|බහුවචන|ආඛ්‍යාත ක්‍රියා}} [[ප්‍රවර්ගය: සිංහල ආඛ්‍යාත ක්‍රියා පද]] t1c7o8075ckx2qrosnb9numwajwly2g electroplate 0 142076 231466 2026-04-15T13:04:15Z WrdSrchSi 3305 Page created: + en-verb + wikitable 231466 wikitext text/x-wiki ==ඉංග්‍රීසි== ===ක්‍රියා පද=== {{en-verb}} # {{{1|{{rfdef|en}}}}} #: ''{{{2|<!-- example sentence -->}}}'' {| class="wikitable" |+ |- ! !!අන්‍ය පුරුෂ<br>ඒකවචන!!අන්‍ය පුරුෂ<br>බහුවචන!!උත්තම පුරුෂ<br>ඒකවචන!!උත්තම පුරුෂ<br>බහුවචන |- |අනතීත-සකර්මක||[[විදුතවරයි]]||[[විදුතවරති]]||[[විදුතවරමි]]||[[විදුතවරමු]] |- |} afmmkfhl8jlvbpdxbivmxpevsves99y ප්‍රවර්ගය:පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද 14 142077 231471 2026-04-15T14:42:36Z Pinthura 2424 සේවා: [[:[[en:Category:Pali pronouns in Brahmi script]]]] තුළ තිබූ පෙළ මෙහි ඇතුළු කිරීම. 231471 wikitext text/x-wiki {{auto cat}} eomzlm5v4j7ond1phrju7cnue91g5qx 231472 231471 2026-04-15T14:42:46Z Pinthura 2424 Pinthura විසින් [[ප්‍රවර්ගය:Pali pronouns in Brahmi script]] සිට [[ප්‍රවර්ගය:පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද]] වෙත පිටුව ගෙන යන ලදී: සේවා: නව ප්‍රවර්ග නාමය වෙත ගෙනයාම. 231471 wikitext text/x-wiki {{auto cat}} eomzlm5v4j7ond1phrju7cnue91g5qx 231524 231472 2026-04-16T10:50:09Z Pinthura 2424 සේවා: ඉංග්‍රීසි ව්‍යාපෘතිය වෙත සබැඳියක් එක් කිරීම. 231524 wikitext text/x-wiki {{auto cat}} [[en:Category:Pali pronouns in Brahmi script]] grxpff7pgvmepsqpz1kcgz9pkq63fpu ප්‍රවර්ගය:Pali pronouns in Brahmi script 14 142078 231473 2026-04-15T14:42:47Z Pinthura 2424 Pinthura විසින් [[ප්‍රවර්ගය:Pali pronouns in Brahmi script]] සිට [[ප්‍රවර්ගය:පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද]] වෙත පිටුව ගෙන යන ලදී: සේවා: නව ප්‍රවර්ග නාමය වෙත ගෙනයාම. 
231473 wikitext text/x-wiki #යළියොමුව [[:ප්‍රවර්ගය:පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද]] s8pfqy9gs55i18wu1ef4wsgjekxz0xy සැකිල්ල:my-proper noun 10 142079 231474 2025-12-31T08:03:56Z en>WingerBot 0 misc cleanup of uses of {{PAGENAME}}, sometimes reworking {{head}} calls to incorporate inflections into {{head}} (manually assisted) 231474 wikitext text/x-wiki {{head|my|proper noun|sort={{{sort|}}}|head={{{sg|{{{head|}}}}}}|tr={{{tr|}}}<!-- |cat2={{#if:{{{tr|}}}||terms lacking transliteration}}-->}}<noinclude>{{tcat|hw}}</noinclude> 01wfc2byhncz2bn14w048x6y9a48jp9 231475 231474 2026-04-15T14:59:14Z Lee 19 [[:en:Template:my-proper_noun]] වෙතින් එක් සංශෝධනයක් 231474 wikitext text/x-wiki {{head|my|proper noun|sort={{{sort|}}}|head={{{sg|{{{head|}}}}}}|tr={{{tr|}}}<!-- |cat2={{#if:{{{tr|}}}||terms lacking transliteration}}-->}}<noinclude>{{tcat|hw}}</noinclude> 01wfc2byhncz2bn14w048x6y9a48jp9 Module:ar-stripdiacritics 828 142080 231476 2025-12-21T06:12:18Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231476 Scribunto text/plain local m_str_utils = require("Module:string utilities") local find = m_str_utils.find local gsub = m_str_utils.gsub local U = m_str_utils.char local taTwiil = U(0x640) local waSla = U(0x671) -- diacritics ordinarily removed by entry_name replacements local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670) -- replace alif waṣl with alif -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, -- damma, kasra, shadda, sukun, superscript (dagger) alef local replacements = { from = {U(0x0671), "[" .. U(0x0640, 0x064B) .. "-" .. U(0x0652, 0x0670, 0x0656) .. "]"}, to = {U(0x0627)}, } local export = {} function export.stripDiacritics(text, lang, sc) if text == waSla or find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. 
"$") then return text end for i, from in ipairs(replacements.from) do local to = replacements.to[i] or "" text = gsub(text, from, to) end return text end return export gmvq9118u80f6bkyoy06j4u3ine0kmf 231477 231476 2026-04-15T16:41:29Z Lee 19 [[:en:Module:ar-stripdiacritics]] වෙතින් එක් සංශෝධනයක් 231476 Scribunto text/plain local m_str_utils = require("Module:string utilities") local find = m_str_utils.find local gsub = m_str_utils.gsub local U = m_str_utils.char local taTwiil = U(0x640) local waSla = U(0x671) -- diacritics ordinarily removed by entry_name replacements local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670) -- replace alif waṣl with alif -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, -- damma, kasra, shadda, sukun, superscript (dagger) alef local replacements = { from = {U(0x0671), "[" .. U(0x0640, 0x064B) .. "-" .. U(0x0652, 0x0670, 0x0656) .. "]"}, to = {U(0x0627)}, } local export = {} function export.stripDiacritics(text, lang, sc) if text == waSla or find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. 
"$") then return text end for i, from in ipairs(replacements.from) do local to = replacements.to[i] or "" text = gsub(text, from, to) end return text end return export gmvq9118u80f6bkyoy06j4u3ine0kmf Module:ar-stripdiacritics/testcases/documentation 828 142081 231478 2025-12-21T06:18:37Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231478 wikitext text/x-wiki {{#invoke:ar-stripdiacritics/testcases|run_tests|differs_at=true}} ctq2fl08parqc6cjh0inuza9tyoo5hb 231479 231478 2026-04-15T16:42:33Z Lee 19 [[:en:Module:ar-stripdiacritics/testcases/documentation]] වෙතින් එක් සංශෝධනයක් 231478 wikitext text/x-wiki {{#invoke:ar-stripdiacritics/testcases|run_tests|differs_at=true}} ctq2fl08parqc6cjh0inuza9tyoo5hb Module:ar-stripdiacritics/testcases 828 142082 231480 2025-12-21T06:13:11Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231480 Scribunto text/plain local tests = require("Module:UnitTests") local stripDiacritics = require("Module:ar-stripdiacritics").stripDiacritics local function tag(text) return '<span>' .. text .. '</span>' end function tests:test_stripDiacritics() self:iterate({ { "هُوَ", "هو" }, { "كَانَ", "كان" }, { "ٱلْمَغْرِب", "المغرب" }, { "آلِهَةٍ", "آلهة" }, { "قُوَّةً", "قوة" }, { "مَرِيـــــــضٌ", "مريض" }, { "لٰكِنَّ", "لكن" }, { "ـــ", "" }, { "ــُـً", "" }, }, function(self, example, expected) self:equals(tag(example), stripDiacritics(example), expected) end) end return tests 1oaj3uv7z2n7k41qew1bwtilrew5g2g 231481 231480 2026-04-15T16:42:41Z Lee 19 [[:en:Module:ar-stripdiacritics/testcases]] වෙතින් එක් සංශෝධනයක් 231480 Scribunto text/plain local tests = require("Module:UnitTests") local stripDiacritics = require("Module:ar-stripdiacritics").stripDiacritics local function tag(text) return '<span>' .. text .. 
'</span>' end function tests:test_stripDiacritics() self:iterate({ { "هُوَ", "هو" }, { "كَانَ", "كان" }, { "ٱلْمَغْرِب", "المغرب" }, { "آلِهَةٍ", "آلهة" }, { "قُوَّةً", "قوة" }, { "مَرِيـــــــضٌ", "مريض" }, { "لٰكِنَّ", "لكن" }, { "ـــ", "" }, { "ــُـً", "" }, }, function(self, example, expected) self:equals(tag(example), stripDiacritics(example), expected) end) end return tests 1oaj3uv7z2n7k41qew1bwtilrew5g2g Module:ar-stripdiacritics/documentation 828 142083 231482 2025-12-21T05:39:23Z en>Benwing2 0 Benwing2 moved page [[Module:ar-entryname/documentation]] to [[Module:ar-stripdiacritics/documentation]]: "entry name" as concept is going away, replaced by "strip diacritics" + logical-to-physical 231482 wikitext text/x-wiki {{strip diacritics module documentation}} 4ee7ih8ihukb9zn2x59k8le2phktzzx 231483 231482 2026-04-15T16:43:10Z Lee 19 [[:en:Module:ar-stripdiacritics/documentation]] වෙතින් එක් සංශෝධනයක් 231482 wikitext text/x-wiki {{strip diacritics module documentation}} 4ee7ih8ihukb9zn2x59k8le2phktzzx Module:Polyt-stripdiacritics 828 142084 231484 2025-12-21T06:33:30Z en>Benwing2 0 makeEntryName -> stripDiacritics 231484 Scribunto text/plain local export = {} local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local u = require("Module:string/char") local ugsub = mw.ustring.gsub local umatch = mw.ustring.match local grave = u(0x300) local acute = u(0x301) local smooth = u(0x313) local rough = u(0x314) local word_ch = "[%w" .. grave .. acute .. smooth .. rough .. u(0x308, 0x342, 0x345) .. "]" local following_word_pattern = "^" .. word_ch .. "*%s+" .. word_ch -- not punctuation local breathing_ch = "[" .. smooth .. rough .. "]" local rho_cap_smooth_sub = u(0x1FDC) -- temporary (unused) codepoint for Ρ̓, which has no atomic codepoint local rho = "[ρῤῥΡ" .. rho_cap_smooth_sub .. "Ῥ]" local two_or_more_rhos = rho .. rho .. "+" local expected_rho_breathings = "^[ρῤΡ" .. rho_cap_smooth_sub .. 
"]+[ρῥΡῬ]$" local Grek_stripDiacritics = require("Module:Grek-common").stripDiacritics function export.stripDiacritics(text, lang, sc) -- Do some substitutions done for all Greek text. text = Grek_stripDiacritics(text, lang, sc) -- Remove length marks and double undertie. text = toNFD(text):gsub("\204[\132\134]", ""):gsub("\205\156", "") -- Convert grave to acute unless followed by another word. text = ugsub(text, grave .. "()", function(pos) if not umatch(text, following_word_pattern, pos) then return acute end end) -- Convert "ῤῥ" to "ρρ". text = ugsub(toNFC(text):gsub("Ρ̓", rho_cap_smooth_sub), two_or_more_rhos, function(rhos) if umatch(rhos, expected_rho_breathings) then return (toNFD(rhos:gsub(rho_cap_smooth_sub, "Ρ̓")):gsub(breathing_ch, "")) end end):gsub(rho_cap_smooth_sub, "Ρ̓") return toNFC(text) end return export eqzr38qq3hkar1z8ab23f1nx777pqka 231485 231484 2026-04-15T16:44:07Z Lee 19 [[:en:Module:Polyt-stripdiacritics]] වෙතින් එක් සංශෝධනයක් 231484 Scribunto text/plain local export = {} local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local u = require("Module:string/char") local ugsub = mw.ustring.gsub local umatch = mw.ustring.match local grave = u(0x300) local acute = u(0x301) local smooth = u(0x313) local rough = u(0x314) local word_ch = "[%w" .. grave .. acute .. smooth .. rough .. u(0x308, 0x342, 0x345) .. "]" local following_word_pattern = "^" .. word_ch .. "*%s+" .. word_ch -- not punctuation local breathing_ch = "[" .. smooth .. rough .. "]" local rho_cap_smooth_sub = u(0x1FDC) -- temporary (unused) codepoint for Ρ̓, which has no atomic codepoint local rho = "[ρῤῥΡ" .. rho_cap_smooth_sub .. "Ῥ]" local two_or_more_rhos = rho .. rho .. "+" local expected_rho_breathings = "^[ρῤΡ" .. rho_cap_smooth_sub .. "]+[ρῥΡῬ]$" local Grek_stripDiacritics = require("Module:Grek-common").stripDiacritics function export.stripDiacritics(text, lang, sc) -- Do some substitutions done for all Greek text. 
text = Grek_stripDiacritics(text, lang, sc) -- Remove length marks and double undertie. text = toNFD(text):gsub("\204[\132\134]", ""):gsub("\205\156", "") -- Convert grave to acute unless followed by another word. text = ugsub(text, grave .. "()", function(pos) if not umatch(text, following_word_pattern, pos) then return acute end end) -- Convert "ῤῥ" to "ρρ". text = ugsub(toNFC(text):gsub("Ρ̓", rho_cap_smooth_sub), two_or_more_rhos, function(rhos) if umatch(rhos, expected_rho_breathings) then return (toNFD(rhos:gsub(rho_cap_smooth_sub, "Ρ̓")):gsub(breathing_ch, "")) end end):gsub(rho_cap_smooth_sub, "Ρ̓") return toNFC(text) end return export eqzr38qq3hkar1z8ab23f1nx777pqka සැකිල්ල:strip diacritics module documentation 10 142085 231486 2025-12-21T05:53:38Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231486 wikitext text/x-wiki <includeonly>{{#invoke:documentation/strip diacritics|documentation_template}}</includeonly><noinclude>{{documentation}}</noinclude> 58a079156e6nh9javlzg2pxrc7qdtax 231487 231486 2026-04-15T16:48:49Z Lee 19 [[:en:Template:strip_diacritics_module_documentation]] වෙතින් එක් සංශෝධනයක් 231486 wikitext text/x-wiki <includeonly>{{#invoke:documentation/strip diacritics|documentation_template}}</includeonly><noinclude>{{documentation}}</noinclude> 58a079156e6nh9javlzg2pxrc7qdtax සැකිල්ල:strip diacritics module documentation/documentation 10 142086 231488 2025-12-21T05:37:13Z en>Benwing2 0 231488 wikitext text/x-wiki {{documentation subpage}} {{uses lua|Module:documentation/strip diacritics}} This template is used to automatically generate documentation for diacritic stripping modules such as [[Module:ar-stripdiacritics]]. 
<includeonly> [[Category:Documentation templates]] </includeonly> emq25bl3kc5skp0vu8kvpjvdmdc9a6q 231489 231488 2026-04-15T16:49:19Z Lee 19 [[:en:Template:strip_diacritics_module_documentation/documentation]] වෙතින් එක් සංශෝධනයක් 231488 wikitext text/x-wiki {{documentation subpage}} {{uses lua|Module:documentation/strip diacritics}} This template is used to automatically generate documentation for diacritic stripping modules such as [[Module:ar-stripdiacritics]]. <includeonly> [[Category:Documentation templates]] </includeonly> emq25bl3kc5skp0vu8kvpjvdmdc9a6q Module:documentation/strip diacritics 828 142087 231490 2025-12-23T05:53:20Z en>Benwing2 0 {{entryname}} -> {{strip diacritics}} 231490 Scribunto text/plain local export = {} local function fake_frame(args, parent_args) return { args = args, getParent = function() return { args = parent_args, } end } end function get_by_code(code) return require "Module:languages".getByCode(code, nil, false, true) or require "Module:scripts".getByCode(code) end local function get_code_from_title_without_namespace(title_without_namespace) local prefix = title_without_namespace:match("^(.+)%-stripdiacritics%f[/%z]") if not prefix then error("Base segment of title should end in -stripdiacritics: " .. title_without_namespace) end local code = prefix local lang_or_family_or_script = get_by_code(code) return code, lang_or_family_or_script end function export.documentation(title_without_namespace, explanation) local code, lang_or_family_or_script = get_code_from_title_without_namespace(title_without_namespace) return export.documentation_from_code(code, explanation, title_without_namespace) end function export.documentation_from_code(code, explanation, title_without_namespace) local lang_or_family_or_script = get_by_code(code) if not lang_or_family_or_script then return "Language code in page name (<code>" .. code .. "</code>) not recognized." 
end local category_name = lang_or_family_or_script:getCategoryName() local strip_diacritics_input if lang_or_family_or_script:hasType("script") then strip_diacritics_input = "text in the [[:Category:" .. category_name .. "|" .. category_name .. "]]" elseif lang_or_family_or_script:hasType("family") then strip_diacritics_input = "text in one of the [[:Category:" .. category_name .. "|" .. category_name .. "]]" else -- language strip_diacritics_input = "[[:Category:" .. category_name .. "|" .. category_name .. "]] text" end return "This module will generate diacritic-stripped text for " .. strip_diacritics_input .. (explanation and " " .. explanation or "") .. ". " .. require("Module:documentation").stripDiacriticsModuleLangList({args = { [1] = title_without_namespace:gsub("/documentation$", "") }}) .. [=[ The module should preferably not be called directly from templates or other modules. To use it from a template, use <code>{{[[Template:strip diacritics|strip diacritics]]}}</code>. Within a module, use [[Module:languages#Language:stripDiacritics]]. For testcases, see [[Module:]=] .. title_without_namespace:gsub("/documentation$", "") .. [=[/testcases]]. == Functions == ; <code>stripDiacritics(text, lang, sc)</code> : Strips diacritics from a given piece of <code>text</code> written in the script specified by the code <code>sc</code>, and language specified by the code <code>lang</code>. : When diacritic stripping fails, returns <code>nil</code>.]=] .. require("Module:module categorization").categorize(fake_frame({ is_template = "1", [1] = title_without_namespace, }, { [1] = code, })) end function export.documentation_template(frame) -- Parameters to {{strip diacritics module documentation}}: -- |code|description -- Ignore code because we get it from the page name. 
local pagename = mw.title.getCurrentTitle().text -- DO NOT replace with mw.loadData("Module:headword/data").pagename as we need the root portion local args = frame:getParent().args if args[1] and get_code_from_title_without_namespace(pagename) ~= args[1] then -- [[Special:WhatLinksHere/Wiktionary:Tracking/strip diacritics/input different from title]] require("Module:debug").track("strip diacritics/input different from title") end if args[1] then return export.documentation_from_code(args[1], args[2], pagename) else return export.documentation(pagename, args[2]) end end return export e6echcarj70wq1apsv6ndu7unwblfwo 231491 231490 2026-04-15T16:50:20Z Lee 19 [[:en:Module:documentation/strip_diacritics]] වෙතින් එක් සංශෝධනයක් 231490 Scribunto text/plain local export = {} local function fake_frame(args, parent_args) return { args = args, getParent = function() return { args = parent_args, } end } end function get_by_code(code) return require "Module:languages".getByCode(code, nil, false, true) or require "Module:scripts".getByCode(code) end local function get_code_from_title_without_namespace(title_without_namespace) local prefix = title_without_namespace:match("^(.+)%-stripdiacritics%f[/%z]") if not prefix then error("Base segment of title should end in -stripdiacritics: " .. title_without_namespace) end local code = prefix local lang_or_family_or_script = get_by_code(code) return code, lang_or_family_or_script end function export.documentation(title_without_namespace, explanation) local code, lang_or_family_or_script = get_code_from_title_without_namespace(title_without_namespace) return export.documentation_from_code(code, explanation, title_without_namespace) end function export.documentation_from_code(code, explanation, title_without_namespace) local lang_or_family_or_script = get_by_code(code) if not lang_or_family_or_script then return "Language code in page name (<code>" .. code .. "</code>) not recognized." 
end local category_name = lang_or_family_or_script:getCategoryName() local strip_diacritics_input if lang_or_family_or_script:hasType("script") then strip_diacritics_input = "text in the [[:Category:" .. category_name .. "|" .. category_name .. "]]" elseif lang_or_family_or_script:hasType("family") then strip_diacritics_input = "text in one of the [[:Category:" .. category_name .. "|" .. category_name .. "]]" else -- language strip_diacritics_input = "[[:Category:" .. category_name .. "|" .. category_name .. "]] text" end return "This module will generate diacritic-stripped text for " .. strip_diacritics_input .. (explanation and " " .. explanation or "") .. ". " .. require("Module:documentation").stripDiacriticsModuleLangList({args = { [1] = title_without_namespace:gsub("/documentation$", "") }}) .. [=[ The module should preferably not be called directly from templates or other modules. To use it from a template, use <code>{{[[Template:strip diacritics|strip diacritics]]}}</code>. Within a module, use [[Module:languages#Language:stripDiacritics]]. For testcases, see [[Module:]=] .. title_without_namespace:gsub("/documentation$", "") .. [=[/testcases]]. == Functions == ; <code>stripDiacritics(text, lang, sc)</code> : Strips diacritics from a given piece of <code>text</code> written in the script specified by the code <code>sc</code>, and language specified by the code <code>lang</code>. : When diacritic stripping fails, returns <code>nil</code>.]=] .. require("Module:module categorization").categorize(fake_frame({ is_template = "1", [1] = title_without_namespace, }, { [1] = code, })) end function export.documentation_template(frame) -- Parameters to {{strip diacritics module documentation}}: -- |code|description -- Ignore code because we get it from the page name. 
local pagename = mw.title.getCurrentTitle().text -- DO NOT replace with mw.loadData("Module:headword/data").pagename as we need the root portion local args = frame:getParent().args if args[1] and get_code_from_title_without_namespace(pagename) ~= args[1] then -- [[Special:WhatLinksHere/Wiktionary:Tracking/strip diacritics/input different from title]] require("Module:debug").track("strip diacritics/input different from title") end if args[1] then return export.documentation_from_code(args[1], args[2], pagename) else return export.documentation(pagename, args[2]) end end return export e6echcarj70wq1apsv6ndu7unwblfwo Module:languages/byStripDiacriticsModule 828 142088 231495 2025-12-21T05:49:15Z en>Benwing2 0 "entry name" as a concept is going away in favor of "strip diacritics" + logical-to-physical 231495 Scribunto text/plain return function(stripDiacriticsModule) local langs = {} for code, data in pairs(require("Module:languages/data/all")) do if data.strip_diacritics == stripDiacriticsModule then langs[code] = data elseif type(data.strip_diacritics) == "table" then for script, strip_diacritics_data in pairs(data.strip_diacritics) do if strip_diacritics_data == stripDiacriticsModule then langs[code] = data end end end end local result = {} local i = 0 for code, data in pairs(langs) do i = i + 1 result[i] = require("Module:languages").makeObject(code, data) end return result end lk41omakl9gzgmnob1lrwt4rea7f6mb 231496 231495 2026-04-15T17:09:18Z Lee 19 [[:en:Module:languages/byStripDiacriticsModule]] වෙතින් එක් සංශෝධනයක් 231495 Scribunto text/plain return function(stripDiacriticsModule) local langs = {} for code, data in pairs(require("Module:languages/data/all")) do if data.strip_diacritics == stripDiacriticsModule then langs[code] = data elseif type(data.strip_diacritics) == "table" then for script, strip_diacritics_data in pairs(data.strip_diacritics) do if strip_diacritics_data == stripDiacriticsModule then langs[code] = data end end end end local result = {} 
local i = 0 for code, data in pairs(langs) do i = i + 1 result[i] = require("Module:languages").makeObject(code, data) end return result end lk41omakl9gzgmnob1lrwt4rea7f6mb Module:Grek-common 828 142089 231500 2025-12-22T07:34:28Z en>Benwing2 0 entryname -> stripdiacritics, etc. 231500 Scribunto text/plain local export = {} local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local u = require("Module:string/char") local ugsub = mw.ustring.gsub local PRIME = u(0x02B9) local CARON = u(0x030C) local DIAERBELOW = u(0x0324) local BREVEBELOW = u(0x032E) local KERAIA = u(0x0374) local CORONIS = u(0x1FBD) local PSILI = u(0x1FBF) local RSQUO = u(0x2019) local displaytext_substitutes = { ["'"] = RSQUO, [PRIME] = RSQUO, [KERAIA] = RSQUO, [CORONIS] = RSQUO, [PSILI] = RSQUO, ["Þ"] = "Ϸ", ["þ"] = "ϸ", } function export.makeDisplayText(text, lang, sc) return toNFC(ugsub(toNFD(text), "['" .. PRIME .. KERAIA .. CORONIS .. PSILI .. "Þþ]", displaytext_substitutes)) end local stripdiacritics_substitutes = {} for k, v in pairs(displaytext_substitutes) do stripdiacritics_substitutes[k == "'" and RSQUO or k] = v == RSQUO and "'" or v end function export.stripDiacritics(text, lang, sc) text = ugsub(toNFD(text), "[" .. PRIME .. KERAIA .. CORONIS .. PSILI .. RSQUO .. "Þþ]", stripdiacritics_substitutes) if sc == "Grek" then text = ugsub(toNFD(text), "[" .. CARON .. DIAERBELOW .. BREVEBELOW .. 
"]+", "") end return toNFC(text) end return export 0n5d7utoem7e7w0351nqq2uett8qq0u 231501 231500 2026-04-15T17:13:09Z Lee 19 [[:en:Module:Grek-common]] වෙතින් එක් සංශෝධනයක් 231500 Scribunto text/plain local export = {} local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local u = require("Module:string/char") local ugsub = mw.ustring.gsub local PRIME = u(0x02B9) local CARON = u(0x030C) local DIAERBELOW = u(0x0324) local BREVEBELOW = u(0x032E) local KERAIA = u(0x0374) local CORONIS = u(0x1FBD) local PSILI = u(0x1FBF) local RSQUO = u(0x2019) local displaytext_substitutes = { ["'"] = RSQUO, [PRIME] = RSQUO, [KERAIA] = RSQUO, [CORONIS] = RSQUO, [PSILI] = RSQUO, ["Þ"] = "Ϸ", ["þ"] = "ϸ", } function export.makeDisplayText(text, lang, sc) return toNFC(ugsub(toNFD(text), "['" .. PRIME .. KERAIA .. CORONIS .. PSILI .. "Þþ]", displaytext_substitutes)) end local stripdiacritics_substitutes = {} for k, v in pairs(displaytext_substitutes) do stripdiacritics_substitutes[k == "'" and RSQUO or k] = v == RSQUO and "'" or v end function export.stripDiacritics(text, lang, sc) text = ugsub(toNFD(text), "[" .. PRIME .. KERAIA .. CORONIS .. PSILI .. RSQUO .. "Þþ]", stripdiacritics_substitutes) if sc == "Grek" then text = ugsub(toNFD(text), "[" .. CARON .. DIAERBELOW .. BREVEBELOW .. "]+", "") end return toNFC(text) end return export 0n5d7utoem7e7w0351nqq2uett8qq0u Module:sd-Arab-translit 828 142090 231502 2026-02-10T01:17:48Z en>AryamanA 0 231502 Scribunto text/plain --- Taken from [[Module:pa-Arab-translit]] local m_str_utils = require("Module:string utilities") local U = m_str_utils.char local gsub = m_str_utils.gsub local export = {} local zabar = U(0x64E) local zer = U(0x650) local pesh = U(0x64F) local tashdid = U(0x651) -- also called shadda local jazm = "ْ" local hamza = 'ء' local alif = 'ا' local vav = 'و' local ye = 'ي' local he = 'ه' local nun = 'ن' local aspirates = '[ڙمنڻجگلn]' local diacritics = '[' .. zabar .. zer .. pesh .. jazm .. 
']' local diacritics_or_alif = '[' .. zabar .. zer .. pesh .. jazm .. alif .. ']' local diacritics_or_matres = '[' .. zabar .. zer .. pesh .. jazm .. alif .. vav .. ye .. ']' local not_diacritics = '[^' .. zabar .. zer .. pesh .. jazm .. ']' local mapping = { ["آ"] = 'ā', ["ب"] = 'b', ["ٻ"] = 'ḇ', ["ڀ"] = 'bh', ["ت"] = 't', ["ٿ"] = 'th', ["ٽ"] = 'ṭ', ["ٺ"] = 'ṭh', ["ث"] = 's̱', ["پ"] = 'p', ["ج"] = 'j', ["ڄ"] = 'j̄', ["ڃ"] = "ñ", ["چ"] = 'c', ["ڇ"] = 'ch', ["ح"] = 'ḥ', ["خ"] = 'x', ["د"] = 'd', ["ڌ"] = 'dh', ["ڏ"] = 'ḏ', ["ڊ"] = 'ḍ', ["ڍ"] = 'ḍh', ["ذ"] = 'ẕ', ["ر"] = 'r', ['ڙ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's', ["ش"] = 'ś', ["ص"] = 'ṣ', ["ض"] = 'ẓ', ["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ', ["غ"] = 'ġ', ["ف"] = 'f', ["ڦ"] = 'ph', ["ق"] = 'q', ["ڪ"] = 'k', ["ک"] = 'kh', ["گ"] = 'g', ["ڳ"] = 'g̠', ["ڱ"] = 'ṅ', ["ل"] = 'l', ["م"] = 'm', ["ن"] = 'n', ["ڻ"] = 'ṇ', ["و"] = 'v', ["ہ"] = 'h', ["ي"] = 'y', ["۔"] = ".", ["ں"] = 'ṉ', ["۾"] = 'mẽ', ["۽"] = 'a͠i', ["ھ"] = "h", ["ه"] = "h", ["ؤ"] = "'o", -- diacritics [zabar] = "a", [zer] = "i", [pesh] = "u", [jazm] = "", -- also sukun - no vowel [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner) -- ligatures ["ﻻ"] = "lā", ["ﷲ"] = "allāh", -- kashida ["ـ"] = "-", -- kashida, no sound -- hamza [hamza] = "", -- nothing ["ئ"] = "", -- numerals ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5", ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0", -- punctuation (leave on separate lines) ["؟"] = "?", -- question mark ["،"] = ",", -- comma ["؛"] = ";", -- semicolon ["«"] = '“', -- quotation mark ["»"] = '”', -- quotation mark ["٪"] = "%", -- percent ["؉"] = "‰", -- per mille ["٫"] = ".", -- decimals ["٬"] = ",", -- thousand ["ۓ"] = "-ye", ["ۀ"] = "h-e" -- he ye (in izafat) } function export.tr(text, lang, sc) -- nun with diacritics / matres is consonant, else nasalisation text = gsub(text, nun .. '(' .. diacritics_or_matres .. 
')', 'n%1') text = gsub(text, nun, '̃') -- handle initial او (= o) so alif doesn't later become ā text = gsub(text, alif .. vav, 'o') -- alif text = gsub(text, alif .. zabar, 'a') text = gsub(text, alif .. zer, 'i') text = gsub(text, alif .. pesh, 'u') text = gsub(text, zabar .. alif, 'ā') -- aspirates should have diacritics moved after text = gsub(text, he .. '(' .. diacritics_or_alif .. ')', 'h%1') text = gsub(text, he .. he .. '(' .. not_diacritics .. ')', he .. '%1') text = gsub(text, he .. he .. '$', he) text = gsub(text, '(' .. aspirates .. ')(' .. diacritics .. ')' .. he, '%1' .. he .. '%2') -- vav + ye: DO VOWEL RULES FIRST (fixes لَوْنگُ) text = gsub(text, zabar .. vav, 'au') text = gsub(text, pesh .. vav, 'ū') text = gsub(text, zabar .. ye, 'ai') text = gsub(text, zer .. ye, 'ī') -- extra consonant heuristics demanded by your tests text = gsub(text, vav .. ye, 'v' .. ye) -- وي... -> ve... text = gsub(text, ye .. vav .. '$', ye .. 'v') -- ...يو -> ...ev -- now mark consonantal vav/ye when they have their own marks text = gsub(text, vav .. '(' .. diacritics_or_alif .. ')', 'v%1') text = gsub(text, ye .. '(' .. diacritics_or_alif .. ')', 'y%1') -- default case text = gsub(text, vav, 'o') text = gsub(text, ye, 'e') -- all rules text = gsub(text, '.', mapping) -- remaining alif text = gsub(text, alif, 'ā') return text end return export iz6rt9104xzf2q4aruqohl4amxho3xn 231503 231502 2026-04-15T17:15:12Z Lee 19 [[:en:Module:sd-Arab-translit]] වෙතින් එක් සංශෝධනයක් 231502 Scribunto text/plain --- Taken from [[Module:pa-Arab-translit]] local m_str_utils = require("Module:string utilities") local U = m_str_utils.char local gsub = m_str_utils.gsub local export = {} local zabar = U(0x64E) local zer = U(0x650) local pesh = U(0x64F) local tashdid = U(0x651) -- also called shadda local jazm = "ْ" local hamza = 'ء' local alif = 'ا' local vav = 'و' local ye = 'ي' local he = 'ه' local nun = 'ن' local aspirates = '[ڙمنڻجگلn]' local diacritics = '[' .. zabar .. zer .. pesh .. 
jazm .. ']' local diacritics_or_alif = '[' .. zabar .. zer .. pesh .. jazm .. alif .. ']' local diacritics_or_matres = '[' .. zabar .. zer .. pesh .. jazm .. alif .. vav .. ye .. ']' local not_diacritics = '[^' .. zabar .. zer .. pesh .. jazm .. ']' local mapping = { ["آ"] = 'ā', ["ب"] = 'b', ["ٻ"] = 'ḇ', ["ڀ"] = 'bh', ["ت"] = 't', ["ٿ"] = 'th', ["ٽ"] = 'ṭ', ["ٺ"] = 'ṭh', ["ث"] = 's̱', ["پ"] = 'p', ["ج"] = 'j', ["ڄ"] = 'j̄', ["ڃ"] = "ñ", ["چ"] = 'c', ["ڇ"] = 'ch', ["ح"] = 'ḥ', ["خ"] = 'x', ["د"] = 'd', ["ڌ"] = 'dh', ["ڏ"] = 'ḏ', ["ڊ"] = 'ḍ', ["ڍ"] = 'ḍh', ["ذ"] = 'ẕ', ["ر"] = 'r', ['ڙ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's', ["ش"] = 'ś', ["ص"] = 'ṣ', ["ض"] = 'ẓ', ["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ', ["غ"] = 'ġ', ["ف"] = 'f', ["ڦ"] = 'ph', ["ق"] = 'q', ["ڪ"] = 'k', ["ک"] = 'kh', ["گ"] = 'g', ["ڳ"] = 'g̠', ["ڱ"] = 'ṅ', ["ل"] = 'l', ["م"] = 'm', ["ن"] = 'n', ["ڻ"] = 'ṇ', ["و"] = 'v', ["ہ"] = 'h', ["ي"] = 'y', ["۔"] = ".", ["ں"] = 'ṉ', ["۾"] = 'mẽ', ["۽"] = 'a͠i', ["ھ"] = "h", ["ه"] = "h", ["ؤ"] = "'o", -- diacritics [zabar] = "a", [zer] = "i", [pesh] = "u", [jazm] = "", -- also sukun - no vowel [U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner) -- ligatures ["ﻻ"] = "lā", ["ﷲ"] = "allāh", -- kashida ["ـ"] = "-", -- kashida, no sound -- hamza [hamza] = "", -- nothing ["ئ"] = "", -- numerals ["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5", ["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0", -- punctuation (leave on separate lines) ["؟"] = "?", -- question mark ["،"] = ",", -- comma ["؛"] = ";", -- semicolon ["«"] = '“', -- quotation mark ["»"] = '”', -- quotation mark ["٪"] = "%", -- percent ["؉"] = "‰", -- per mille ["٫"] = ".", -- decimals ["٬"] = ",", -- thousand ["ۓ"] = "-ye", ["ۀ"] = "h-e" -- he ye (in izafat) } function export.tr(text, lang, sc) -- nun with diacritics / matres is consonant, else nasalisation text = gsub(text, nun .. '(' .. diacritics_or_matres .. 
')', 'n%1') text = gsub(text, nun, '̃') -- handle initial او (= o) so alif doesn't later become ā text = gsub(text, alif .. vav, 'o') -- alif text = gsub(text, alif .. zabar, 'a') text = gsub(text, alif .. zer, 'i') text = gsub(text, alif .. pesh, 'u') text = gsub(text, zabar .. alif, 'ā') -- aspirates should have diacritics moved after text = gsub(text, he .. '(' .. diacritics_or_alif .. ')', 'h%1') text = gsub(text, he .. he .. '(' .. not_diacritics .. ')', he .. '%1') text = gsub(text, he .. he .. '$', he) text = gsub(text, '(' .. aspirates .. ')(' .. diacritics .. ')' .. he, '%1' .. he .. '%2') -- vav + ye: DO VOWEL RULES FIRST (fixes لَوْنگُ) text = gsub(text, zabar .. vav, 'au') text = gsub(text, pesh .. vav, 'ū') text = gsub(text, zabar .. ye, 'ai') text = gsub(text, zer .. ye, 'ī') -- extra consonant heuristics demanded by your tests text = gsub(text, vav .. ye, 'v' .. ye) -- وي... -> ve... text = gsub(text, ye .. vav .. '$', ye .. 'v') -- ...يو -> ...ev -- now mark consonantal vav/ye when they have their own marks text = gsub(text, vav .. '(' .. diacritics_or_alif .. ')', 'v%1') text = gsub(text, ye .. '(' .. diacritics_or_alif .. 
')', 'y%1') -- default case text = gsub(text, vav, 'o') text = gsub(text, ye, 'e') -- all rules text = gsub(text, '.', mapping) -- remaining alif text = gsub(text, alif, 'ā') return text end return export iz6rt9104xzf2q4aruqohl4amxho3xn Module:sd-Arab-translit/testcases/documentation 828 142091 231504 2025-12-02T02:39:54Z en>AryamanA 0 Created page with "{{#invoke:sd-Arab-translit/testcases|run_tests}}" 231504 wikitext text/x-wiki {{#invoke:sd-Arab-translit/testcases|run_tests}} rrrzgzc0jl4ah0naxk36zshwkkinop0 231505 231504 2026-04-15T17:15:47Z Lee 19 [[:en:Module:sd-Arab-translit/testcases/documentation]] වෙතින් එක් සංශෝධනයක් 231504 wikitext text/x-wiki {{#invoke:sd-Arab-translit/testcases|run_tests}} rrrzgzc0jl4ah0naxk36zshwkkinop0 Module:sd-Arab-translit/testcases 828 142092 231506 2026-03-26T21:53:57Z en>Penguin112358 0 231506 Scribunto text/plain local full_link = require('Module:links').full_link local sd = require('Module:languages').getByCode('sd') local function link(word) return full_link{ term = word, lang = sd, tr = "-" } end return require("Module:transliteration module testcases") { module = "sd-Arab-translit", examples = { {"ٻُڌَڻُ", "ḇudhaṇu"}, {"ٻُڌو", "ḇudho"}, {"ٻُجَهڻُ", "ḇujhaṇu"}, {"چَمِڙو", "camiṛo"}, {"ماسُ", "māsu"}, {"مانسُ", "mā̃su"}, {"ڏاڙِهي", "ḏāṛhī"}, {"نَڪُ", "naku"}, {"هَٿُ", "hathu"}, {"هَٿِيارُ", "hathiyāru"}, {"اَٿَ", "atha"}, {"گَهَرو", "gaharo"}, {"مينهِن", "mẽhĩ"}, {"مِينهُن", "mī̃hũ"}, {"لَوْنگُ", "laũgu"}, {"ڏِئَڻُ", "ḏiaṇu"}, {"اَنوٺو", "anoṭho"}, {"اوڇو", "ocho"}, {"ويچَڻُ", "vecaṇu"}, {"ميو", "mev"}, {"اَرْتَوارُ", "artavāru"}, {"اِنساَنُ", "insānu"}, {"اَنبُ", "ambu"}, {"اِيمَانُ", "īmānu"}, {"ڇَن٘ڇَرُ", "chañcharu"}, {"مَن٘گَلُ", "maṅgalu"}, {"اَن٘بُ ", "ambu"}, {"تَوهِين", "tavhī̃"}, {"مُصَلّو", "muṣallo"}, {"تَصْوِيرَ ", "taṣvīra"}, {"اُوڻِيهَه", "ūṇīha"}, {"ڀَنڀو جو ڀَنڀورُ ۾، ڪاڪُلُ ڏِٺُمِ ڪالَهہ", "bhanbho jo bhanbhoru mẽ, kākulu ḏiṭhami kālha"}, {"هَاءِ تَنِجٖيْ حَالَ زُلْفَ جِيْ زَافُ ڪِيٌ", "hāi tanijī hāla 
zulfa jī zāfu kiyū̃"}, {"ھاءِ تَنِي جي حالَ، زُلفَ جي زافُ ڪِيُون", "hāi tanī jī hāla, zulfa jī zāfu kiyū̃"}, }, sc = "Arab", lang = "sd", func_with_link = link, } a6pkqqsmu6n4p6s9p00fsvi1ksfv0ss 231507 231506 2026-04-15T17:15:51Z Lee 19 [[:en:Module:sd-Arab-translit/testcases]] වෙතින් එක් සංශෝධනයක් 231506 Scribunto text/plain local full_link = require('Module:links').full_link local sd = require('Module:languages').getByCode('sd') local function link(word) return full_link{ term = word, lang = sd, tr = "-" } end return require("Module:transliteration module testcases") { module = "sd-Arab-translit", examples = { {"ٻُڌَڻُ", "ḇudhaṇu"}, {"ٻُڌو", "ḇudho"}, {"ٻُجَهڻُ", "ḇujhaṇu"}, {"چَمِڙو", "camiṛo"}, {"ماسُ", "māsu"}, {"مانسُ", "mā̃su"}, {"ڏاڙِهي", "ḏāṛhī"}, {"نَڪُ", "naku"}, {"هَٿُ", "hathu"}, {"هَٿِيارُ", "hathiyāru"}, {"اَٿَ", "atha"}, {"گَهَرو", "gaharo"}, {"مينهِن", "mẽhĩ"}, {"مِينهُن", "mī̃hũ"}, {"لَوْنگُ", "laũgu"}, {"ڏِئَڻُ", "ḏiaṇu"}, {"اَنوٺو", "anoṭho"}, {"اوڇو", "ocho"}, {"ويچَڻُ", "vecaṇu"}, {"ميو", "mev"}, {"اَرْتَوارُ", "artavāru"}, {"اِنساَنُ", "insānu"}, {"اَنبُ", "ambu"}, {"اِيمَانُ", "īmānu"}, {"ڇَن٘ڇَرُ", "chañcharu"}, {"مَن٘گَلُ", "maṅgalu"}, {"اَن٘بُ ", "ambu"}, {"تَوهِين", "tavhī̃"}, {"مُصَلّو", "muṣallo"}, {"تَصْوِيرَ ", "taṣvīra"}, {"اُوڻِيهَه", "ūṇīha"}, {"ڀَنڀو جو ڀَنڀورُ ۾، ڪاڪُلُ ڏِٺُمِ ڪالَهہ", "bhanbho jo bhanbhoru mẽ, kākulu ḏiṭhami kālha"}, {"هَاءِ تَنِجٖيْ حَالَ زُلْفَ جِيْ زَافُ ڪِيٌ", "hāi tanijī hāla zulfa jī zāfu kiyū̃"}, {"ھاءِ تَنِي جي حالَ، زُلفَ جي زافُ ڪِيُون", "hāi tanī jī hāla, zulfa jī zāfu kiyū̃"}, }, sc = "Arab", lang = "sd", func_with_link = link, } a6pkqqsmu6n4p6s9p00fsvi1ksfv0ss මැතිවරණමය පර්ෂද 0 142093 231508 2026-04-16T02:32:46Z Lee 19 '== සිංහල == === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් ව...' 
යොදමින් නව පිටුවක් තනන ලදි 231508 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> 37g9s13iio6w9i2yw7aw46v28viw7h4 231509 231508 2026-04-16T02:34:33Z Lee 19 231509 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|electoral colleges}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|ඡන්ද කොට්ඨාශ}} d058937zyhiw1l78noa8sxbhlcvakq8 231518 231509 2026-04-16T02:37:26Z Lee 19 231518 wikitext text/x-wiki == සිංහල == {{ව්}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|electoral colleges}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|ඡන්ද කොට්ඨාශ}} 4w5u6b443vt2uxqyrbidbtgwvkwuazc 231519 231518 2026-04-16T02:37:40Z Lee 19 231519 wikitext text/x-wiki == සිංහල == {{ව්කිපීඩියා}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|electoral colleges}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|ඡන්ද කොට්ඨාශ}} 8ydn8fm5f6nnmjcdozd4xmse9rx4mio 231521 231519 2026-04-16T02:38:18Z Lee 19 231521 wikitext text/x-wiki == සිංහල == {{ව්කිපීඩියාව}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|electoral colleges}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|ඡන්ද කොට්ඨාශ}} is4yoj5oifhmo3hur06ii1316vx4j4z 231522 231521 2026-04-16T02:38:31Z Lee 19 231522 wikitext text/x-wiki == සිංහල == {{wp}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|electoral colleges}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|ඡන්ද 
කොට්ඨාශ}} 6425600f4v2txkiportzdkt1p7cwlyv electoral colleges 0 142094 231510 2019-10-15T14:14:48Z en>WingerBot 0 move lang= to 1= in {{plural of}} 231510 wikitext text/x-wiki {{also|Electoral Colleges}} ==English== ===Noun=== {{head|en|noun form}} # {{plural of|en|electoral college}} tgag694rgy6v08pj37bwerrnld3mbgn 231511 231510 2026-04-16T02:35:05Z Lee 19 [[:en:electoral_colleges]] වෙතින් එක් සංශෝධනයක් 231510 wikitext text/x-wiki {{also|Electoral Colleges}} ==English== ===Noun=== {{head|en|noun form}} # {{plural of|en|electoral college}} tgag694rgy6v08pj37bwerrnld3mbgn electoral college 0 142095 231512 2025-10-31T23:13:05Z en>WingerBot 0 convert {{wikipedia}} to new syntax; convert {{wikipedia}} to {{wp}} 231512 wikitext text/x-wiki {{also|Electoral College}} ==English== {{wp}} ===Noun=== {{en-noun}} # {{lb|en|politics}} A body of [[elector]]s [[empower]]ed to [[elect]] someone to a particular [[office]], such as the [[Holy Roman Emperor]] or the [[president|President]] of the [[United States]]. #: {{ux|en|In Hong Kong, proponents for [[universal suffrage]] criticize that the current '''{{w|Election Committee (Hong Kong)|electoral college}}''' responsible for electing the {{w|Chief Executive of Hong Kong|Chief Executive}} is undemocratic and unrepresentative in nature.}} #* {{quote-book |en |year=1788 |author="Aristocrotis" |title=The Government of Nature Delineated; Or An Exact Picture of the New Federal Constitution |location=Carlisle, PA |url=http://resources.utulsa.edu/law/classes/rice/Constitutional/AntiFederalist/51.htm |passage=[I]f the reigning president pleases his masters, he need be under no apprehensions of being turned out for any severities used to the people, for though the congress may not have influence enough to procure him the majority of the votes of the '''electoral college''', yet they will always be able to prevent any other from having such a majority }} # {{alternative case form of|en|Electoral College}}. 
====Related terms==== {{col|en |electoral vote }} ====Translations==== {{trans-top|body of electors empowered to elect someone to a particular office}} * Asturian: {{t+|ast|colexu eleutoral}} * Catalan: {{t|ca|col·legi electoral|m}} * Chinese: *: Mandarin: {{t|cmn|選舉人團|tr=xuǎnjǔ réntuán}} * Danish: {{t|da|valgmandskollegium|n}}, {{t|da|valgkollegium|n}} * Dutch: {{t+|nl|kiescollege|n}} * Finnish: {{t+|fi|valitsijamiehistö}} * French: {{t+|fr|collège électoral|m}} * Galician: {{t+|gl|colexio electoral|m}} * Georgian: {{t|ka|ამომრჩეველთა კოლეგია}} * German: {{t|de|Wahlmännerkollegium|n}}, {{t|de|Wahlkollegium|n}} * Hungarian: {{t|hu|[[elektori]]/[[elnökválasztó]] [[testület]]/[[kollégium]]}} * Icelandic: {{t|is|kjörmannaráð|n}} * Norwegian: *: Bokmål: {{t|nb|valgkollegium|n}} *: Nynorsk: {{t|nn|valkollegium|n}} * Polish: {{t|pl|kolegium elektorów|n}} * Russian: {{t|ru|[[колле́гия]] [[выборщик|вы́борщиков]]|f}} * Spanish: {{t|es|colegio electoral|m}} * Vietnamese: {{t|vi|đại cử tri đoàn}} (代擧知團) {{trans-bottom}} {{C|en|Collectives}} sknhgc2q9ap2j5zjgpesno3mlxvgoud 231513 231512 2026-04-16T02:35:27Z Lee 19 [[:en:electoral_college]] වෙතින් එක් සංශෝධනයක් 231512 wikitext text/x-wiki {{also|Electoral College}} ==English== {{wp}} ===Noun=== {{en-noun}} # {{lb|en|politics}} A body of [[elector]]s [[empower]]ed to [[elect]] someone to a particular [[office]], such as the [[Holy Roman Emperor]] or the [[president|President]] of the [[United States]]. 
#: {{ux|en|In Hong Kong, proponents for [[universal suffrage]] criticize that the current '''{{w|Election Committee (Hong Kong)|electoral college}}''' responsible for electing the {{w|Chief Executive of Hong Kong|Chief Executive}} is undemocratic and unrepresentative in nature.}} #* {{quote-book |en |year=1788 |author="Aristocrotis" |title=The Government of Nature Delineated; Or An Exact Picture of the New Federal Constitution |location=Carlisle, PA |url=http://resources.utulsa.edu/law/classes/rice/Constitutional/AntiFederalist/51.htm |passage=[I]f the reigning president pleases his masters, he need be under no apprehensions of being turned out for any severities used to the people, for though the congress may not have influence enough to procure him the majority of the votes of the '''electoral college''', yet they will always be able to prevent any other from having such a majority }} # {{alternative case form of|en|Electoral College}}. ====Related terms==== {{col|en |electoral vote }} ====Translations==== {{trans-top|body of electors empowered to elect someone to a particular office}} * Asturian: {{t+|ast|colexu eleutoral}} * Catalan: {{t|ca|col·legi electoral|m}} * Chinese: *: Mandarin: {{t|cmn|選舉人團|tr=xuǎnjǔ réntuán}} * Danish: {{t|da|valgmandskollegium|n}}, {{t|da|valgkollegium|n}} * Dutch: {{t+|nl|kiescollege|n}} * Finnish: {{t+|fi|valitsijamiehistö}} * French: {{t+|fr|collège électoral|m}} * Galician: {{t+|gl|colexio electoral|m}} * Georgian: {{t|ka|ამომრჩეველთა კოლეგია}} * German: {{t|de|Wahlmännerkollegium|n}}, {{t|de|Wahlkollegium|n}} * Hungarian: {{t|hu|[[elektori]]/[[elnökválasztó]] [[testület]]/[[kollégium]]}} * Icelandic: {{t|is|kjörmannaráð|n}} * Norwegian: *: Bokmål: {{t|nb|valgkollegium|n}} *: Nynorsk: {{t|nn|valkollegium|n}} * Polish: {{t|pl|kolegium elektorów|n}} * Russian: {{t|ru|[[колле́гия]] [[выборщик|вы́борщиков]]|f}} * Spanish: {{t|es|colegio electoral|m}} * Vietnamese: {{t|vi|đại cử tri đoàn}} (代擧知團) {{trans-bottom}} {{C|en|Collectives}} 
sknhgc2q9ap2j5zjgpesno3mlxvgoud Electoral College 0 142096 231514 2025-02-22T02:13:12Z en>Vinyl126 0 t+pl:[[Kolegium Elektorów]] ([[WT:EDIT|Assisted]]) 231514 wikitext text/x-wiki {{also|electoral college}} ==English== ===Proper noun=== {{en-proper noun|head=[[electoral|Electoral]] [[college|College]]|def=1}} # {{lb|en|US politics}} An [[electoral college]] [[chosen]], within a state, to formally cast that state's votes for the [[president]] and [[vice president]] of the [[United States]]. #: {{ux|en|Each state's '''Electoral College''' submits its votes to the President of the Senate.}} #* {{quote-book |en |year=1898 |author=Charles N. Kent |chapter=Field and Staff of the Seventeenth |title=History of the Seventeenth Regiment, New Hampshire Volunteer Infantry, 1862-1863 |url=https://archive.org/details/historyofsevente00kent/ |location=[[Concord]], N.H. |OCLC=1536265 |page=44 |pageurl=https://archive.org/details/historyofsevente00kent/page/44/ |text=In later years Colonel Kent has been much in public life. He was an alternate delegate to the National Convention which nominated Abraham Lincoln in 1860, and in 1864 a member of the New Hampshire '''Electoral College''', voting for Lincoln and Johnson.}} #* {{quote-book |en |year=1957 |author=w:Stephen G. Kurtz |title=The Presidency of John Adams: The Collapse of Federalism, 1795-1800 |url=https://archive.org/details/presidencyofjohn0000kurt/ |location=[[Philadelphia]] |publisher=w:University of Pennsylvania Press |ISBN=0-8122-7101-7 |LCCN=57-7764 |OCLC=426043 |pages=171–172 |pageurl=https://archive.org/details/presidencyofjohn0000kurt/page/172/ |text=Oliver Wolcott, Jr., in summing up the election reports for his father at the end of November, pointed to the Rutledge faction as holding the presidency within its hands; the result would turn upon the balloting of the South Carolina '''electoral college''', he stated, and Edward Rutledge was believed to command its decision. 
His reports from all over the Union indicated that Adams lacked but three votes for a majority. His fear was that Rutledge’s disgust with the Jay treaty would give the decision to Jefferson.}} #* {{quote-book |en |year=1961 |chapter=Guilford, "The Metropolis" |title=Official History of Guilford, Vermont, 1678-1961 With Genealogies and Biographical Sketches |url=https://archive.org/details/officialhistoryo00unse/ |publisher={{w|Guilford, Vermont|Town of Guilford}} |OCLC=3984069 |page=157 |pageurl=https://archive.org/details/officialhistoryo00unse/page/157/ |text=Although not now generally known, Guilford had a vital part in the presidential election of 1800, one of its citizens, John Noyes, being a member of the Vermont '''electoral college''', which supported Thomas Jefferson, who was opposed by Aaron Burr, each receiving the same number of electoral votes.}} # {{lb|en|US politics}} All of the [[electoral college]]s of the [[United States]], [[considered]] as one [[body]]. #* {{quote-book |en |year=1954 |author=w:Alben W. Barkley |title=That Reminds Me |url=https://archive.org/details/thatremindsme0000bark/ |location=[[Garden City]], NY |publisher=w:Doubleday & Company |LCCN=54-10775 |OCLC=1222881612 |OL=6156719M |page=276 |pageurl=https://archive.org/details/thatremindsme0000bark/page/276/ |text=In accordance with these opinions I believe that the ancient and outmoded '''Electoral College''' system should be abolished and that the people should vote directly in all the states for President and Vice President. The '''Electoral College''' was established in the beginning of our history for the same reasons which actuated our forefathers in providing that United States senators should be elected by the legislatures rather than by the people of the respective states.}} # {{alternative case form of|en|electoral college}}. 
{{defdate|from 1647}} #* {{quote-journal |en |year=1647 |author= |work=Journal of the House of Lords |title=Some Observations upon the Articles delivered by the Ambassadors of the Emperor |location=London |volume=ix |url=https://hdl.handle.net/2027/mdp.39015056722518?urlappend=%3Bseq=180 |page=174 |passage=The '''Electoral College''' is composed of Six Electors; Three Ecclesiatical, ''Mentz'', ''Trier'', ''Collen''; and Three Secular, ''The Palatine'', ''Sane'', and ''Brandebourg'' }} ====Translations==== {{trans-top|body chosen to elect the president and vice president of the US}} * Belarusian: {{t|be|[[кале́гія]] [[выбаршчык|вы́баршчыкаў]]|f}} * Chinese: *: Mandarin: {{t|cmn|選舉人團|tr=xuǎnjǔ réntuán}} * Danish: {{t|da|valgmandskollegium|n}}, {{t|da|valgkollegium|n}} * Finnish: {{t+|fi|valitsijamiehistö}} * French: {{t+|fr|collège électoral|m}} * German: {{t|de|Electoral College|n}}, {{t|de|Wahlmännergremium|n}}, {{t|de|Wahlmänner-Gremium|n}}, {{t|de|Wahlmännerkollegium|n}} * Japanese: {{t|ja|選挙人団|tr=せんきょにんだん, senkyo nindan}} * Korean: {{t+|ko|선거인단}} * Norwegian: *: Bokmål: {{t|nb|valgmannskollegium|n}} *: Nynorsk: {{t|nn|valmannskollegium|n}} * Polish: {{t|pl|Kolegium Elektorów|n}} * Romanian: {{t|ro|Colegiu Electoral|n}} * Russian: {{t|ru|[[колле́гия]] [[выборщик|вы́борщиков]]|f}} * Turkish: {{t+|tr|seçiciler kurulu}} * Ukrainian: {{t|uk|[[коле́гія]] [[виборник|виборникі́в]]|f}} {{trans-bottom}} ==German== ===Noun=== {{de-noun|n.sg|nolinkhead=1}} # {{lb|de|politics}} [[electoral college]] rwxavbhd5pycguj8hvl31o7ow8obnmy 231515 231514 2026-04-16T02:36:00Z Lee 19 [[:en:Electoral_College]] වෙතින් එක් සංශෝධනයක් 231514 wikitext text/x-wiki {{also|electoral college}} ==English== ===Proper noun=== {{en-proper noun|head=[[electoral|Electoral]] [[college|College]]|def=1}} # {{lb|en|US politics}} An [[electoral college]] [[chosen]], within a state, to formally cast that state's votes for the [[president]] and [[vice president]] of the [[United States]]. 
#: {{ux|en|Each state's '''Electoral College''' submits its votes to the President of the Senate.}} #* {{quote-book |en |year=1898 |author=Charles N. Kent |chapter=Field and Staff of the Seventeenth |title=History of the Seventeenth Regiment, New Hampshire Volunteer Infantry, 1862-1863 |url=https://archive.org/details/historyofsevente00kent/ |location=[[Concord]], N.H. |OCLC=1536265 |page=44 |pageurl=https://archive.org/details/historyofsevente00kent/page/44/ |text=In later years Colonel Kent has been much in public life. He was an alternate delegate to the National Convention which nominated Abraham Lincoln in 1860, and in 1864 a member of the New Hampshire '''Electoral College''', voting for Lincoln and Johnson.}} #* {{quote-book |en |year=1957 |author=w:Stephen G. Kurtz |title=The Presidency of John Adams: The Collapse of Federalism, 1795-1800 |url=https://archive.org/details/presidencyofjohn0000kurt/ |location=[[Philadelphia]] |publisher=w:University of Pennsylvania Press |ISBN=0-8122-7101-7 |LCCN=57-7764 |OCLC=426043 |pages=171–172 |pageurl=https://archive.org/details/presidencyofjohn0000kurt/page/172/ |text=Oliver Wolcott, Jr., in summing up the election reports for his father at the end of November, pointed to the Rutledge faction as holding the presidency within its hands; the result would turn upon the balloting of the South Carolina '''electoral college''', he stated, and Edward Rutledge was believed to command its decision. His reports from all over the Union indicated that Adams lacked but three votes for a majority. 
His fear was that Rutledge’s disgust with the Jay treaty would give the decision to Jefferson.}} #* {{quote-book |en |year=1961 |chapter=Guilford, "The Metropolis" |title=Official History of Guilford, Vermont, 1678-1961 With Genealogies and Biographical Sketches |url=https://archive.org/details/officialhistoryo00unse/ |publisher={{w|Guilford, Vermont|Town of Guilford}} |OCLC=3984069 |page=157 |pageurl=https://archive.org/details/officialhistoryo00unse/page/157/ |text=Although not now generally known, Guilford had a vital part in the presidential election of 1800, one of its citizens, John Noyes, being a member of the Vermont '''electoral college''', which supported Thomas Jefferson, who was opposed by Aaron Burr, each receiving the same number of electoral votes.}} # {{lb|en|US politics}} All of the [[electoral college]]s of the [[United States]], [[considered]] as one [[body]]. #* {{quote-book |en |year=1954 |author=w:Alben W. Barkley |title=That Reminds Me |url=https://archive.org/details/thatremindsme0000bark/ |location=[[Garden City]], NY |publisher=w:Doubleday & Company |LCCN=54-10775 |OCLC=1222881612 |OL=6156719M |page=276 |pageurl=https://archive.org/details/thatremindsme0000bark/page/276/ |text=In accordance with these opinions I believe that the ancient and outmoded '''Electoral College''' system should be abolished and that the people should vote directly in all the states for President and Vice President. The '''Electoral College''' was established in the beginning of our history for the same reasons which actuated our forefathers in providing that United States senators should be elected by the legislatures rather than by the people of the respective states.}} # {{alternative case form of|en|electoral college}}. 
{{defdate|from 1647}} #* {{quote-journal |en |year=1647 |author= |work=Journal of the House of Lords |title=Some Observations upon the Articles delivered by the Ambassadors of the Emperor |location=London |volume=ix |url=https://hdl.handle.net/2027/mdp.39015056722518?urlappend=%3Bseq=180 |page=174 |passage=The '''Electoral College''' is composed of Six Electors; Three Ecclesiatical, ''Mentz'', ''Trier'', ''Collen''; and Three Secular, ''The Palatine'', ''Sane'', and ''Brandebourg'' }} ====Translations==== {{trans-top|body chosen to elect the president and vice president of the US}} * Belarusian: {{t|be|[[кале́гія]] [[выбаршчык|вы́баршчыкаў]]|f}} * Chinese: *: Mandarin: {{t|cmn|選舉人團|tr=xuǎnjǔ réntuán}} * Danish: {{t|da|valgmandskollegium|n}}, {{t|da|valgkollegium|n}} * Finnish: {{t+|fi|valitsijamiehistö}} * French: {{t+|fr|collège électoral|m}} * German: {{t|de|Electoral College|n}}, {{t|de|Wahlmännergremium|n}}, {{t|de|Wahlmänner-Gremium|n}}, {{t|de|Wahlmännerkollegium|n}} * Japanese: {{t|ja|選挙人団|tr=せんきょにんだん, senkyo nindan}} * Korean: {{t+|ko|선거인단}} * Norwegian: *: Bokmål: {{t|nb|valgmannskollegium|n}} *: Nynorsk: {{t|nn|valmannskollegium|n}} * Polish: {{t|pl|Kolegium Elektorów|n}} * Romanian: {{t|ro|Colegiu Electoral|n}} * Russian: {{t|ru|[[колле́гия]] [[выборщик|вы́борщиков]]|f}} * Turkish: {{t+|tr|seçiciler kurulu}} * Ukrainian: {{t|uk|[[коле́гія]] [[виборник|виборникі́в]]|f}} {{trans-bottom}} ==German== ===Noun=== {{de-noun|n.sg|nolinkhead=1}} # {{lb|de|politics}} [[electoral college]] rwxavbhd5pycguj8hvl31o7ow8obnmy Electoral Colleges 0 142097 231516 2023-03-17T16:56:33Z en>WingerBot 0 remove horizontal rule separators per [[Wiktionary:Votes/2023-02/Removing the horizontal rule]] 231516 wikitext text/x-wiki {{also|electoral colleges}} ==English== ===Noun=== {{head|en|noun form|head=Electoral Colleges}} # {{plural of|en|Electoral College}} ==German== ===Noun=== {{head|de|noun form}} # {{inflection of|de|Electoral College||gen|s}} kgyq1s97vdfc2pq0nlgebey2aghk1bo 
231517 231516 2026-04-16T02:36:30Z Lee 19 [[:en:Electoral_Colleges]] වෙතින් එක් සංශෝධනයක් 231516 wikitext text/x-wiki {{also|electoral colleges}} ==English== ===Noun=== {{head|en|noun form|head=Electoral Colleges}} # {{plural of|en|Electoral College}} ==German== ===Noun=== {{head|de|noun form}} # {{inflection of|de|Electoral College||gen|s}} kgyq1s97vdfc2pq0nlgebey2aghk1bo ඡන්ද කොට්ඨාශ 0 142098 231520 2026-04-16T02:37:58Z Lee 19 '== සිංහල == {{විකිපීඩියා}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත...' යොදමින් නව පිටුවක් තනන ලදි 231520 wikitext text/x-wiki == සිංහල == {{විකිපීඩියා}} === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> 4ef76akcng5chsixpguneno7tj85p3l ප්‍රවර්ගය:Pali pronouns, Brahmi අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද 14 142099 231525 2026-04-16T10:50:19Z Pinthura 2424 සේවා: මෘදු ප්‍රවර්ග යළියොමුවක් නිර්මාණය. 231525 wikitext text/x-wiki {{category redirect|පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද}} 543nyc9evra60ageraph9wn2v2gxayf ප්‍රවර්ගය:පාලි සර්වනාම in බ්‍රාහ්මී script 14 142100 231526 2026-04-16T10:50:29Z Pinthura 2424 සේවා: මෘදු ප්‍රවර්ග යළියොමුවක් නිර්මාණය. 231526 wikitext text/x-wiki {{category redirect|පාලි සර්වනාම, බ්‍රාහ්මී අක්ෂරක්‍රමය භාවිතා කොට ලියන ලද}} 543nyc9evra60ageraph9wn2v2gxayf ප්‍රවර්ගය:පාලි noun forms 14 142101 231528 2026-04-16T10:50:49Z Pinthura 2424 සේවා: මෘදු ප්‍රවර්ග යළියොමුවක් නිර්මාණය. 
231528 wikitext text/x-wiki {{category redirect|පාලි නාම පද ස්වරූප}} bjc3xjyafzu2nrt5gf93rrf2w25a0u9 කිරි හල 0 142102 231532 2026-04-16T11:11:07Z WrdSrchSi 3305 Page created: + IPA|si + මූලාශ්‍ර 231532 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} ===උච්චාරණය=== * {{IPA|si|/ˈkiri ɦalə/}} === නාම පදය === {{si-noun|කිරි හල්}} # {{rfdef|si}} === මූලාශ්‍ර === <references/> * "11. කිරි හළ ද; කිරි හල ද? [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba1.pdf]", '''''පහදුව''''', — එඩිය, 4 වෙළුම, 1 කලබ, 15 පිටුව, 2506 බක් - 1963 අප්රේල් <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> cb45oh1v43w6zmjyn8217rhk7vzabo8 වෙළෙඳ පොළ 0 142103 231533 2026-04-16T11:18:29Z WrdSrchSi 3305 Page created: + IPA|si + මූලාශ්‍ර 231533 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} ===උච්චාරණය=== * {{IPA|si|/ˈʋeɭeⁿd̪ə poɭə/}} === නාම පදය === {{si-noun}} # {{rfdef|si}} === මූලාශ්‍ර === <references/> * "11. වෙළඳ පොළ ද; වෙළෙඳ පොළ ද? [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba1.pdf]", '''''පහදුව''''', — එඩිය, 4 වෙළුම, 1 කලබ, 15 පිටුව, 2506 බක් - 1963 අප්රේල් <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> nivga7ger0y6389s1l8bv7uapczgprd කුමරදස් 0 142104 231534 2026-04-16T11:21:40Z WrdSrchSi 3305 Page created. 
231534 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} === සංඥා නාම පදය === {{head|si|proper noun}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> kh23eljxc95gheigca0f62yqxquw0e2 අසුනට 0 142105 231535 2026-04-16T11:25:06Z WrdSrchSi 3305 Page created: + definitions (2) 231535 wikitext text/x-wiki ==සිංහල== === නිරුක්තිය === {{rfe|si}} ===නාම පදය=== {{head|si|noun form}} # {{l|si|ආසනයට}} # {{l|si|අශ්වයනට}} rkksatbdm7qrj1gpjulan0yfvjy8epa පැවිද්දෝ 0 142106 231536 2026-04-16T11:26:46Z WrdSrchSi 3305 Page created: + plural of|si 231536 wikitext text/x-wiki ==සිංහල== === නිරුක්තිය === {{rfe|si}} ===නාම පදය=== {{head|si|noun form}} # {{plural of|si|පැවිද්දා}} tfpw7n5fjv4rj5usfzjr52ft8uyzmea අබාවන් 0 142107 231537 2026-04-16T11:29:53Z WrdSrchSi 3305 Page created. 231537 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} === සංඥා නාම පදය === {{head|si|proper form}} # {{rfdef|si}} <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> ggq7n5525re74imqi2kl0qkzwgdvju4 සිරිසඟබෝවරු 0 142108 231538 2026-04-16T11:31:25Z WrdSrchSi 3305 Page created. 231538 wikitext text/x-wiki ==සිංහල== === නිරුක්තිය === {{rfe|si}} ===නාම පදය=== {{head|si|noun form}} # {{rfdef|si}} 4bk4s1b4g1dyqtbh6fxsfy4ojsiyxw3 නැටැවීමෙහි 0 142109 231539 2026-04-16T11:35:33Z WrdSrchSi 3305 Page created. 
231539 wikitext text/x-wiki ==සිංහල== === නිරුක්තිය === {{rfe|si}} ===නාම පදය=== {{head|si|noun form}} # {{rfdef|si}} 4bk4s1b4g1dyqtbh6fxsfy4ojsiyxw3 තෙල 0 142110 231540 2026-04-16T11:41:49Z WrdSrchSi 3305 Page created: + definition + මූලාශ්‍ර 231540 wikitext text/x-wiki == සිංහල == === නිරුක්තිය === {{rfe|si}} === නාම පදය === {{si-noun}} # {{rfdef|si}} === නාම විශේෂණ පදය === {{si-adj}} # {{l|si|අර}} (that) # {{rfdef|si}} === මූලාශ්‍ර === <references/> * "18. [https://www.ahubudu.lk/assets/pdf/EDIYA_Veluma4_Kalaba1.pdf]", '''''පහදුව''''', — එඩිය, 4 වෙළුම, 1 කලබ, 17 පිටුව, 2506 බක් - 1963 අප්රේල් <!-- ==== පරිවර්තන ==== {{trans-top|පරිවර්තන}} * ඉංග්‍රීසි: {{t|en|<<ඉංග්‍රීසි වචනය>>}} {{trans-bottom}} === අමතර අවධානයට === * {{l|si|<<ආශ්‍රිත පවතින වෙනත් වචන>>}} --> 35s7f9c7aai6sy9pg6q1u6ggmi8tq2c Module:place 828 142111 231542 2025-11-15T02:11:59Z en>Benwing2 0 format_form_of() now returns categories as second argument 231542 Scribunto text/plain local export = {} local force_cat = false -- set to true for testing local m_placetypes = require("Module:place/placetypes") local m_links = require("Module:links") local memoize = require("Module:memoize") local m_strutils = require("Module:string utilities") local m_table = require("Module:table") local debug_track_module = "Module:debug/track" local en_utilities_module = "Module:en-utilities" local form_of_module = "Module:form of" local languages_module = "Module:languages" local parse_interface_module = "Module:parse interface" local parse_utilities_module = "Module:parse utilities" local parameter_utilities_module = "Module:parameter utilities" local utilities_module = "Module:utilities" local enlang = require(languages_module).getByCode("en") local rmatch = m_strutils.match local rfind = m_strutils.find local ulen = m_strutils.len local split = m_strutils.split local dump = mw.dumpObject local insert = table.insert local concat = table.concat local pluralize = require(en_utilities_module).pluralize local extend = 
m_table.extend local unpack = unpack or table.unpack -- Lua 5.2 compatibility local internal_error = m_placetypes.internal_error local process_error = m_placetypes.process_error local placetype_data = m_placetypes.placetype_data --[==[ intro: ===Introduction=== This module implements {{tl|place}}, which is a template for standardizing the description and categorization of toponyms (terms that refer to locations such as cities, countries, rivers, etc.). The following modules support this template: * [[Module:place]]: The main module. * [[Module:place/placetypes]]: A module containing data on placetypes, as well as utilities for working with placetypes; category generation handlers for adding categories based on placetypes; and display handlers for displaying holonyms (i.e. containing locations) of a specific type. FIXME: Maybe split out the code from the data. * [[Module:place/locations]]: A module containing data on known locations, as well as utilities for working with such locations. FIXME: Maybe split out the code from the data. * [[Module:category tree/topic/Places]]: A category tree module for generating the descriptions of all categories generated by {{tl|place}}. * [[Module:place doc]]: A module that generates documentation tables describing known placetypes and locations. ===Basic terminology=== The basic terminology used in this and associated {{tl|place}} modules is: * A ''location'' (or equivalently, a ''place'') is any geographic feature (either natural or geopolitical), either on the surface of the Earth or elsewhere. Examples of types of natural places are rivers, mountains, seas and moons; examples of types of geopolitical places are cities, countries, neighborhoods and roads. A ''known location'' is specifically a location whose properties are specified in the {{tl|place}} modules; more on them below. * Specific places are identified by names, referred to as ''toponyms'' or ''placenames''. 
A given place will often have multiple names, and a given toponym may be ambiguous, referring to multiple possible locations. Specifically: ** There may be names including different amounts of disambiguating information (`Tucson` vs. `Tucson, Arizona` vs. `Tucson, Arizona, USA` or `New York` vs. `New York City` vs. `New York, New York`); abbreviations (`NYC` for `New York City`, `USA` for `United States of America`); ''official'' vs. ''short'' names (e.g. `Union of Soviet Socialist Republics` vs. `Soviet Union`); spelling variations (`Cracow` vs. `Krakow` vs. `Kraków`); current vs. former names (`Saint Petersburg` vs. `Leningrad` vs. `Petrograd`); [[exonym]]s vs. [[endonym]]s (e.g. `Tavastia Proper` vs. `Kanta-Häme`, both referring to the same administrative region in Finland); alternative names not due to any of the above reasons (`Bashkiria` vs. `Bashkortostan`); etc. In addition, each language that has an opportunity to refer to the place will have its own name, with the same sorts of variations as exist in English. ** Examples of ambiguous toponyms are `New York` (either a city or a state); `Georgia` (either a state of the US or an independent country in the Caucasus Mountains); `Paris` (either the capital of France or various small cities and towns in the US); `Mexico` (either a country, a state of that country, or the capital city of that country); and `San Antonio` (besides being a major city in Texas, it is the name of dozens of settlements of all sorts throughout the US and Latin America, and at least 181 distinct [[barangay]]s in the Philippines). * A ''placetype'' is the (or a) type that a location belongs to (e.g. `city`, `state`, `river`, `administrative region`, `[[regional county municipality]]`, etc.). ** It is common for locations to be described using multiple placetypes, and even sometimes known locations have multiple placetypes that they may be identified by (e.g.
American Samoa can be identified either as an `unincorporated territory`, an `overseas territory` or just a `territory`). Both the {{tl|place}} template and the known location data allow a given location to be identified by multiple placetypes. When in doubt as to the correct placetype or placetypes for a given location, generally follow how Wikipedia describes the place. ** Some placetypes themselves are ambiguous; e.g. an ''area'' can variously refer to a top-level administrative division (specifically of Kuwait); a geographic region, generally without unambiguously defined borders; or a section of a city, similar to a neighborhood. The term ''district'' is similarly ambiguous. A ''[[prefecture]]'' in the context of Japan is similar to a province, but a prefecture in France is the capital of a ''[[department]]'' (which is similar to a county). Some of this ambiguity is currently handled automatically; e.g. the ambiguity of areas and districts is handled by looking at the ''holonyms'', or containing locations, specified for a given place. But sometimes it is necessary to use a qualifier before the placetype to disambiguate; for example to refer to a French prefecture, use the placetype `French prefecture` instead of just `prefecture`. (FIXME: Handle this automatically.) * A ''holonym'', in the context of a description of a place, is a placename that refers to a larger-sized entity that contains the location being described. For example, `Arizona` and `United States` are holonyms of `Tucson`, and `United States` is a holonym of `Arizona`. * A ''place invocation'' consists of the invocation of {{tl|place}}, including all its parameters. Place invocations may contain one or more ''place descriptions'', each of which provides a description of the location, including its placetype or types, any holonyms, and any additional raw text needed to properly explain the place in context. 
Place invocations may also contain named parameters specifying zero or more English ''glosses'' or translations (for foreign-language toponyms) and any attached ''extra information'' such as the capital, largest city, official name, modern name or full name. Multiple place descriptions in a single invocation are separated by a numbered parameter starting with a semicolon, and are used when it is necessary to provide two or more definitions of a single location for proper categorization. For example, [[Vatican City]] is defined both as a city-state in Southern Europe and as an enclave within the city of Rome, as follows: : {{tl|place|en|city-state|r/Southern Europe|;,|an <<enclave>> within the city of [[Rome]], [[Italy]]|cat=Places in Rome|official=Vatican City State}}. Similar things need to be done for places like [[Crimea]] that are claimed by two different countries with different definitions and administrative structures. ** There are two types of place descriptions, ''new-style'' and ''old-style''. (The use of the terms "new" and "old" indicates chronological precedence in the development of {{tl|place}}, but is not meant to pass any value judgments on the two types, and does not indicate any intent to deprecate old-style descriptions. Both types of descriptions are useful; for example, old-style descriptions are generally more succinct but less flexible.) The above invocation shows both types: an old-style description followed by a new-style description. Old-style descriptions use multiple numbered parameters, where the first parameter (after the language code) specifies the placetype or types, and following parameters specify either holonyms (which are always of the form ` ``placetype``/``placename`` `) or raw text (which is identifiable by not having a slash in it). New-style descriptions use a single parameter, where both placetypes and holonyms are surrounded by double angle brackets, and all remaining text is raw (displayed as-is).
In both types of descriptions, holonyms include a slash in them to separate the placetype (which is mandatory and often abbreviated) from the placename. ** In the context of a place description, there are two types of placetypes. The ''entry placetypes'' are the placetypes of the place being described, while the ''holonym placetypes'' are the placetypes of the holonyms that the place being described is located within. Currently, a given place can have multiple placetypes specified (e.g. [[Normandy]] is specified using the ''compound placetype'' `administrative region/former province/and/medieval kingdom`) while a given holonym can have only one placetype associated with it. Holonym placetypes are frequently abbreviated (e.g. `r` for `region`, `s` for `state`, `co` for `county`, etc.), while stylistically it is preferred to spell out the entry placetype (except for some long placetypes with well-known abbreviations, such as `CDP` or `cdp` for `[[census-designated place]]`). ** All holonyms in place descriptions are automatically linked as if surrounded by {{tl|l|en|...}}; i.e. if double brackets do not occur in the holonym, the entire holonym will be linked to the corresponding Wiktionary article. For this reason, the holonym should generally be in the same format as the canonical Wiktionary article describing the location (see below). * A ''known location'' is a location whose properties are specifically defined in the {{tl|place}} modules. Generally each such location has an associated category, and known locations exist in a containment hierarchy, where the immediately containing known location is known as the ''container'' of the location and the chain of successive containing locations is known as the ''container trail''. Generally the location's container corresponds to the first parent of its category. Note that some known locations belong to more than one immediate container; for example, Russia belongs to both Europe and Asia.
===More about placetypes=== # The following general categories of placetypes exist: ## ''Natural features'' such as lakes, mountains, mountain ranges, islands, archipelagoes, moons, stars, asteroids, etc. ## ''Continents'', ''supercontinents'' (groupings of continents where it makes sense, such as `America` and `Eurasia`) and ''continent-level regions'' (grouping of countries in a given continent, such as `Central America` and `Polynesia`). ## ''Political entities'', which are generally classified as either ''polities'' (top-level entities such as countries), ''subpolities'' or ''political divisions'' (non-sovereign divisions, often specifically ''administrative divisions'', of a polity, where an administrative division has a governmental or statistical function and almost always has unambiguously defined boundaries), or ''settlements'' (e.g. cities; towns; villages; and divisions of a city such as neighborhoods, wards, [[barrio]]s and [[barangay]]s, which may or may not be formal administrative divisions and may or may not have unambiguous boundaries). ## ''Geographic regions'', which refer to recognized areas of the Earth (either with a natural geographic, political or cultural significance, often of a historical nature). Such regions can be of greatly varying size, may exist either within a single country or spanning multiple countries or (more often) parts of multiple countries, and may not have well-defined boundaries. They should be distinguished from ''administrative regions'', which exist within a single country and have well-defined boundaries and a political or administrative function. Geographic regions are categorized using the generic term ''geographic and cultural areas'' to emphasize that (a) they have no administrative significance; (b) they may vary greatly in size; and (c) their cohesion is due either to natural geographic boundaries, such as rivers or mountain ranges, or to sharing some cultural characteristics. 
## ''Man-made structures'' below the level of a settlement or neighborhood, such as airports, roads, individual buildings, and the like. (Note that such structures, even if named, often do not meet the [[WT:CFI]] criteria; this is particularly the case for roads.) # Placetypes support aliases, and the mapping to canonical form happens early on in the processing. For example, `state` can be abbreviated as `s`; `administrative region` as `adr`; `regional county municipality` as `rcomun`; etc. Some placetype aliases handle alternative spellings rather than abbreviations. For example, `departmental capital` maps to `department capital`, and `home-rule city` maps to `home rule city`. Placetype abbreviations are particularly useful in holonym specs, because every holonym must be accompanied by its placetype, for disambiguation purposes. # A ''placetype qualifier'' is an adjective prepended to the placetype to give additional information about the place being described. For example, a given place may be described as a `small city`; logically this is still a city, but the qualifier `small` gives additional information about the place. Multiple qualifiers can be stacked, e.g. `small affluent beachfront unincorporated community`, where `unincorporated community` is a recognized placetype and `small`, `affluent` and `beachfront` are qualifiers. (As shown here, it may not always be obvious where the qualifiers end and the placetype begins.) For the most part, placetype qualifiers do not affect categorization; a `small city` is still a city and an `affluent beachfront unincorporated community` is still an unincorporated community, and both should still be categorized as such. But some qualifiers do change the categorization. In particular, a `former province` is no longer a province and should not be categorized in e.g. [[:Category:Provinces of Italy]], but instead in a different set of categories, e.g. [[:Category:Historical political subdivisions]]. 
There are several terms treated as equivalent for this purpose: `abandoned`, `ancient`, `extinct`, `historic(al)`, `medi(a)eval` and `traditional`. Another set of qualifiers that change categorization are `fictional` and `mythological`, which cause any term using the qualifier to be categorized respectively into [[:Category:Fictional locations]] and [[:Category:Mythological locations]]. ===More about toponyms=== # Toponyms may be: ## ''simple'' (not including any containing location in its name, such as `Tucson`) or ''multipart'' (including one or more containing locations, such as `Tucson, Arizona` or `Tucson, USA` or even `Tucson, Arizona, USA`); ## ''bare'' (not including the word `the` if the location normally requires this article when following a preposition, such as `United States`, `Gambia` or `Community of Madrid`) or ''prefixed'' (including the word `the` as needed, such as `the United States`, `the Gambia` or `the Community of Madrid`); ## ''elliptical'' (just the placename without any disambiguating placetype, such as `Durham`, `New York` or `Mexico`) or ''full'' (containing a disambiguating placetype or similar identifier if one is commonly included, such as the city of `Durham` (in England) vs. its containing county `County Durham`; the US city `New York City` vs. its containing state `New York`; or the three-way distinction between `Mexico` (the country), `Mexico City` (the capital of this country) and `(the) State of Mexico` (one of the states of the country Mexico, mostly surrounding but not including Mexico City)). # The ''canonical Wiktionary article'' is the main article on Wiktionary where a location is described. Canonical articles, per the above terminology, are generally ''simple'' and ''bare'', but may be either ''full'' or ''elliptical''. The fact that a given article is canonical is often identifiable by the fact that translations are housed there and not somewhere else.
For example, most counties of the US and Canada include the word `County` in their canonical article name, but most counties elsewhere do not. `Washington, D.C.` is one of the few cases where a non-simple toponym is used as the canonical article; this is based on common usage, especially by residents of the city in question (who commonly refer to it as "D.C." but rarely just as "Washington"). ===More about known locations=== # The following types of known locations are defined in this module: ## Continents, supercontinents and continent-level regions, into which countries are grouped. Specifically: ### At the top level below `Earth` are the supercontinents `America` and `Eurasia` and the continents `Africa`, `Oceania` and `Antarctica`. ### `America` is further broken down into the continents `North America` (in turn containing the continental regions `Central America` and `Caribbean`, with the United States, Canada and Mexico directly under North America) and `South America`. ### `Eurasia` is further broken down into the continents `Europe` and `Asia`. ### `Oceania` is further broken down into the continental regions `Melanesia`, `Micronesia` and `Polynesia`, with `Australia` directly under `Oceania`. ### Under the above-specified divisions are countries. Some countries are placed in more than one continent or continent-level region, either because they actually span two continents (e.g. Russia, Turkey, Kazakhstan, Egypt) or because they are politically considered to belong to a continent different from the one they are geographically in (Cyprus, Georgia, Armenia, etc.).
## Political entities, including: ### Top-level political entities, which includes: #### Countries, with a fairly liberal definition, notably including all UN-recognized countries plus some others that are commonly considered countries, even if not all other countries recognize them as such or consider them completely independent (notably, Kosovo, Palestine, Taiwan, Western Sahara, Niue and the Cook Islands). #### Pseudo-countries, which include areas calling themselves countries that are de-facto not under the control of the country that they are internationally considered part of (e.g. Abkhazia, South Ossetia, Transnistria); dependent/external/etc. territories of countries (e.g. American Samoa [US], Bermuda [UK], Christmas Island [Australia], Easter Island [Chile]); constituent countries, autonomous territories and the like (Aruba, Curaçao and Sint Maarten of the Netherlands; Greenland and the Faroe Islands of Denmark; etc.; but notably not including England, Scotland, Northern Ireland and Wales, which are treated as regular countries); and a grab bag of other entities that have a semi-independent existence, such as Hong Kong, Macau, Guadeloupe, Martinique and the like. Currently, the actual distinction in treatment between "countries" and "country-like entities" is minimal, but in the future we might restrict the sorts of subcategories of country-like entities more than regular countries. #### Former countries, e.g. the Soviet Union, Yugoslavia, West Germany and the Roman Empire. These are much more limited in the sorts of subcategories allowed, because generally locations, especially cities, should be described from the perspective of which political entity they are currently located in (e.g. "an ancient Roman town in modern Syria") and categorized as such. ### Subpolities. 
Generally we only list top-level administrative divisions of countries (and only fairly major countries are usually included), but sometimes we list second-level administrative divisions, as in the case of the United Kingdom (where the top-level administrative divisions of the four constituent countries are listed) and China (where major prefecture-level cities are listed, and are considered administrative divisions rather than cities). ### Cities. Only major cities get categories, with the definition of "major" varying by country but often including those where the city population itself (sometimes the metro area) is >= 1,000,000 people. # A distinction should be made in the {{tl|place}} modules between ''keys'' and ''placenames''. Placenames are as the location appears in a holonym, and are generally in the same format as the canonical Wiktionary article describing the location so that when formatted as a link, the link goes to the right article; i.e. they are simple and bare, and may be full or elliptical according to Wiktionary conventions. The ''canonical key'' of a location is how the location's category is named, and always uniquely identifies the location from among the known locations in this module (but not necessarily among all possible locations). In particular, subpolities usually have multipart keys that include the containing location, such as `Anhui, China` (not just `Anhui`); `Arizona, USA` (not just `Arizona`, and also not `Arizona, United States`); and `Herefordshire, England` (not just `Herefordshire`, and also in this case not `Herefordshire, UK` or `Herefordshire, England, UK` or any other possible variation). Cities are normally simple, but some cities are multipart for disambiguation purposes (e.g. `Newcastle, New South Wales` for the city in Australia vs. `Newcastle upon Tyne` for the identically-named city in England). Canonical keys may have ''key aliases'', other ways of referring to the location that are not necessarily unique (e.g. 
`Newcastle` is a key alias for both of the above-mentioned cities), and city keys with diacritics generally have diacriticless aliases, such as canonical key `Düsseldorf` vs. key alias `Dusseldorf`, or canonical key `Łódź` vs. key alias `Lodz`. # Known locations are gathered into ''groups'' with similar properties, such as all the states of the United States; all the (ceremonial) counties of England (see below); and all the "sufficiently major" prefecture-level cities in China (where a prefecture-level city is a prefecture surrounding a major city with a unified government and is more like a prefecture, i.e. a major administrative division just underneath a province, than like a city, and where "sufficiently major" is defined according to the population of either the total prefecture or the urban area of the city). Note that there are multiple types of counties in England, with overlapping but non-identical names and boundaries; there are, in particular, ''ceremonial counties'', ''local government counties'' and ''historic counties''; ''ceremonial counties'' have only ceremonial administrative functionality but unlike local government counties (a) don't frequently change their boundaries or nature, (b) correspond more closely to historic county boundaries and names, and (c) are what Englanders usually identify themselves with, and so they are used as top-level divisions rather than local government counties. # Some known locations have ''aliases'' defined, which are of two types. ''Display aliases'' map holonyms to their canonical form near the beginning of processing (in particular before the displayed output is formatted). For example, `US`, `U.S.`, `USA`, `U.S.A.` and `United States of America` are all canonicalized to `United States` (if identified as a country), and display as `United States`. 
Similarly, the foreign forms `Occitanie` (as a region or administrative region) and `Noord-Brabant` (as a province) are mapped to `Occitania` and `North Brabant` for display purposes. There are also ''category aliases'', so that if e.g. `Republic of Macedonia` is encountered, it will display as such but categorize as `North Macedonia`. (This is because, among other reasons, `Republic of Macedonia` is normally preceded by `"the"` while `North Macedonia` is not, so a call {{tl|place|en|a <<city>> in the <<c/Republic of Macedonia>>}} would look wrong if `Republic of Macedonia` were converted to `North Macedonia` during display, as the result would be `a city in the North Macedonia`. There are also frequently political connotations to different category aliases, e.g. `Burma` vs. `Myanmar`.) All of these aliases are sensitive to the placetype specified. For example, `Mexico` as a state is categorized under `State of Mexico, Mexico` but `Mexico` the country is categorized as just `Mexico`. ===Categories=== There are two main types of categories: # Categories for known locations, divided into: ## Top-level polity categories (e.g. [[:Category:United States]], [[:Category:Taiwan]], [[:Category:South Ossetia]], [[:Category:Bermuda]], [[:Category:Soviet Union]], [[:Category:West Germany]]). ## Subpolity categories ([[:Category:Arizona, USA]], [[:Category:Hunan]], [[:Category:Kagoshima Prefecture]], [[:Category:Cluj County, Romania]]). For historical reasons, different formats are used for the subpolities of different polities. Increasingly, we are moving towards always including the polity name in the subpolity category, but whether the subpolity type is included and where it is included (cf. [[:Category:Cluj County, Romania]] vs. [[:Category:County Cork, Ireland]]) is still inconsistent and will probably remain that way, based on how the subpolity is normally referred to. ## City categories ([[:Category:Tokyo]], [[:Category:New York City]], [[:Category:Jaipur]]).
Normally these do not include the containing subpolity, but may do so in order to disambiguate. # Categories for placetypes, divided into: ## "Immediate" political and non-political division categories ([[:Category:States of the United States]], [[:Category:Municipalities of Tocantins, Brazil]], [[:Category:Ghost towns in Arizona, USA]]). These are name categories, whose purpose is to contain locations of the specified type. "Immediate" here refers to the fact that the location in the category name is the immediately-containing polity. Usually these categories use the preposition "of", but sometimes "in". (Specifically, "of" typically implies that the placetype in question has an official or semi-official status, whereas "in" implies there is no such official status, but common usage may override this.) The form of the toponym appearing in these categories is always the same as that of the corresponding toponym category except that the word "the" may appear (e.g. [[:Category:States of the United States]]), whereas it doesn't appear in the toponym category itself ([[:Category:United States]], no "the"). ## "Skip-polity" categories for second-level political and non-political divisions of a country or other top-level polity (e.g. [[:Category:Counties of the United States]], [[:Category:Municipalities of Brazil]] and [[:Category:Subprefectures of Japan]]). These have several purposes: * They group the immediate division categories mentioned previously. * They categorize "straggler" toponyms that (often improperly) fail to mention the subpolity they belong to, but only the top-level polity. * If categories do not exist for the first-level divisions of a country (and sometimes even when they do), they group all toponyms of the specified type for the specified country.
For example, Lithuania is divided into first-level counties and second-level municipalities, but since we don't currently have categories for Lithuanian counties, all municipalities go under [[:Category:Municipalities of Lithuania]] rather than under a category for a specific county. In addition, even though we do have categories for Japanese prefectures (a first-level division), all subprefectures (a second-level division) go under [[:Category:Subprefectures of Japan]] because there aren't very many of them (see below). ## "Generic placetype" categories, both of the immediate and skip-polity type (immediate [[:Category:Cities in California, USA]] and [[:Category:Neighborhoods of the Bronx]]; skip-polity [[:Category:Villages in Ivory Coast]], [[:Category:Geographic and cultural areas of England]], [[:Category:Rivers in Egypt]] and [[:Category:Places in the Philippines]]). As mentioned above, "generic" placetypes occur in every polity (although the set of generic placetypes allowed for cities is a subset of those allowed for top-level polities and subpolities). Usually these categories use the preposition "in", but sometimes "of". As above, skip-polity categories group immediate categories, and in addition there are various reasons a toponym entry is categorized into a skip-polity category. (For example, as a general rule, geographic and cultural areas only categorize at the country level, not the subpolity level, both because there often aren't very many in a given country and because they often span multiple subpolities.) The parent categories of a given category depend on its type. Generally, location categories have placetype categories as their first parent, and vice-versa. Specifically: # Top-level country categories have as their parent e.g. [[:Category:Countries in Europe]], [[:Category:Countries in Central America]] or [[:Category:Countries in Polynesia]], using the most specific continental-level region the country is contained in. 
# Pseudo-countries are under [[:Category:Country-like entities]] as a neutral designation. There aren't enough of them to subcategorize under continent-level regions. # Former countries are under [[:Category:Former countries and country-like entities]]. # Subpolity categories are usually under a placetype category whose placetype is the canonical (first-listed) placetype of the subpolity and whose toponym is the immediately containing polity, but there are exceptions. Specifically, sometimes if a polity has multiple types of subpolities, they are combined (e.g. [[:Category:States and territories of Australia]], [[:Category:Federal subjects of Russia]]). In addition, sometimes a less specific but more identifiable placetype is used instead of the canonical one (e.g. [[:Category:Regions of France]] when the canonical placetype is "administrative region"). The same rules and exceptions generally apply when categorizing subpolities themselves; e.g. both the Australian state of Queensland and territory of Northern Territory go under [[:Category:en:States and territories of Australia]] rather than separately under [[:Category:en:States of Australia]] and [[:Category:en:Territories of Australia]]. In addition, sometimes subpolities may "skip a level" if there aren't very many. For example, there are only 26 subprefectures of Japan (14 under Hokkaido and 12 more scattered under five other prefectures). Rather than have e.g. [[:Category:en:Subprefectures of Kagoshima Prefecture]] containing at most two entries and [[:Category:en:Subprefectures of Miyazaki Prefecture]] containing at most one, they are all grouped under the so-called "skip-subpolity category" [[:Category:en:Subprefectures of Japan]]. # City categories are always under e.g. [[:Category:Cities in the United States]] (e.g. [[:Category:New York City]] is so-placed, even though [[:Category:Cities in New York, USA]] exists). However, they may have a second, more-specific parent (e.g. 
[[:Category:Cities in New York, USA]] in the case of New York City). The city entries themselves will go under the more specific parent if it exists. # Immediate placetype categories for second-level divisions of a country generally have, respectively, a "toponym parent" that is the toponym mentioned in the category and a "skip-polity parent" that groups all subpolity placetype categories of a specific type and containing polity. For example, [[:Category:Counties of Arizona, USA]] has toponym parent [[:Category:en:Arizona, USA]] and skip-polity parent [[:Category:en:Counties of the United States]]. Sometimes the default skip-polity parent is overridden or disabled entirely. For example, in the US, most states are divided into counties but Louisiana is divided into parishes and Alaska into boroughs. It would make no sense to put [[:Category:Parishes of Louisiana, USA]] under [[:Category:Parishes of the United States]] (which would only have one subcategory), so we include them under [[:Category:Counties of the United States]]. An alternative would be to name the skip-polity category to explicitly include parishes and boroughs; this would get awkward here but is done in some cases. Similarly, [[:Category:Regional county municipalities of Quebec]] is placed under [[:Category:Regional municipalities of Canada]] since that name is used in other provinces. Meanwhile, [[:Category:Regional districts of British Columbia]] disables its skip-polity category since no other province or territory of Canada has regional districts or comparable subpolities under a different name (an alternative would be to place them under [[:Category:Counties of Canada]], since they are sort of comparable to counties). # Placetype categories for first-level divisions of a country similarly (e.g. 
[[:Category:States of the United States]]) have a toponym parent (in this case [[:Category:United States]]), but in place of the skip-polity parent they have two other parents: a "bare placetype" parent (in this case [[:Category:States]]) and the "generic" parent [[:Category:Political divisions of specific countries]]. (There is also a bare [[:Category:Political divisions]] that groups "bare placetype" categories.) Skip-polity placetype categories for second-level divisions of a country (e.g. [[:Category:Counties of the United States]]) work the same. Placetype categories for countries work likewise except they are missing the generic parent. ===Place descriptions=== A given place description is defined internally in a table of the following form: ```{ placetypes = {"``placetype``", "``placetype``", ...}, holonyms = { { -- holonym object; see below placetype = "``placetype``" or nil, display_placename = "``placename``", unlinked_placename = "``placename``", langcode = "``langcode``" or nil, no_display = BOOLEAN, needs_article = BOOLEAN, force_the = BOOLEAN, affix_type = "``affix_type``" or nil, pluralize_affix = BOOLEAN, suppress_affix = BOOLEAN, continue_cat_loop = BOOLEAN, }, ... }, order = { ``order_item``, ``order_item``, ... }, -- (only for new-style place descriptions), joiner = "``joiner_string``" or nil, holonyms_by_placetype = { ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ... }, }``` Holonym objects have the following fields: * `placetype`: The canonicalized placetype if specified as e.g. `c/Australia`; nil if no slash is present (in which case the placename in `display_placename` refers to raw text). * `display_placename`: The placename or raw text, in the format to be displayed. Placename display aliases have already been resolved. It is raw text if `placetype` is nil. * `unlinked_placename`: Same as `display_placename` but with links and HTML removed. 
* `langcode`: The language code prefix if specified as e.g. `c/fr:Australie`; otherwise nil. * `no_display`: If true (holonym prefixed with !), don't display the holonym but use it for categorization. * `needs_article`: If true, prepend an article if the placename needs one (e.g. `United States`). * `force_the`: If true, always prepend the article `the`. Example use: holonym 'city:pref:the/Gold Coast', which gets formatted as `(the) city of the [[Gold Coast]]`. * `affix_type`: Type of affix to prepend (values `pref` or `Pref`) or append (values `suf` or `Suf`). The actual affix added is the placetype (capitalized if values `Pref` or `Suf` are given), or its plural if `pluralize_affix` is given. Note that some placetypes (e.g. `district` and `department`) have inherent affixes displayed after (or sometimes before) them. * `pluralize_affix`: Pluralize any displayed affix. Used for holonyms like `c:pref/Canada,US`, which displays as `the countries of Canada and the United States`. * `suppress_affix`: Don't display any affix even if the placetype has an inherent affix. Used for the non-last placenames when there are multiple and a suffix is present, and for the non-first placenames when there are multiple and a prefix is present. * `continue_cat_loop`: If true (holonym used :also), continue producing categories starting with this holonym when preceding holonyms generated categories. Note that new-style place descs (those specified as a single argument using <<...>> to denote placetypes, placetype qualifiers and holonyms) have an additional `order` field to properly capture the raw text surrounding the items denoted in double angle brackets. The ``order_item`` items in the `order` field are objects of the following form: ```{ type = "``order_type``", value = "STRING" or INDEX, }``` Here, the ``order_type`` is one of `"raw"`, `"qualifier"`, `"placetype"` or `"holonym"`: * `"raw"` is used for raw text surrounding `<<...>>` specs.
* `"qualifier"` is used for `<<...>>` specs without slashes in them that consist only of qualifiers (e.g. the spec `<<former>>` in `<<former>> French <<colony>>`). * `"placetype"` is used for `<<...>>` specs without slashes that do not consist only of qualifiers. * `"holonym"` is used for holonyms, i.e. `<<...>>` specs with a slash in them. For all types but `"holonym"`, the value is a string, specifying the text in question. For `"holonym"`, the value is a numeric index into the `holonyms` field. It should be noted that placetypes and placenames occurring inside the holonyms structure are canonicalized, but placetypes inside the placetypes structure are as specified by the user. Stripping off of qualifiers and canonicalization of qualifiers and bare placetypes happens later. The information under `holonyms_by_placetype` is redundant to the information in holonyms but makes categorization easier. The holonym placenames listed here already have category aliases applied. For example, the call {{tl|place|en|city|s/Pennsylvania|c/US}} will result in the return value ```{ placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania" }, { placetype = "country", display_placename = "United States", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, }``` Here, the placetype aliases `s` and `c` have been expanded into `state` and `country` respectively, and the placename display alias `US` has been expanded into `United States`. PLACETYPES is a list because there may be more than one.
For example, the call {{tl|place|en|city/and/municipality|p/[[Kwango]] Province|c/Congo}} will result in the return value ``` { placetypes = {"city", "and", "municipality"}, holonyms = { { placetype = "province", display_placename = "[[Kwango]] Province", unlinked_placename = "Kwango Province" }, { placetype = "country", display_placename = "Congo", unlinked_placename = "Congo" }, }, holonyms_by_placetype = { country = {"Congo"}, }, }``` Here, the `unlinked_placename` field has removed links from `display_placename`. The value in the key/value pairs is likewise a list; e.g. the call {{tl|place|en|city|s/Kansas|and|s/Missouri}} will return ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, }, } ``` Note that in `get_cats()` (which runs after the display form has been generated), further changes to the holonym structure are made to aid in categorization. For example, after `handle_category_implications()` and `augment_holonyms_with_container()` are called, the above structure will look more like ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { placetype = "country", unlinked_placename = "United States" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, { placetype = "country", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, country = {"United States"} }, } ``` ===Overall place specs=== The overall place spec parsed by `parse_overall_place_spec` has the following fields: * `lang`: The language object (from {{para|1}}). 
* `args`: The parsed arguments from the {{tl|place}} call. * `directives`: List of form-of directives (starting with `@`) parsed from the numeric args beginning with {{para|2}}. Each directive contains fields `directive` (the directive as specified by the user, e.g. `"former name of"`); `terms` (list of term objects for the terms specified by the user); `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}); `spec` (the corresponding directive spec from `all_form_of_directives`); `pretext` (the text to display directly before the directive); `posttext` (the text to display directly after the directive; {nil} except for the last directive). * `descs`: List of one or more place description objects parsed from the numeric args beginning with {{para|2}}, as described above. * `extra_info`: List of extra-info objects for extra info specified using arguments such as {{para|capital}}, {{para|modern}}, etc. Objects are in the order they should be displayed, and each object contains fields `spec` (the spec for the type of extra info, taken from `export.extra_info_args`), `terms` (list of term objects for the terms specified by the user); and `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}). ===Category determination=== The algorithm to find the categories to which a given place belongs works off of a place description (which specifies the entry placetype(s) and holonym(s); see above). If there are multiple place descriptions, each is processed independently to generate categories. Likewise, if there are multiple entry placetypes in a given place description, each is processed independently with all the holonyms of the description to generate categories. 
Furthermore, before the category-generation algorithm runs, earlier steps have modified the holonyms of the place description (inserting containing polities whenever possible; see the description above of `handle_category_implications()` and `augment_holonyms_with_container()`). Given a single entry placetype and a place description, the algorithm to generate categories processes holonyms from left to right until it finds one that "matches" in that it produces one or more categories. At that point it attempts to generate categories for all other holonyms in the place description of the same placetype. Normally, it then stops processing holonyms, but if a holonym is marked using the `:also` modifier, the category generation process starts over starting with that holonym (or the leftmost such remaining holonym, if there is more than one marked with `:also`). This makes it possible, for example, to specify the description of a river that passes through two different types of political divisions (e.g. Alberta and the Northwest Territories), or categorize a geographic region at both the continent and country level, such as this: <pre> {{place|en|historical region|r/Eastern Europe|located in southeastern|c:also/Poland|*and western|c/Ukraine}} </pre> Here, `r/Eastern Europe` has a category implication that adds `cont/Europe` as a holonym directly after it, which causes the page to be categorized into [[:Category:en:Geographic and cultural areas of Europe]]. The category generation process would normally stop at this point, but the presence of `:also` causes it to restart with `c/Poland` and generate the category [[:Category:en:Geographic and cultural areas of Poland]]. After doing this, it looks for other holonyms of the same placetype as `c/Poland` (i.e. other countries), which causes it to process `c/Ukraine` and generate the category [[:Category:en:Geographic and cultural areas of Ukraine]]. 
The category generation process works off of the `placetype_data` table, which specifies various properties for placetypes, such as how to display a holonym of that placetype as well as how to categorize certain pages where the {{tl|place}} call contains the specified placetype as an entry placetype. For example, the entry for `city-state` in [[Module:place/placetypes]] might look like ``` ["city-state"] = { link = true, category_link = "[[sovereign]] [[microstate]]s consisting of a single [[city]] and [[w:dependent territory|dependent territories]]", has_neighborhoods = true, class = "settlement", ["continent/*"] = {"City-states", "Cities", "Countries", "Countries in +++", "National capitals"}, default = {"City-states", "Cities", "Countries", "National capitals"}, }, ``` Here, the keys specify, respectively: # If `city-state` occurs as an entry placetype, link it to the corresponding Wiktionary entry (that is what `true` means in `link = true`). # Use the specified `category_link` text for categories such as [[:Category:City-states]]. # City-states are "city-like", i.e. they have neighborhoods; this controls the handling of entry placetypes such as `neighborhood`, `district`, `area`, etc. # City-states should be treated as settlements for determining how to handle the placetype `former city-state` and for categorizing the bare category [[:Category:City-states]] and language-specific equivalents such as [[:Category:en:City-states]]. # When the entry placetype `city-state` occurs along with a continent holonym, categorize into the specified categories under `continent/*`. Here, `+++` stands for the holonym in question. # When the entry placetype `city-state` occurs in any other context, categorize into the specified categories under `default`. It's important to realize that the only categorization keys under a given placetype entry that are specified explicitly in [[Module:place/placetypes]] are certain wildcard keys such as `continent/*` above (i.e. 
containing a slash followed by `*`) and under the key `default`. All the remaining categorization happens through category handlers, based on the information on known locations in [[Module:place/locations]]. For example, [[Module:place/locations]] has an "England group" specified similarly to the following: ``` export.england_group = { default_container = {key = "England", placetype = "constituent country"}, default_placetype = "county", default_divs = { "districts", {type = "local government districts", cat_as = "districts"}, { type = "local government districts with borough status", cat_as = {"districts", "boroughs"}, }, {type = "boroughs", cat_as = {"districts", "boroughs"}}, "civil parishes", }, default_british_spelling = true, data = export.england_counties, } ``` The `default_divs` key here specifies the divisions that exist for each of the counties listed under the `data` key (unless the key overrides them). Here, the entry `{type = "boroughs", cat_as = {"districts", "boroughs"}}` directs the category handler `political_division_cat_handler` in [[Module:place/placetypes]] (which is one of two category handlers that run for all entry placetypes, along with `generic_place_cat_handler`) to categorize boroughs specified under any of the counties listed under `data` as both districts and boroughs. Now, the categorization process proceeds as follows, given an entry placetype and place description, which specifies a set of holonyms (the code to do this is in `get_placetype_cats()`): # First, look up the entry placetype and any equivalent placetypes in `placetype_data`, which is defined in [[Module:place/placetypes]]. Note that the entry in `placetype_data` that specifies the placetype information that is used to determine the category or categories may not directly correspond to the entry placetype as specified in the place description. 
For example, if the entry placetype is `small town`, the placetype whose data is fetched will be `town` since `small` is a recognized qualifier and there is no entry in `placetype_data` for `small town`. As another example, if the entry placetype is `administrative capital`, the code will first look up `administrative capital` and then look up `capital city`, which is where the category handler is found, because `administrative capital` specifies `capital city` as its fallback. # Then, iterate over holonyms from left to right, as described above. For each holonym, we proceed as follows: ## First, call `political_division_cat_handler` to check if the entry placetype and holonym match a division in the `locations` data in [[Module:place/locations]], as in the example above. Note that when doing this, holonyms are canonicalized so that e.g. `co/Bedfordshire` gets mapped to `county/Bedfordshire` (because there is an entry in `placetype_aliases` in [[Module:place/placetypes]] that maps `co` to `county`) and `c/USA` gets mapped to `country/United States` (because there is an entry in the location data for the list of countries that maps `country/USA` to `country/United States` for both display and categorization purposes). This category handler, as with all such handlers, is passed the entry placetype and holonym being processed, but is also passed the entire place description, so it can look at other specified holonyms (particularly those that follow). It either returns {nil} or a list of category specs (which are the actual categories minus the preceding language code). ## If `political_division_cat_handler` doesn't generate any categories, check if there is a category handler defined using the `cat_handler` key for the entry placetype. If so, call it to generate the categories (if any). ## If the category handler returns {nil}, or there is no category handler, look for a ''wildcard key'' of the format e.g. `country/*`, which matches any holonym of placetype `country`. 
If found, the value is a list of category specs, which are processed as above. ## If we get this far without generating any categories, move to the next holonym. ## If we do generate any categories, process all other holonyms of the same placetype. For example, if the user says {{tl|place|en|city|s/Kansas|and|s/Missouri}}, when we get to the holonym `s/Kansas`, we generate the category [[:Category:en:Cities in Kansas, USA]]. This causes us to look for other holonyms of the same placetype `state`, and process them accordingly, generating a category [[:Category:en:Cities in Missouri, USA]] as well. The same thing happens in an invocation like {{tl|place|pl|river|c/Poland,Ukraine,Belarus}}. # Once we generate categories for a holonym and any other holonyms of the same placetype, we normally stop processing holonyms. But if a holonym has the `:also` modifier, we restart the left-to-right loop at that holonym. For example, in the invocation {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr/Northwest Territories}}, we will generate a category [[:Category:en:Rivers in Alberta, Canada]] as well as [[:Category:en:Rivers in British Columbia, Canada]] (because British Columbia is of the same placetype as Alberta); but no category will be generated for the Northwest Territories, which is of a different placetype. To fix this, write {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr:also/Northwest Territories}}. The use of `:also` will cause holonym processing to resume at `Northwest Territories` after `Alberta` is processed, leading to an additional category [[:Category:en:Rivers in the Northwest Territories, Canada]]. (The presence of `the` in this last category is because `Northwest Territories` is a known location with a spec indicating that it should be preceded by `the`; it has nothing to do with the raw text `and the` in the invocation.) 
# Finally, if we process all holonyms and don't end up producing any categories, we check the entry placetype's data for a `default` key. If found, it lists category specs, which are processed to generate categories. This is used, for example, in the placetype `city-state`, as described above. # It should be noted that the above process runs independently for each combination of entry placetype and place description. Thus, for example, an invocation {{tl|place|en|city/and/county|s/Kansas,Missouri|c/USA}} will generate categories for both cities and counties in both Kansas and Missouri. # Two additional sources of categories are ''bare location'' categories and ''generic place'' categories. These categories are added by appropriate calls in the outer function `get_cats`, which iterates over placetypes and place descriptions, calling `get_placetype_cats` on each combination. ## Bare location categories are categories like [[:Category:Arizona, USA]] that are related-to categories containing terms related to the specified location. The bare location code, for example, adds the term [[Arizona]], and its equivalents in other languages, to [[:Category:Arizona, USA]]. When looking for terms to consider, it checks the pagename, the glosses specified using {{para|t}}, and the terms specified using {{para|modern}}, {{para|short}} and {{para|full}}. 
It looks to see if any of these parameters match any known locations, but only adds them to a bare location category if (a) the specified entry placetype matches, so that for example Russian `[[Джорджия]]` goes into [[:Category:Georgia, USA]] while `[[Грузия]]` goes into [[:Category:Georgia]] (the country), even though both have a gloss `Georgia`; and (b) there are no conflicting holonyms, so that for example the Old English term [[Munucceaster]] if defined similarly to {{tl|place|ang|city|in modern|cc/England|t=Newcastle}} won't get added to [[:Category:Newcastle, New South Wales]] (even though it is also a city) because the latter city is known to be in Australia, which conflicts with the country `United Kingdom` (added internally to the Old English place description through the holonym augmentation process, based on the holonym `cc/England`). ## Generic place categories are categories like [[:Category:Places in Kansas, USA]] and [[:Category:Places in England]] that contain places of arbitrary placetype. These are added through a special category handler that operates like other category handlers but is run for all placetypes, rather than only for the specified one(s). ]==] --[=[ TODO/FIXME: 1. [DONE] Neighborhoods should categorize at the city level. Categories like [[:Category:Places in Los Angeles]] exist but not [[:Category:Neighborhoods in Los Angeles]]; we can refactor the code in generic_cat_handler() to support this use case. 2. Display handlers should be smarter. For example, 'co/Travis' as a holonym should display as 'Travis County' in the United States, but (I think) display handlers don't currently have the full context of holonyms passed in to allow this to happen. 3. Connected to this, we have various display handlers that add the name of the holonym after or (sometimes) before the placename if it's not already there. 
An example is the county_display_handler() in [[Module:place/placetypes]], which adds "County" before Ireland and Northern Ireland counties and after Taiwan and Romania counties. This should be integrated into the polity group for these respective polities through a setting rather than requiring a separate handler that has special casing for various polities. 4. Placetypes for toponyms should also have display handlers rather than just fixed text. This should allow us to dispense with the need for special types for "fpref" = "French prefecture" (which displays as "prefecture" but links to the appropriate Wikipedia article on French prefectures, which are completely different from the more general concept of prefecture). Similarly for "Polish colony" and "Welsh community". ("Israeli settlement" should probably stay as-is because it displays as "Israeli settlement" not just "settlement".) 5. [DONE] Currently, categories for e.g. states and territories of Australia go into [[:Category:States and territories of Australia]] but terms for states and territories of Australia go into (respectively) [[:Category:States of Australia]] and [[:Category:Territories of Australia]]. We should fix this; maybe this is as easy as setting cat_as in the respective divs definitions. 6. Probably cat_as should support raw categories as well as category types; raw categories would be indicated by being prefixed with "Category:". 7. [MOSTLY DONE] Update documentation. 8. [DONE] Rename remaining political division categories to include name of country in them. 9. [DONE] Add Pakistan provinces and territories. 10. [DONE] Add a polity group for continents and continent-level regions instead of special-casing. This should make it possible e.g. to have Jerusalem as a city under "Asia". 11. [DONE] Add better handling of cities that are their own states, like Mexico City. 12. [DONE] Breadcrumb for e.g. [[Category:Aguascalientes, Mexico]] is "Aguascalientes, Mexico" instead of just "Aguascalientes". 
13. [DONE] Unify aliasing system; cities have a completely different mechanism (alias_of) vs. polities/subpolities (which use `placename_cat_aliases` and `placename_display_aliases` in [[Module:place/placetypes]]). 14. [DONE] More generally, cities should be unified into the polity grouping system to the extent possible; this would allow for divs of cities (see #17 below). 15. [DONE] We have `no_containing_polity_cat` set for Lebanon, Malta and Saudi Arabia to prevent country-level implications from being added due to generically-named divisions like "North Governorate", "Central Region" and "Eastern Province" but (a) this setting seems to do multiple things and should be split, (b) it should be possible to set this at the division level instead of the country level. 16. Split out the data from the handlers so we can use loadData() on the data because it's becoming very big. 17. [DONE] Cities like Tokyo have special wards; "prefecture-level cities" like Wuhan (which aren't really cities but we treat them as such) have districts, subdistricts, etc. We need to support divs for cities and even named divisions of cities (such as we already have for boroughs of New York City). 18. [DONE] It should be allowed to set 'true' to any qualifier (which links it) and have it work correctly; qualifier lookup in [[Module:place]] needs to remove links first. 19. [DONE] Categories 'Historical polities' and 'Historical political subdivisions' should be renamed 'Former ...' since "historic(al)" is ambiguous (cf. "historic counties" in England which are not former, but still have a legal definition). 20. [PARTLY DONE; SUPPORT IS THERE BUT FORMER PROVINCES NOT YET CATEGORIZED] It should be possible to categorize former subpolities of certain polities; cf. [[:Category:ja:Provinces of Japan]], which contains former provinces. 21. 
[DONE] In subpolity_keydesc(), we need to generate the correct indefinite article and have a huge hack to check specifically for "union territory", which is the only placetype that shows up in this function where the default indefinite article generating function fails. To fix this properly, we need to separate out the non-category placetype data from `cat_data` in [[Module:place/placetypes]] and move it to [[Module:place/locations]], because we don't have access to the data in [[Module:place/placetypes]], and that data indicates the correct article for placetypes like "union territory". 22. [DONE] Simplify the specs in `cat_data`, eliminating the distinction between "inner" and "outer" matching. There should not be two levels, just one. For example, in "district", instead of ["country/Portugal"] = { ["itself"] = {"Districts and autonomous regions of +++"}, } we should just have ["country/Portugal"] = {"Districts and autonomous regions of +++"}, And in "dependent territory", instead of ["default"] = { ["itself"] = {true}, ["country"] = {true}, }, we should just have ["itself"] = {true}, ["country/*"] = {true}, It appears the only remaining spec that can't be easily converted in this fashion is for "subdistrict": ["country/Indonesia"] = { ["municipality"] = {true}, }, This seems to be specifically for Jakarta and doesn't seem to work anyway, as the two entries in [[:Category:en:Subdistricts of Jakarta]] and the one entry in [[:Category:id:Subdistricts of Jakarta]] are manually categorized. 23. [DONE] Consolidate the remaining stuff in [[Module:category tree/topic cat/data/Earth]] into [[Module:category tree/topic cat/data/Places]]. 24. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city, but doesn't do the same for larger-level divisions. Likewise for the `city_type_cat_handler`. 
There are some sufficiently generically-named divisions that this issue can occur; for example, [[Koforidua]], the capital city of Eastern Region, Ghana, is incorrectly categorized under [[:Category:en:Cities in Eastern Region, Malta]] and [[:Category:en:Places in Eastern Region, Malta]]. Note that the function `augment_holonyms_with_container` ''DOES'' do such checks, so we should be able to refactor the code out of that function and use it elsewhere. 25. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city; but how smart is it? It will successfully avoid categorizing a neighborhood in e.g. [[Columbus]], [[Georgia]] that doesn't explicitly mention the US (only `s/Georgia`) into [[:Category:en:Places in Columbus]], which is for Columbus, Ohio, but will it do the same for a hypothetical neighborhood of Columbus in say Merseyside, England? This should be investigated. It will probably work for a hypothetical Columbus in [[Canada]] because `augment_holonyms_with_container` would auto-add Canada as an additional holonym once say `p/Ontario` is mentioned, but I think there's a setting preventing this augmentation from happening for the UK. (This relates to FIXME #15. `no_containing_polity_cat` is set on England, Scotland, etc. to prevent the toponyms from being added to [[:Category:en:Places in the United Kingdom]], but this same setting is used to prevent augmentation, which it should not be; there should be different settings.) 26. [DONE] The `generic_cat_handler` (or more specifically `find_holonym_keys_for_categorization`) checks for city holonyms by looking specifically for holonym type `city`. But some cities (particularly those in China) can be specified using different holonym types, e.g. `prefecture-level city`, `subprovincial city`, etc. 
We should allow these when appropriate (which means the cities in China need to have a `placetype` set that indicates their regional-level status as well as just `city`). I'm not sure if cities support specifying a custom `placetype` at the moment; this relates to FIXME #14 above concerning unifying cities and political divisions internally. 27. [DONE] The bare category handler (`get_bare_categories` in [[Module:place/placetypes]]) is not smart enough to avoid overcategorizing cities or other divisions that are of the right placetype but in the wrong containing polity. For example, Asturian [[Llión]] "León (city in Spain)" gets put in [[:Category:ast:León]] even though the latter is supposed to refer to a city in Mexico. We can borrow the check-containing-polity code from `generic_cat_handler`. 28. [DONE] Redo handling of singular and plural to respect overrides specified in placetype_data. Check more carefully for things that may not singularize correctly, e.g. 'passes' -> 'passe'? Definitely 'headquarters' and variants. 29. [DONE] Combine placetype_equivs and other placetype data into `placetype_data`. Figure out if we need the distinction between `placetype_equivs` and `fallback`. 30. `has_neighborhoods` may need to be a function that can look at the containing holonyms to determine whether the entity in question is city-like. 31. [DONE] Bare placenames as they appear in holonyms (e.g. `Riau Islands`) instead of category keys (e.g. `the Riau Islands, Indonesia`) should appear in the polity data tables. As a first pass, the word "the" should not appear but should instead be a property of the polity. 32. [DONE] `capital_city_cat_handler` should use `get_holonyms_to_check()`. 33. [PARTLY DONE] The code to generate and parse the correct preposition ("in" or "of") is very convoluted, and the actual preposition used is specified in various locations with various defaults, sometimes hardcoded. This should be simplified. 
It is made more difficult by the fact that the in/of distinction occurs in several places: (a) when generating the {{place}} text in old-style descriptions where the preposition isn't explicitly given, which uses the `preposition` setting in placetype_data, defaulting to "in"; (b) when generating categories based on explicit category specs in placetype_data (which are gradually being deprecated), which likewise uses the `preposition` setting in placetype_data, defaulting to "in"; (c) when generating categories based on political_division_cat_handler, originating in the `divs` placetypes for specific known locations in [[Module:place/locations]], which uses the `prep` setting embedded in the `divs` specifications, defaulting to "of"; (d) when generating categories based on category handlers specified using the `cat_handler` property of entries in placetype_data, which tend to hardcode "in" or "of" depending on the specific category handler; (e) when generating category descriptions in [[Module:category tree/topic/Places]] for `divs` categories generated in (c), which (correctly) uses the same `prep` setting embedded in the `divs` settings that is used when generating the categories themselves; (f) when generating category descriptions for categories generated in (b) and (d) above, which relies on the `generic_before_non_cities` and `generic_before_cities` settings in placetype_data, which need to match the corresponding prepositions hardcoded in the category generation handlers. Instead of the hardcoding, the category generation handler should respect the `generic_before_*` settings. 34. [[Krakow]] defined as {{place|en|A <<city>> on the [[Vistula]] River, the <<capital>> of the <<voi/Lesser Poland Voivodeship>> in southern <<c/Poland>>}} categorizes under [[:Category:Voivodeship capitals]] when it should probably instead be under [[:Category:Voivodeship capitals of Poland]]. 
Possibly this is because the various voivodeships haven't yet been entered as known locations, but this should happen regardless of that. 35. {{tcl}} bugs: a. [DONE] Lowercase initial letter in new-style {{place}} descriptions in {{tcl}}. Maybe we can have a setting tcl_nolc=1 to prevent this from happening. b. [DONE] tcl= and probably new-style {{place}} descriptions in general should recognize ;; to separate distinct {{place}} descriptions, and similarly ;;and as the equivalent of regular `;and`, etc. c. [DONE] The value supplied in `modern=` should be displayed in {{tcl}} descriptions regardless of the setting that normally disables this, so that e.g. the foreign-language equivalent of [[British Honduras]] doesn't just say it's a former British colony in Central America but specifically identifies it as modern Belize. If the user gives place_modern= in {{tcl}}, that should override the modern= value and still display. d. [DONE] The page supplied to {{tcl}} should be used for generating bare categories even if t= is supplied and overrides the English term displayed. e. [DONE] If text follows {{place}} and begins with a semicolon, the semicolon isn't copied into {{tcl}}. 36. County boroughs used as holonyms currently display 'borough county borough' because there's an affix setting for 'county borough' and a fallback display handler for 'borough'. We need to rethink this; maybe merge the affix setting and display handlers. 37. Implement known-location groups and specs in a more standardly object-oriented way using metatables. 38. Implement caching of known location lookup in the holonym. This may have to be keyed by placetype, but we can have a special field for when the lookup placetype is the same as the user-specified placetype of the holonym. Use this known location in place of looking up known locations and store the appropriate known location there in `augment_holonyms_with_container()` instead of calling `key_to_placename`. 39. 
Bug fixes with 'the': (a) [DONE] [[Kazaň]] defined as {{place|cs|caplc|rep:Pref/Tatarstan|c/Russia|t1=Kazan}} displays as "Republic of the Tatarstan". (b) [[Valday]] defined as {{place|en|town/administrative center|dist:Suf/Valdaysky|obl/Novgorod|c/Russia}} displays as "a town, the administrative center of the Valdaysky District". Changing to `dist:suf/Valdaysky` displays as "... of Valdaysky district". 40. [DONE] Bug fix with 'the': [[Verkhoyansk]] defined as {{place|en|town|rep/Sakha|c/Russia}} displays as "a town in the Sakha". 41. [DONE] [[Category:Cities in Asia]] has [[Category:Cities in Eurasia]] as a parent, which in turn has [[Category:Cities in the Earth]] as a parent. Continents should not have the second parent like this. 42. [DONE] When checking `british_spelling`, it should check all containers as well; otherwise it's too hard to keep this in sync across cities, administrative divisions and countries. 43. [DONE] `skip_polity_parent_type` should be renamed to container_parent_type or similar. 44. There should be a flag to allow e.g. departments of France that are currently categorized as departments of their region to also be categorized as departments of France. 45. [DONE] Aliases are causing iterate_matching_holonym_location() to fail, e.g. if [[براق]] "Prague" is specified as {{place|acw|capital city|c/Czechia|t1=Prague}}, this fails to add a bare category [[Category:acw:Prague]] because the code in iterate_matching_holonym_location() isn't resolving aliases when comparing the known container 'Czech Republic'. Probably we want to build an alias table to speed up these sorts of lookups. 46. [DONE; DUE TO TYPO IN HANDLER] The district cat handler is failing to work right, e.g. in [[Saint-Gaudérique]] defined as {{place|fr|district|city/Perpignan|in|dept/Pyrénées-Orientales|r/Occitania|c/France|t=Saint-Gaudérique}}, only the 'Places in ...' categories are getting triggered. 47. 
Suburbs of a given city aren't generally in the city and may not even be in the same country or country division, so they should not categorize as "Places in ..." based on the city and specified country and division. Same goes for "enclave" (within somewhere) and "exclave". 48. When converting display aliases, we should automatically convert full placenames to full placenames and elliptical placenames to elliptical placenames instead of always either doing elliptical or full placenames depending on the value of `display_as_full`. 49. `@obsolete form of` and `@archaic form of` should automatically trigger nocat=1. 50. The handler that adds bare categories should pick up values in <eq:...>. ]=] --[==[ var: List specifying the allowed form-of directives, used for former names, official names, abbreviations, etc. of places. The key is the form-of directive and the value is an object with the following properties: * `text`: The actual text displayed before the terms. If the value is `+`, the key is used as the text. If the value is a function, it is passed a single argument, the overall place spec (see comment at top of file) and should return the text to be displayed. * `type_prefix`: The prefix used to generate the placetype for looking up the appropriate category or categories in the placetype data structure. Can be omitted if there are no categories associated with the directive. * `conjunction`: The conjunction used to join multiple terms, defaulting to `and`. * `cat`: Additional category or categories to add the term to, whenever this particular directive is used. Normally the value is a topic-style category minus the langcode prefix, but if prefixed with `cln:`, it is a langname-style category. For example, the value `"Abbreviations"` would correspond to a category [[:Category:en:Abbreviations]] (assuming the language of the {{tl|place}} call is English), while the value `"cln:abbreviations"` corresponds to a category [[:Category:English abbreviations]]. 
Use a list of such specs for multiple categories.
* `default_foreign`: If specified, the default language of terms given along with this directive is the language in
  {{para|1}}; otherwise it is English.
* `alias_of`: If specified, this directive is merely an alias of the canonical directive named by the value, and no other
  property is given; the properties of the canonical directive apply.
]==]
export.all_form_of_directives = {
	-- Names of various sorts (former, official, long-form, nicknames, ...).
	["former name of"] = {
		text = "+",
		type_prefix = "FORMER_NAME_OF",
	},
	["fmr of"] = {alias_of = "former name of"},
	["ancient name of"] = {
		text = "+",
		type_prefix = "FORMER_NAME_OF",
	},
	["official name of"] = {
		text = "+",
		type_prefix = "OFFICIAL_NAME_OF",
	},
	["former official name of"] = {
		text = "+",
		type_prefix = "FORMER_OFFICIAL_NAME_OF",
	},
	["long form of"] = {
		text = "+",
		type_prefix = "LONG_FORM_OF",
	},
	["former long form of"] = {
		text = "+",
		type_prefix = "FORMER_LONG_FORM_OF",
	},
	["nickname for"] = {
		text = "+",
		type_prefix = "NICKNAME_FOR",
	},
	["official nickname for"] = {
		text = "+",
		type_prefix = "OFFICIAL_NICKNAME_FOR",
	},
	["former nickname for"] = {
		text = "+",
		type_prefix = "FORMER_NICKNAME_FOR",
	},
	["derogatory name for"] = {
		text = "[[Appendix:Glossary#derogatory|derogatory]] name for",
		type_prefix = "DEROGATORY_NAME_FOR",
	},
	["synonym of"] = {
		text = "+",
	},
	["syn of"] = {alias_of = "synonym of"},

	-- Shortenings (abbreviations, initialisms, acronyms, ellipses, clippings).
	["abbreviation of"] = {
		text = "[[Appendix:Glossary#abbreviation|abbreviation]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:abbreviations",
		default_foreign = true,
	},
	["abbr of"] = {alias_of = "abbreviation of"},
	["abbrev of"] = {alias_of = "abbreviation of"},
	["initialism of"] = {
		text = "[[Appendix:Glossary#initialism|initialism]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:initialisms",
		default_foreign = true,
	},
	["init of"] = {alias_of = "initialism of"},
	["acronym of"] = {
		text = "[[Appendix:Glossary#acronym|acronym]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:acronyms",
		default_foreign = true,
	},
	["syllabic abbreviation of"] = {
		text = "[[Appendix:Glossary#syllabic abbreviation|syllabic abbreviation]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:syllabic abbreviations",
		default_foreign = true,
	},
	["sylabbr of"] = {alias_of = "syllabic abbreviation of"},
	["sylabbrev of"] = {alias_of = "syllabic abbreviation of"},
	["ellipsis of"] = {
		text = "[[Appendix:Glossary#ellipsis|ellipsis]] of",
		type_prefix = "ELLIPSIS_OF",
		cat = "cln:ellipses",
		default_foreign = true,
	},
	["ellip of"] = {alias_of = "ellipsis of"},
	["clipping of"] = {
		text = "[[Appendix:Glossary#clipping|clipping]] of",
		type_prefix = "CLIPPING_OF",
		cat = "cln:clippings",
		default_foreign = true,
	},
	["clip of"] = {alias_of = "clipping of"},

	-- Alternative, dated, archaic and obsolete forms and spellings.
	["alternative form of"] = {
		text = "+",
		default_foreign = true,
	},
	["alt form"] = {alias_of = "alternative form of"},
	["alternative spelling of"] = {
		text = "+",
		default_foreign = true,
	},
	["alt spell"] = {alias_of = "alternative spelling of"},
	["alt sp"] = {alias_of = "alternative spelling of"},
	["dated form of"] = {
		text = "[[Appendix:Glossary#dated|dated]] form of",
		type_prefix = "DATED_FORM_OF",
		cat = "cln:dated forms",
		default_foreign = true,
	},
	["dated form"] = {alias_of = "dated form of"},
	["dated spelling of"] = {
		text = "[[Appendix:Glossary#dated|dated]] spelling of",
		type_prefix = "DATED_FORM_OF",
		cat = "cln:dated forms",
		default_foreign = true,
	},
	["dated spell"] = {alias_of = "dated spelling of"},
	["dated sp"] = {alias_of = "dated spelling of"},
	["archaic form of"] = {
		text = "[[Appendix:Glossary#archaic|archaic]] form of",
		type_prefix = "ARCHAIC_FORM_OF",
		cat = "cln:archaic forms",
		default_foreign = true,
	},
	["arch form"] = {alias_of = "archaic form of"},
	["archaic spelling of"] = {
		text = "[[Appendix:Glossary#archaic|archaic]] spelling of",
		type_prefix = "ARCHAIC_FORM_OF",
		cat = "cln:archaic forms",
		default_foreign = true,
	},
	["arch spell"] = {alias_of = "archaic spelling of"},
	["arch sp"] = {alias_of = "archaic spelling of"},
	["obsolete form of"] = {
		text = "[[Appendix:Glossary#obsolete|obsolete]] form of",
		type_prefix = "OBSOLETE_FORM_OF",
		cat = "cln:obsolete forms",
		default_foreign = true,
	},
	["obs form"] = {alias_of = "obsolete form of"},
	["obsolete spelling of"] = {
		text = "[[Appendix:Glossary#obsolete|obsolete]] spelling of",
		type_prefix = "OBSOLETE_FORM_OF",
		cat = "cln:obsolete forms",
		default_foreign = true,
	},
	["obs spell"] = {alias_of = "obsolete spelling of"},
	["obs sp"] = {alias_of = "obsolete spelling of"},
}

-- Return the text introducing the value of the `seat` extra-info argument. The wording depends on the first placetype
-- of the first place description in `overall_place_spec`: counties, parishes and boroughs (in either singular or
-- plural form) get "county seat", "parish seat" and "borough seat" respectively; anything else gets plain "seat".
local function get_seat_text(overall_place_spec)
	local seat_text_by_placetype = {
		["county"] = "county seat",
		["counties"] = "county seat",
		["parish"] = "parish seat",
		["parishes"] = "parish seat",
		["borough"] = "borough seat",
		["boroughs"] = "borough seat",
	}
	local first_placetype = overall_place_spec.descs[1].placetypes[1]
	return seat_text_by_placetype[first_placetype] or "seat"
end

--[==[ var:
List specifying the allowed arguments containing extra information that is sometimes added to a definition, such as the
capital, largest city, modern name, official name, etc., along with associated properties; displayed in the order given.
Each element is an object with the following properties:
* `arg`: The argument name.
* `text`: The actual text displayed before the terms. If the value is `+`, the argument name is used as the text. If the
  value is a function, it is passed a single argument, the overall place spec (see the comment at the top of the file)
  and should return the text to be displayed.
* `conjunction`: The conjunction used to join multiple terms, defaulting to `and`.
* `display_even_when_dropped`: Display this piece of extra info even when it would normally be dropped (e.g. in
  {{tl|tcl}} when the language is other than English).
* `match_sentence_style`: If true, the text will be capitalized and preceded by a period when ''sentence style'' is in
  effect (essentially, when the language is English and there is no translation specified using {{para|t}} or similar
  parameter); otherwise, the text will be displayed as-is and preceded by a semicolon. If false, the semicolon style will
  always be used.
* `auto_plural`: If true, pluralize the text when there is more than one term.
* `with_colon`: If true, follow the text with a colon.
(This colon cannot easily be included in the text itself because if pluralized, the pluralized text goes before the colon.) ]==] export.extra_info_args = { {arg = "modern", text = "+", conjunction = "or", display_even_when_dropped = true}, {arg = "now", text = "now,", conjunction = "or", display_even_when_dropped = true}, {arg = "full", text = "in full,", conjunction = "or", display_even_when_dropped = true}, {arg = "short", text = "short form", conjunction = "or"}, {arg = "abbr", text = "abbreviation", conjunction = "or"}, {arg = "former", text = "formerly,"}, {arg = "official", text = "official name", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "capital", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "largest city", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "caplc", text = "capital and largest city", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "seat", text = get_seat_text, match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "shire town", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "headquarters", text = "+", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "center", text = "administrative center", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "centre", text = "administrative centre", match_sentence_style = true, auto_plural = false, with_colon = true}, } export.extra_info_arg_map = {} for _, spec in ipairs(export.extra_info_args) do export.extra_info_arg_map[spec.arg] = spec end ----------- Wikicode utility functions -- Return a wikilink link {{l|language|text}} local function link(text, langcode, id) if not langcode then return text end return m_links.full_link( {term = text, lang = require(languages_module).getByCode(langcode, true, "allow etym"), id = id}, nil, "allow self link" ) end 
---------- Basic utility functions


-- Record `page` under the tracking key "place/PAGE". The pages tracked under a given key can be listed by going to
-- [[Wiktionary:Tracking/place/PAGE]] and clicking on "What links here". Always returns true.
local function track(page)
	local do_track = require(debug_track_module)
	do_track("place/" .. page)
	return true
end


-- Uppercase the first letter of each space-separated word in `text`.
local function ucfirst_all(text)
	if not text:find(" ") then
		-- Single word; no need to split and rejoin.
		return m_strutils.ucfirst(text)
	end
	local words = split(text, " ", true)
	for word_index, word in ipairs(words) do
		words[word_index] = m_strutils.ucfirst(word)
	end
	return concat(words, " ")
end


-- Lowercase `text` according to the rules of the wiki's content language.
local function lc(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lc(text)
end


---------- Argument parsing functions and utilities


-- Split an argument on comma, but not comma followed by whitespace. The actual splitting is delegated to
-- split_on_comma() in the parse interface module, which is only loaded if `val` contains a comma at all; otherwise a
-- one-element list containing `val` is returned.
local function split_on_comma(val)
	if not val:find(",") then
		return {val}
	end
	return require(parse_interface_module).split_on_comma(val)
end


-- Split an argument on slash, but not slash occurring inside of HTML tags like </span> or <br />. Returns a list of
-- strings.
local function split_on_slash(arg)
	if not arg:find("<") then
		-- No angle brackets, hence no HTML tags to protect; a plain split suffices.
		return split(arg, "/", true)
	end
	local m_parse_utilities = require(parse_utilities_module)
	-- Parse balanced segment runs involving <...> and split on slash in the remainder. The result is a list of lists,
	-- so each inner list has to be rejoined by concatenation.
	local balanced_runs = m_parse_utilities.parse_balanced_segment_run(arg, "<", ">")
	local groups = m_parse_utilities.split_alternating_runs(balanced_runs, "/")
	for group_index, group in ipairs(groups) do
		groups[group_index] = concat(group)
	end
	return groups
end


-- Implement "implications", i.e. where the presence of a given holonym causes additional holonym(s) to be added.
-- Implications apply only to categorization. There used to be support for "general implications" that applied to both
-- display and categorization, but there ended up not being any such implications, so we've removed the support.
-- It is a bad idea in any case to have implications that affect the display; the user might purposely leave out a
-- higher-level polity to avoid redundancy in several successive definitions, and we wouldn't want to override that.
-- Note that in practice the mechanism implemented by this function is used specifically for non-administrative
-- geographic regions such as Eastern Europe and the West Bank; there is a similar mechanism for administrative regions
-- handled by `augment_holonyms_with_containing_polity` in [[Module:place/placetypes]].
-- NOTE(review): the TODO list earlier in this file calls that function `augment_holonyms_with_container`; confirm which
-- name is current.
--
-- `place_descriptions` is a list of place descriptions (see top of file, collectively describing the data passed to
-- {{place}}). `implication_data` is the data used to implement the implications, i.e. a table indexed by holonym
-- placetype, each value of which is a table indexed by holonym placename, each value of which is a list of
-- "PLACETYPE/PLACENAME" holonyms to be added. The holonym list of each place description is rebuilt in place: each
-- implied holonym is inserted directly after the holonym that triggered it, and is also keyed into the place description
-- using `key_holonym_into_place_desc`. Place descriptions without holonyms are left alone.
local function handle_category_implications(place_descriptions, implication_data)
	for _, desc in ipairs(place_descriptions) do
		local existing_holonyms = desc.holonyms
		if existing_holonyms then
			local augmented_holonyms = {}
			for _, holonym in ipairs(existing_holonyms) do
				insert(augmented_holonyms, holonym)
				-- Look for implications keyed by the holonym's placetype (as enumerated by get_equiv_placetype_prop())
				-- and its unlinked placename.
				local implied_specs = m_placetypes.get_equiv_placetype_prop(holonym.placetype, function(pt)
					local implications_for_placetype = implication_data[pt]
					local implication = implications_for_placetype and
						implications_for_placetype[holonym.unlinked_placename]
					if implication then
						return implication
					end
				end)
				if implied_specs then
					for _, implied_spec in ipairs(implied_specs) do
						local spec_parts = split_on_slash(implied_spec)
						if #spec_parts ~= 2 then
							internal_error("Invalid holonym in implications: %s", implied_spec)
						end
						-- By the time we run, the display has already been generated so we don't need to set
						-- display_placename.
						local implied_holonym = {
							placetype = spec_parts[1],
							unlinked_placename = spec_parts[2],
						}
						insert(augmented_holonyms, implied_holonym)
						m_placetypes.key_holonym_into_place_desc(desc, implied_holonym)
					end
				end
			end
			desc.holonyms = augmented_holonyms
		end
	end
end


-- Split a raw holonym argument (e.g. "continent/Europe" or "country/en:Italy" or "in southern" or "r:suf/O'Higgins" or
-- "c/Austria,Germany,Czech Republic") into its components and return a list of holonym objects (see top of file), one
-- per comma-separated placename.
--
-- * A leading "!" sets `no_display` and a following leading "*" sets `suppress_comma` on every returned holonym.
-- * If there isn't a slash in the holonym (e.g. "in southern"), a single holonym is returned that has only
--   `display_placename`, `no_display` and `suppress_comma` set; in particular its `placetype` is nil.
-- * The placetype may be followed by colon-separated modifiers: ":also", ":the" and at most one affix-type modifier
--   (":pref", ":Pref", ":suf", ":Suf" or ":noaff"). Anything else, or a repeated modifier, throws an error.
-- * Each placename may be preceded by a language code and a colon.
-- * Placetype aliases (e.g. "r" for "region") and placename aliases (e.g. "US" or "USA" for "United States") are
--   expanded.
local function split_holonym(raw)
	-- Strip the optional "!" (no display) and "*" (suppress comma) prefixes, in that order.
	local bang, after_bang = raw:match("^(!)(.*)$")
	local no_display = bang ~= nil
	local combined_holonym = after_bang or raw
	local star, after_star = combined_holonym:match("^(%*)(.*)$")
	local suppress_comma = star ~= nil
	combined_holonym = after_star or combined_holonym

	local holonym_parts = split_on_slash(combined_holonym)
	if #holonym_parts == 1 then
		-- Raw text without a placetype. `unlinked_placename` should not be used.
		return {
			{
				display_placename = combined_holonym,
				no_display = no_display,
				suppress_comma = suppress_comma,
			},
		}
	end

	-- Everything after the first slash is the placename; rejoin further slashes in case of slash in holonym placename,
	-- e.g. Admaston/Bromley.
	local joined_placenames = concat(holonym_parts, "/", 2)

	-- Check for modifiers after the holonym placetype.
	local placetype_and_modifiers = split(holonym_parts[1], ":", true)
	local placetype = placetype_and_modifiers[1]
	local is_affix_modifier = {pref = true, Pref = true, suf = true, Suf = true, noaff = true}
	local affix_type
	local saw_also
	local saw_the
	for modifier_index = 2, #placetype_and_modifiers do
		local modifier = placetype_and_modifiers[modifier_index]
		if modifier == "also" then
			if saw_also then
				error(("Modifier ':also' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_also = true
		elseif modifier == "the" then
			if saw_the then
				error(("Modifier ':the' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_the = true
		elseif is_affix_modifier[modifier] then
			if affix_type then
				error(("Affix-type modifier ':%s' occurs twice in holonym '%s'"):format(modifier, combined_holonym))
			end
			affix_type = modifier
		else
			error(("Unrecognized holonym placetype modifier '%s', should be one of " ..
				"'pref', 'Pref', 'suf', 'Suf', 'noaff', 'also' or 'the'"):format(modifier))
		end
	end
	placetype = m_placetypes.resolve_placetype_aliases(placetype)

	local holonyms = split_on_comma(joined_placenames)
	local pluralize_affix = #holonyms > 1
	-- Only one holonym in the list carries the affix: the first one for a prefix-type affix, none for `noaff`, and the
	-- last one otherwise (including when no affix-type modifier was given).
	local affix_holonym_index
	if affix_type == "pref" or affix_type == "Pref" then
		affix_holonym_index = 1
	elseif affix_type == "noaff" then
		affix_holonym_index = 0
	else
		affix_holonym_index = #holonyms
	end

	-- Replace each raw placename in the list with the corresponding holonym object.
	for holonym_index, raw_placename in ipairs(holonyms) do
		-- Check for langcode before the holonym placename, but don't get tripped up by Wikipedia links, which begin
		-- "[[w:...]]" or "[[wikipedia:]]".
		local langcode, placename_without_langcode = rmatch(raw_placename, "^([^%[%]]-):(.*)$")
		local display_placename = raw_placename
		if langcode then
			display_placename = placename_without_langcode
		end
		display_placename = m_placetypes.resolve_placename_display_aliases(placetype, display_placename)
		local carries_affix = holonym_index == affix_holonym_index
		holonyms[holonym_index] = {
			placetype = placetype,
			display_placename = display_placename,
			unlinked_placename = m_placetypes.remove_links_and_html(display_placename),
			langcode = langcode,
			affix_type = carries_affix and affix_type or nil,
			pluralize_affix = carries_affix and pluralize_affix,
			suppress_affix = not carries_affix,
			no_display = no_display,
			suppress_comma = suppress_comma,
			continue_cat_loop = saw_also,
			force_the = holonym_index == 1 and saw_the,
		}
	end

	return holonyms
end


-- Return the inline-modifier specs used when parsing terms: the `link`, `q`, `l` and `ref` modifier groups, the `eq`
-- modifier and an overall `conj` modifier restricted to "and", "or" and "and/or". The specs are built by
-- construct_param_mods() in the parameter utilities module; the builder is wrapped in memoize().
local get_param_mods = memoize(function()
	local m_param_utils = require(parameter_utilities_module)
	local allowed_conjunctions = {["and"] = true, ["or"] = true, ["and/or"] = true}
	local param_mod_specs = {
		{group = {"link", "q", "l", "ref"}},
		{param = "eq"},
		-- FIXME: Finish [[Module:format utilities]].
		--{param = "conj", set = require(format_utilities_module).allowed_conjs_for_join_segments, overall = true},
		{param = "conj", set = allowed_conjunctions, overall = true},
	}
	return m_param_utils.construct_param_mods(param_mod_specs)
end)


-- Parse `term`, the value of the parameter named `paramname`, which consists of one or more comma-separated terms with
-- optional inline modifiers (see `get_param_mods`) and optional language prefixes. Terms without a language prefix get
-- `default_lang` as their language. The parsing itself is done by parse_inline_modifiers() in the parse interface
-- module, whose return value is returned as-is.
local function parse_term_with_inline_modifiers(term, paramname, default_lang)
	-- FIXME: Finish changes to [[Module:parameter utilities]] and [[Module:parse utilities]] that support continuations
	-- and new-format generate_obj().
	--local function generate_obj(data)
	--	local m_param_utils = require(parameter_utilities_module)
	--	data.parse_lang_prefix = true
	--	data.special_continuations = m_param_utils.default_special_continuations
	--	data.default_lang = default_lang
	--	return m_param_utils.generate_obj_maybe_parsing_lang_prefix(data)
	--end

	-- Build the term object for a single raw term, falling back to `default_lang` when no language prefix was parsed.
	local function generate_obj(raw_term, parse_err)
		local term_obj = require(parameter_utilities_module).generate_obj_maybe_parsing_lang_prefix {
			term = raw_term,
			parse_err = parse_err,
			parse_lang_prefix = true,
		}
		if not term_obj.lang then
			term_obj.lang = default_lang
		end
		return term_obj
	end

	local m_parse_interface = require(parse_interface_module)
	local parse_options = {
		paramname = paramname,
		param_mods = get_param_mods(),
		generate_obj = generate_obj,
		-- FIXME: See above.
		--generate_obj_new_format = true,
		splitchar = ",",
		outer_container = {},
	}
	return m_parse_interface.parse_inline_modifiers(term, parse_options)
end


local function parse_form_of_directive(arg, lang, form_of_overridden_args)
	local form_of_directive, raw_terms = arg:match("^@([a-z -]+):(.*)$")
	if not form_of_directive then
		error("Misformatted @-directive: " .. dump(arg))
	end
	if not export.all_form_of_directives[form_of_directive] then
		local known_directives = {}
		for k, _ in pairs(export.all_form_of_directives) do
			insert(known_directives, '"' .. k .. '"')
		end
		table.sort(known_directives)
		error(("Unrecognized form-of directive %s in @-directive %s; recognized directives are %s"):format(
			dump(form_of_directive), dump(arg), concat(known_directives, ", ")))
	end
	local spec = export.all_form_of_directives[form_of_directive]
	local canonical_directive = form_of_directive
	if spec.alias_of then
		canonical_directive = spec.alias_of
		spec = export.all_form_of_directives[canonical_directive]
		if not spec then
			internal_error("Form-of directive alias %s points to %s, which is not a directive",
				"@" .. form_of_directive, canonical_directive)
		elseif spec.alias_of then
			internal_error("Form-of directive alias %s points to %s, which is also an alias",
				"@" ..
form_of_directive, canonical_directive) end end local default_foreign = spec.default_foreign local directive_param = "@" .. form_of_directive if form_of_overridden_args and form_of_overridden_args[canonical_directive] then raw_terms = form_of_overridden_args[canonical_directive].new_value local new_directive = form_of_overridden_args[canonical_directive].new_directive local new_spec = export.all_form_of_directives[new_directive] if not new_spec then error(("Internal error: [[Module:transclude]] passed in unrecognized replacement directive '@%s'"): format(new_directive)) end if new_spec.alias_of then error(("Internal error: [[Module:transclude]] passed in replacement directive alias '@%s', " .. "should be canonical"):format(new_directive)) end if new_directive ~= canonical_directive then directive_param = directive_param .. (" (replaced with @%s)"):format(new_directive) canonical_directive = new_directive spec = new_spec end default_foreign = true end local terms = parse_term_with_inline_modifiers(raw_terms, directive_param, default_foreign and lang or enlang) return { directive = canonical_directive, terms = terms.terms, conj = terms.conj, spec = spec, } end -- Parse an argument containing extra information that is sometimes added to a definition, such as the capital, largest -- city, modern name, official name, etc. `args` is the value from the parsed argument structure and can be either nil, -- a string or a list (depending on whether it was declared as a single parameter or a list). `spec` is the extra info -- spec corresponding to the type of extra info. Each value in `args` can be a comma-separated list of terms with inline -- modifiers attached. [FIXME: we should switch to always using the comma-separated format and disallow list parameters -- such as |capital=, |capital2=, etc.] 
-- The return value is a structure containing fields `terms` (a list of term objects, each of which is in the format
-- expected by full_link() in [[Module:links]]), `conj` (an explicit conjunction to join multiple terms, or nil if no
-- explicit conjunction was given) and `spec` (the passed-in spec). Returns nil if `args` is nil or an empty list.
-- `default_lang` is the language used for terms without an explicit language prefix. Throws an error if two values
-- specify different conjunctions.
local function parse_extra_info_arg(args, spec, default_lang)
	if not args then
		return nil
	end
	if type(args) ~= "table" then
		args = {args}
	end
	if not args[1] then
		return nil
	end
	local terms = nil
	local conj
	for i, arg in ipairs(args) do
		-- The parameter name is the bare name for the first value and NAME2, NAME3, ... for later ones.
		local this_terms = parse_term_with_inline_modifiers(arg, spec.arg .. (i == 1 and "" or i), default_lang)
		local thisconj = this_terms.conj
		if not conj then
			conj = thisconj
		elseif thisconj and conj ~= thisconj then
			-- The message has three placeholders; the parameter name must be supplied as the third argument or
			-- string.format() itself throws instead of producing this message.
			error(("Two different conjunctions '%s' and '%s' specified for |%s=; you only need to specify the " ..
				"conjunction once"):format(conj, thisconj, spec.arg))
		end
		if not terms then
			terms = this_terms.terms
		else
			m_table.extend(terms, this_terms.terms)
		end
	end
	return {
		spec = spec,
		terms = terms,
		conj = conj,
	}
end

--[==[
Parse a "new-style" place description, with placetypes and holonyms surrounded by `<<...>>` amid otherwise raw text.
Return value is a place description object as documented at the top of the file. Exported for use by
[[Module:demonyms]].

`form_of_directives`, if given, is the list of form-of directives seen so far; a `<<@...>>` segment is parsed and
appended to it, which is only allowed when the list was empty on entry and no placetype or holonym has been seen yet.
If `form_of_directives` is nil, any `<<@...>>` segment is an error.
]==]
function export.parse_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	local placetypes = {}
	-- Odd-numbered segments are raw text; even-numbered segments are the contents of `<<...>>`.
	local segments = split(text, "<<(.-)>>")
	local retval = {holonyms = {}, order = {}}
	local form_of_directives_already_present = form_of_directives and not not form_of_directives[1]
	for i, segment in ipairs(segments) do
		if i % 2 == 1 then
			insert(retval.order, {type = "raw", value = segment})
		elseif segment:find("@") then
			if not form_of_directives then
				error(("Form-of directive '%s' not allowed in this context"):format(segment))
			elseif form_of_directives_already_present then
				error(("Saw form-of directive '%s' in new-style place desc followed by direct (separate-parameter) form-of directives; not allowed"):format(
					segment))
			elseif placetypes[1] or retval.holonyms[1] then
				error(("Form-of directive '%s' must come first, before placetypes and holonyms"):format(segment))
			else
				local form_of_directive = parse_form_of_directive(segment, lang, form_of_overridden_args)
				if not retval.order[1] or retval.order[1].type ~= "raw" or retval.order[2] then
					internal_error("`retval.order` should have a single raw element: %s", retval.order)
				end
				-- The raw text preceding the directive becomes its pretext and is removed from the display order.
				form_of_directive.pretext = retval.order[1].value
				retval.order[1] = nil
				insert(form_of_directives, form_of_directive)
			end
		elseif segment:find("/") then
			local holonyms = split_holonym(segment)
			for j, holonym in ipairs(holonyms) do
				if j > 1 then
					if not holonym.no_display then
						if j == #holonyms then
							insert(retval.order, {type = "raw", value = " and "})
						else
							insert(retval.order, {type = "raw", value = ", "})
						end
					end
					-- All but the first in a multi-holonym need an article. For the first one, the article is
					-- specified in the raw text if needed. (Currently, needs_article is only used when displaying the
					-- holonym, so it wouldn't matter when no_display is set, but we set it anyway in case we need it
					-- for something else.)
					holonym.needs_article = true
				end
				insert(retval.holonyms, holonym)
				if not holonym.no_display then
					insert(retval.order, {type = "holonym", value = #retval.holonyms})
				end
				m_placetypes.key_holonym_into_place_desc(retval, holonym)
			end
		else
			-- A placetype segment may be written "TREAT_AS:DISPLAY"; otherwise the segment is displayed as-is.
			local treat_as, display = segment:match("^(..-):(.+)$")
			if treat_as then
				segment = treat_as
			else
				display = segment
			end
			-- see if the placetype segment is just qualifiers
			local only_qualifiers = true
			local split_segments = split(segment, " ", true)
			for _, split_segment in ipairs(split_segments) do
				if m_placetypes.placetype_qualifiers[split_segment] == nil then
					only_qualifiers = false
					break
				end
			end
			insert(placetypes, {placetype = segment, only_qualifiers = only_qualifiers})
			if only_qualifiers then
				insert(retval.order, {type = "qualifier", value = display})
			else
				insert(retval.order, {type = "placetype", value = display})
			end
		end
	end
	if not form_of_directives_already_present and form_of_directives and form_of_directives[1] then
		-- Directives added by this description get an empty posttext; the following text is raw text from the
		-- description itself.
		form_of_directives[#form_of_directives].posttext = ""
	end
	-- Merge each qualifier-only segment with the placetype segment that follows it.
	local final_placetypes = {}
	for i, placetype in ipairs(placetypes) do
		if i > 1 and placetypes[i - 1].only_qualifiers then
			final_placetypes[#final_placetypes] = final_placetypes[#final_placetypes] .. " " .. placetype.placetype
		else
			insert(final_placetypes, placetype.placetype)
		end
	end
	retval.placetypes = final_placetypes
	return retval
end

--[==[
Parse one or more "new-style" place descriptions, with placetypes and holonyms surrounded by `<<...>>` amid otherwise
raw text. Multiple descriptions are separated by two semicolons in a row. Return value is a list of place description
objects as documented at the top of the file. Each description except the last has its `separator` field set to the
captured part of the separator that followed it (the second semicolon plus any directly attached non-space text).
`form_of_directives` is only passed to the first description.
]==]
local function parse_conjoined_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	local separate_specs = split(text, ";(;[^ ]*)")
	local descs = {}
	for i = 1, #separate_specs do
		if i % 2 == 1 then
			insert(descs, export.parse_new_style_place_desc(separate_specs[i], lang, form_of_directives,
				form_of_overridden_args))
			-- Form-of directives are only allowed in the first description.
			form_of_directives = nil
		else
			descs[#descs].separator = separate_specs[i]
		end
	end
	return descs
end

--[=[
Process numeric and "extra info" arguments into an overall place spec, as described at the top of the file. `data` is
an object with the following fields:
* `args`: The parsed arguments of {{tl|place}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `extra_info_overridden_set`, `form_of_overridden_args`: Same as the corresponding fields in the `data` object passed
  to `export.format`.

Returns a table with fields `lang`, `args`, `directives` (parsed form-of directives), `descs` (place descriptions) and
`extra_info` (parsed extra-info arguments).
]=]
local function parse_overall_place_spec(data)
	local args, from_tcl, extra_info_overridden_set, form_of_overridden_args =
		data.args, data.from_tcl, data.extra_info_overridden_set, data.form_of_overridden_args
	local descs = {}
	local this_desc
	-- Index of separate (semicolon-separated) place descriptions within `descs`.
	local desc_index = 1
	-- Index of separate holonyms within a place description. 0 means we've seen no holonyms and have yet to process
	-- the placetypes that precede the holonyms. 1 means we've seen no holonyms but have already processed the
	-- placetypes.
	local holonym_index = 0
	local in_place_desc = false
	local form_of_directives = {}

	-- Set the joiner on `desc` (the text placed between it and the following description) based on `separator`.
	local function set_desc_joiner(desc, separator)
		if separator == ";" then
			desc.joiner = "; "
			desc.include_following_article = true
		elseif separator == ";;" then
			desc.joiner = " "
		else
			-- Separator of the form ";TEXT": surround TEXT with spaces if it begins with a letter, otherwise
			-- (e.g. punctuation) only follow it with a space.
			local joiner = separator:sub(2)
			if rfind(joiner, "^%a") then
				desc.joiner = " " .. joiner .. " "
			else
				desc.joiner = joiner .. " "
			end
		end
	end

	for _, arg in ipairs(args[2]) do
		if arg:find("^@") then
			if not (desc_index == 1 and holonym_index == 0) then
				error("@-directives cannot follow place descriptions")
			end
			local form_of_directive = parse_form_of_directive(arg, args[1], form_of_overridden_args)
			if form_of_directives[1] then
				form_of_directive.pretext = ", "
			else
				form_of_directive.pretext = ""
			end
			insert(form_of_directives, form_of_directive)
		elseif arg == ";" or arg:find("^;[^ ]") then
			if not this_desc then
				error("Saw semicolon joiner without preceding place description")
			end
			set_desc_joiner(this_desc, arg)
			desc_index = desc_index + 1
			holonym_index = 0
			in_place_desc = false
		elseif arg:find("<<") then
			if in_place_desc then
				error("New-style place description must come first or following a separator (semicolon or similar), not directly following another description")
			end
			in_place_desc = true
			local this_descs = parse_conjoined_new_style_place_desc(arg, args[1], form_of_directives,
				form_of_overridden_args)
			for j, desc in ipairs(this_descs) do
				this_desc = desc
				if holonym_index > 0 then
					desc_index = desc_index + 1
					holonym_index = 0
				end
				if j < #this_descs then
					set_desc_joiner(this_desc, this_desc.separator)
				end
				descs[desc_index] = this_desc
				-- NOTE: a write to the undeclared name `last_was_new_style` used to sit here. Nothing visible in
				-- this function reads it, so it was removed as an accidental global write.
				holonym_index = #this_desc.holonyms + 1
			end
		else
			-- Old-style arguments can directly follow a new-style argument; they become additional holonyms
			-- tacked onto the end of the holonym list, and are displayed old-style except that there is no
			-- prefix before the first one following the new-style argument.
			in_place_desc = true
			if holonym_index == 0 then
				local entry_placetypes = split_on_slash(arg)
				this_desc = {placetypes = entry_placetypes, holonyms = {}}
				descs[desc_index] = this_desc
				holonym_index = holonym_index + 1
			else
				local holonyms = split_holonym(arg)
				for j, holonym in ipairs(holonyms) do
					if j > 1 then
						-- All but the first in a multi-holonym need an article. Not for the first one because e.g.
						-- {{place|en|city|s/Arizona|c/United States}} should not display as "a city in Arizona, the
						-- United States". The overall first holonym in the place description gets an article if
						-- needed regardless of our setting here.
						holonym.needs_article = true
						-- Insert "and" before the last holonym.
						if j == #holonyms then
							this_desc.holonyms[holonym_index] = {
								-- Use the no_display value from the first holonym; it should be the same for all
								-- holonyms. `unlinked_placename` should not be used.
								display_placename = "and",
								no_display = holonyms[1].no_display
							}
							holonym_index = holonym_index + 1
						end
					end
					this_desc.holonyms[holonym_index] = holonym
					m_placetypes.key_holonym_into_place_desc(this_desc, this_desc.holonyms[holonym_index])
					holonym_index = holonym_index + 1
				end
			end
		end
	end

	-- If the last directive has no posttext yet (i.e. it was not given inside a new-style description), follow it
	-- with ": " when a definition follows (an explicit def= other than "-", or, absent def=, at least one place
	-- description), and with nothing otherwise.
	if form_of_directives[1] and not form_of_directives[#form_of_directives].posttext then
		form_of_directives[#form_of_directives].posttext =
			(args.def and args.def ~= "-" or not args.def and descs[1]) and ": " or ""
	end

	-- Tracking code. This does nothing but add tracking for seen placetypes and qualifiers. The place will be linked to
	-- [[Wiktionary:Tracking/place/entry-placetype/PLACETYPE]] for all entry placetypes seen; in addition, if PLACETYPE
	-- has qualifiers (e.g. 'small city'), there will be links for the bare placetype minus qualifiers and separately
	-- for the qualifiers themselves:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/BARE_PLACETYPE]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/QUALIFIER]]
	-- Note that if there are multiple qualifiers, there will be links for each possible split. For example, for
	-- 'small maritime city'), there will be the following links:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/small maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/small]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/maritime]]
	-- Finally, there are also links for holonym placetypes, e.g. if the holonym 'c/Italy' occurs, there will be the
	-- following link:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/holonym-placetype/country]]
	for _, desc in ipairs(descs) do
		for _, entry_placetype in ipairs(desc.placetypes) do
			local splits = m_placetypes.split_qualifiers_from_placetype(entry_placetype, "no canon qualifiers")
			-- The loop variable is deliberately not called `split`, to avoid shadowing the file-level helper.
			for _, qualifier_split in ipairs(splits) do
				local _, this_qualifier, bare_placetype = unpack(qualifier_split, 1, 3)
				track("entry-placetype/" .. bare_placetype)
				if this_qualifier then
					track("entry-qualifier/" .. this_qualifier)
				end
			end
		end
		for _, holonym in ipairs(desc.holonyms) do
			if holonym.placetype then
				track("holonym-placetype/" .. holonym.placetype)
			end
		end
	end

	local extra_info = {}
	for _, extra_info_spec in ipairs(export.extra_info_args) do
		local extra_info_terms = parse_extra_info_arg(args[extra_info_spec.arg], extra_info_spec,
			-- If called from {{tcl}} and extra info argument was set by {{tcl}}, interpret the argument
			-- according to the language in 1=; otherwise interpret as English. To override this, prefix
			-- with the appropriate language.
			from_tcl and extra_info_overridden_set and extra_info_overridden_set[extra_info_spec.arg] and args[1] or
			enlang)
		if extra_info_terms then
			insert(extra_info, extra_info_terms)
		end
	end

	return {
		lang = args[1],
		args = args,
		directives = form_of_directives,
		descs = descs,
		extra_info = extra_info,
	}
end

-------- Definition-generating functions

-- Return a string with the wikilinks to the English translations of the word. `transl` is a list of translation
-- arguments, each a comma-separated list of translations; `ids` is the parallel list of sense-ID arguments, where
-- each present entry must contain the same number of comma-separated IDs as the corresponding translation argument
-- (an error is thrown otherwise). The links are joined with ", ".
local function get_translations(transl, ids)
	local ret = {}
	for i, t in ipairs(transl) do
		local arg_transls = split_on_comma(t)
		local arg_ids = ids[i]
		if arg_ids then
			arg_ids = split_on_comma(arg_ids)
			if #arg_transls ~= #arg_ids then
				error(("Saw %s translation%s in t%s=%s but %s ID%s in tid%s=%s"):format(
					#arg_transls, #arg_transls > 1 and "s" or "", i == 1 and "" or i, t,
					#arg_ids, #arg_ids > 1 and "'s" or "", i == 1 and "" or i, ids[i]))
			end
		end
		for j, arg_transl in ipairs(arg_transls) do
			insert(ret, link(arg_transl, "en", arg_ids and arg_ids[j] or nil))
		end
	end
	return concat(ret, ", ")
end

-- Return the article (currently always `"the"`) to be prepended to the given placename, or nil. `decorated_placename`
-- is the placename as specified by the user along with any affix added to it. `placename` is the raw unlinked
-- placename, defaulting to the unlinked version of `decorated_placename` if not given. `placetypes` is a placetype or
-- list of placetypes for the placename. `suppress_holonym_use_the_check` suppresses checking the placetypes for
-- `holonym_use_the`.
local function get_placename_article(decorated_placename, placetypes, placename, suppress_holonym_use_the_check)
	local plain_decorated = m_placetypes.remove_links_and_html(decorated_placename)
	-- The user already supplied the article; never add a second one.
	if plain_decorated:find("^the ") then
		return nil
	end
	if not placename then
		placename = plain_decorated
	end
	if type(placetypes) == "string" then
		placetypes = {placetypes}
	end

	-- Step 1: an article explicitly listed for this placename under the placetype (or an equivalent placetype).
	local function explicit_article(pt)
		local articles_by_placename = m_placetypes.placename_article[pt]
		local found = articles_by_placename and articles_by_placename[placename]
		if found then
			return found
		end
	end
	for _, pt in ipairs(placetypes) do
		local found = m_placetypes.get_equiv_placetype_prop(pt, explicit_article)
		if found then
			return found
		end
	end

	-- Step 2: look for a known location. Collect the equivalent placetypes of every specified placetype so that e.g.
	-- {{place|en|@official name of:Bahamas|island country|r/Caribbean}} puts 'the' before Bahamas ("Bahamas" is just
	-- specified as a country but "island country" falls back to "country").
	local equiv_placetypes = {}
	for _, pt in ipairs(placetypes) do
		for _, equiv in ipairs(m_placetypes.get_placetype_equivs(pt)) do
			insert(equiv_placetypes, equiv.placetype)
		end
	end
	-- We should be using find_matching_holonym_location() but that function doesn't currently work without alias
	-- resolution. Instead we check whether any matching location has `the = true` set. In practice there aren't any
	-- cases where a given placename matches two locations, only one of which has `the = true` set.
	local location_query = {
		placetypes = equiv_placetypes,
		placename = placename,
		alias_resolution = "none",
	}
	for group, _, spec in m_placetypes.iterate_matching_location(location_query) do
		-- The spec isn't initialized when alias resolution is turned off, so consult both the spec and its group.
		-- An explicit `the = false` on the spec wins over the group default.
		local wants_the = spec.the
		if wants_the == nil then
			wants_the = group.default_the
		end
		if wants_the then
			return "the"
		end
	end

	-- Step 3: see if the placetype itself requests an article before the placename. This occurs e.g. with 'sea'. But
	-- if the user specifies e.g. "sea:pref/Cortez", we'd wrongly get "the sea of the Cortez", so the caller can
	-- suppress this check.
	if not suppress_holonym_use_the_check then
		local function placetype_uses_the(pt)
			local pt_data = placetype_data[pt]
			return pt_data and pt_data.holonym_use_the
		end
		for _, pt in ipairs(placetypes) do
			if m_placetypes.get_equiv_placetype_prop(pt, placetype_uses_the) then
				return "the"
			end
		end
	end

	-- Step 4: pattern-based checks against the unlinked decorated placename, first the universal patterns (key "*")
	-- and then the patterns registered per placetype.
	local function matches_any(patterns)
		for _, pattern in ipairs(patterns) do
			if plain_decorated:find(pattern) then
				return true
			end
		end
		return nil
	end
	if matches_any(m_placetypes.placename_the_re["*"]) then
		return "the"
	end
	local function matches_placetype_patterns(pt)
		local patterns = m_placetypes.placename_the_re[pt]
		if not patterns then
			return nil
		end
		return matches_any(patterns)
	end
	for _, pt in ipairs(placetypes) do
		if m_placetypes.get_equiv_placetype_prop(pt, matches_placetype_patterns) then
			return "the"
		end
	end

	return nil
end

-- Return the article (if any) that should precede `decorated_placename` (the user-specified placename with any affix
-- added), where the underlying holonym object is found at `holonym_index` among the holonyms of `place_desc`. Returns
-- nil for holonyms without a placetype (raw text). When the holonym carries an explicit affix type, the
-- placetype-level `holonym_use_the` check is suppressed.
local function get_holonym_article(decorated_placename, place_desc, holonym_index)
	local holonym = place_desc.holonyms[holonym_index]
	if holonym.placetype then
		return get_placename_article(decorated_placename, holonym.placetype, holonym.unlinked_placename,
			not not holonym.affix_type)
	end
	return nil
end

-- Convert a holonym into display format. This adds wikilinks to holonyms and passes them through any display handlers,
-- which may (e.g.)
-- add the placetype to the holonym. If `needs_article` is true, prepend the article `"the"` if the
-- holonym requires it (e.g. if the holonym is `United States`). `needs_article` is set to true when we are processing
-- the first specified holonym in an old-style place description (i.e. the holonym directly following the entry
-- placetype, with no raw-text holonym in between).
--
-- Examples:
-- ({placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, true) returns
-- the template-expanded equivalent of "the {{l|en|United States}}".
-- ({placetype = "region", display_placename = "O'Higgins", unlinked_placename = "O'Higgins", affix_type = "suf"}, false)
-- returns the template-expanded equivalent of "{{l|en|O'Higgins}} region".
-- ({display_placename = "in the southern"}, false) returns "in the southern" (without wikilinking because .placetype
-- and .langcode are both nil).
--
-- `place_desc` is the place description containing the holonym and `holonym_index` its index within
-- `place_desc.holonyms`. Returns the empty string for holonyms marked `no_display`.
local function format_holonym(place_desc, holonym_index, needs_article)
	local holonym = place_desc.holonyms[holonym_index]
	if holonym.no_display then
		return ""
	end
	local orig_needs_article = needs_article
	needs_article = needs_article or holonym.needs_article or holonym.force_the
	local output = holonym.display_placename
	local placetype = holonym.placetype
	-- `affix_pt_data` is declared here with the other working variables; it was previously assigned without a
	-- declaration and therefore leaked into the global environment.
	local affix_type_pt_data, affix_pt_data, affix_type, affix_is_prefix, affix, prefix, suffix, no_affix_strings
	local pt_equiv_for_affix_type, already_seen_affix, need_affix

	-- Implement display handlers.
	local display_handler = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
		return placetype_data[pt] and placetype_data[pt].display_handler
	end)
	if display_handler then
		output = display_handler(placetype, output)
	end
	if not holonym.suppress_affix then
		-- Implement adding an affix (prefix or suffix) based on the holonym's placetype. The affix will be
		-- added either if the placetype's placetype_data spec says so (by setting 'affix_type'), or if the
		-- user explicitly called for this (e.g. by using 'r:suf/O'Higgins'). Before adding the affix,
		-- however, we check to see if the affix is already present (e.g. the placetype is "district"
		-- and the placename is "Mission District"). The placetype can override the affix to add (by setting
		-- `prefix`, `suffix` or `affix`) and/or override the strings used for checking if the affix is already
		-- present (by setting 'no_affix_strings', which defaults to the affix explicitly given through `prefix`,
		-- `suffix` or `affix` if any are given). `prefix` and `suffix` take precedence over `affix` if both are
		-- set, but only when the appropriate type of affix is requested.

		-- Search through equivalent placetypes for a setting of `affix_type`, `affix`, `prefix` or `suffix`. If we
		-- find any, use them. If `affix_type` is given, it is overridden by the user's explicitly specified affix
		-- type. If either an `affix_type` is found or the user explicitly specified an affix type, the affix is
		-- displayed according to the following:
		-- 1. If `prefix`, `suffix` or `affix` is given by the placetype or equivalent placetypes, use it (e.g.
		--    placetype `administrative region` requests suffix "region" but doesn't set affix type; if the user
		--    explicitly specifies `administrative region` as the placetype for a holonym and specifies a suffixal
		--    affix type, use "region"). In this search, we stop looking if we find an explicit `affix_type`
		--    setting; if this is found without an associated affix setting, the assumption is the associated
		--    placetype was intended as the affix, not some explicit affix setting associated with a fallback
		--    placetype.
		-- 2. Otherwise, if the user explicitly requested an affix type, use the actual placetype (principle of
		--    least surprise).
		-- 3. Finally, fall back to the placetype associated with an explicit `affix_type` setting (which will
		--    always exist if we get this far).
		affix_type_pt_data, pt_equiv_for_affix_type = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and cdpt.affix_type and cdpt or nil
			end
		)
		-- Only the first return value is needed here; the second one was never read.
		affix_pt_data = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and (cdpt.affix_type or cdpt.affix or cdpt.prefix or cdpt.suffix) and cdpt or nil
			end
		)
		if affix_type_pt_data then
			affix_type = affix_type_pt_data.affix_type
			need_affix = true
		end
		if affix_pt_data then
			prefix = affix_pt_data.prefix or affix_pt_data.affix
			suffix = affix_pt_data.suffix or affix_pt_data.affix
			need_affix = true
		end
		no_affix_strings = affix_pt_data and affix_pt_data.no_affix_strings or
			affix_type_pt_data and affix_type_pt_data.no_affix_strings
		if holonym.affix_type and placetype then
			-- The user's explicit affix type overrides whatever the placetype data requested.
			affix_type = holonym.affix_type
			prefix = prefix or placetype
			suffix = suffix or placetype
			need_affix = true
		end
		if need_affix then
			-- At this point the affix_type has been determined and can't change any more, so we can figure out
			-- whether we need the calculated prefix or suffix.
			affix_is_prefix = affix_type == "pref" or affix_type == "Pref"
			if affix_is_prefix then
				affix = prefix
			else
				affix = suffix
			end
			if not affix then
				if not pt_equiv_for_affix_type then
					internal_error("Something wrong, `pt_equiv_for_affix_type` not set processing holonym: %s",
						holonym)
				end
				affix = pt_equiv_for_affix_type.placetype
				if not affix then
					internal_error("Something wrong, no affix could be located in `pt_equiv_for_affix_type` for " ..
						"holonym %s: %s", holonym, pt_equiv_for_affix_type)
				end
			end
			no_affix_strings = no_affix_strings or lc(affix)
			if holonym.pluralize_affix then
				affix = m_placetypes.pluralize_placetype(affix)
			end
			already_seen_affix = m_placetypes.check_already_seen_string(output, no_affix_strings)
		end
	end
	-- Link as English when there is a placetype but no explicit language code; raw text (neither) is passed with a
	-- nil language.
	output = link(output, holonym.langcode or placetype and "en" or nil)
	if need_affix and not affix_is_prefix and not already_seen_affix then
		output = output .. " " .. (affix_type == "Suf" and ucfirst_all(affix) or affix)
	end
	if needs_article then
		local article = holonym.force_the and "the" or get_holonym_article(output, place_desc, holonym_index)
		if article then
			output = article .. " " .. output
		end
	end
	if affix_is_prefix and not already_seen_affix then
		output = (affix_type == "Pref" and ucfirst_all(affix) or affix) .. " of " .. output
		if orig_needs_article then
			-- Put the article before the added affix if we're the first holonym in the place description. This is
			-- distinct from the article added above for the holonym itself; cf. "c:pref/United States,Canada" ->
			-- "the countries of the United States and Canada". We need to use the value of `needs_article` passed
			-- in from the function, which indicates whether we're processing the first holonym.
			output = "the " .. output
		end
	end
	return output
end

-- Format a holonym for display, taking into account the entry's placetype (specifically, the last placetype if there
-- are more than one, excluding conjunctions and parenthetical items); the holonym's index among the holonyms in the
-- template (which specifies what the previous holonym is and whether it is the first holonym); and the full place
-- description (which helps resolve ambiguities in holonyms when looking up known locations). This may involve putting a
-- preposition ("in" or "of") before the formatted holonym, particularly if it is the first one, and may involve
-- prepending a comma.
-- If `holonym_no_prefix` is specified, nothing except a space is put before the holonym; used
-- when formatting mixed new/old-style descriptions.
local function format_holonym_in_context(entry_placetype, place_desc, holonym_index, holonym_no_prefix)
	local is_first = holonym_index == 1
	local delimiter
	if holonym_no_prefix then
		delimiter = " "
	else
		delimiter = ""
		local this_holonym = place_desc.holonyms[holonym_index]
		if not this_holonym.no_display then
			-- A nil placetype means the holonym is just raw text, e.g. 'in southern'.
			local has_placetype = this_holonym.placetype
			if is_first then
				-- First holonym: a real holonym is introduced by the entry placetype's preposition; raw text only
				-- gets a space, and not even that if it starts with a comma.
				if has_placetype then
					delimiter = " " .. m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " "
				elseif not this_holonym.display_placename:find("^,") then
					delimiter = " "
				end
			else
				-- Later holonyms: a comma follows a real (placetyped) holonym unless suppressed or the current
				-- holonym is one of the connective raw-text strings.
				local preceding = place_desc.holonyms[holonym_index - 1]
				if preceding.placetype and not this_holonym.suppress_comma then
					local shown = this_holonym.display_placename
					local is_connective = shown == "and" or shown == "in" or shown == "and the" or shown == "in the"
					if not is_connective then
						delimiter = ","
					end
				end
				if has_placetype or not this_holonym.display_placename:find("^,") then
					delimiter = delimiter .. " "
				end
			end
		end
	end
	return delimiter .. format_holonym(place_desc, holonym_index, not holonym_no_prefix and is_first)
end

-- Return the linked description of a placetype. Qualifiers are split off and displayed separately: the splits
-- returned by split_qualifiers_from_placetype() are tried in order, and the first bare placetype that has a display
-- form is returned with the qualifier prefix of that split. If none has a display form, the last bare placetype is
-- returned as-is after the last split's qualifier prefix.
local function get_placetype_description(placetype)
	local qualifier_prefix = ""
	for _, parts in ipairs(m_placetypes.split_qualifiers_from_placetype(placetype)) do
		local earlier_qualifiers, qualifier, bare = parts[1], parts[2], parts[3]
		if not qualifier then
			qualifier_prefix = ""
		elseif earlier_qualifiers then
			qualifier_prefix = earlier_qualifiers .. " " .. qualifier .. " "
		else
			qualifier_prefix = qualifier .. " "
		end
		local display_form = m_placetypes.get_placetype_display_form(bare)
		if display_form then
			return qualifier_prefix .. display_form
		end
		placetype = bare
	end
	return qualifier_prefix .. placetype
end

-- Return the linked description of a qualifier (which may be multiple words).
local function get_qualifier_description(qualifier)
	-- Append a dummy placetype so the qualifier can be run through the placetype splitter, then read the qualifier
	-- text back out of the last split.
	local splits = m_placetypes.split_qualifiers_from_placetype(qualifier .. " foo")
	local last_split = splits[#splits]
	local earlier_qualifiers, final_qualifier = unpack(last_split, 1, 3)
	if earlier_qualifiers then
		return earlier_qualifiers .. " " .. final_qualifier
	end
	return final_qualifier
end

-- Format one form-of directive. `overall_place_spec` is the overall parsed place spec; `directive_terms` is a parsed
-- directive with fields `terms`, `conj`, `spec` and `directive`; `ucfirst` requests capitalizing the first letter of
-- the directive text; `from_tcl` suppresses the tracking calls. Returns the result of format_form_of() in the form-of
-- module (first return value only).
local function format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl)
	-- Placetypes used to decide whether a term needs an article: those of the sole description, or of all
	-- descriptions combined when there is more than one.
	local descs = overall_place_spec.descs
	local placetypes
	if descs[2] then
		placetypes = {}
		for _, desc in ipairs(descs) do
			m_table.extend(placetypes, desc.placetypes)
		end
	else
		placetypes = descs[1].placetypes
	end

	local linked_terms = {}
	for _, termobj in ipairs(directive_terms.terms) do
		-- Only compute an article for plain terms: no display override and no embedded wikilinks.
		local raw_term = termobj.term
		local article
		if not termobj.alt and raw_term and not raw_term:find("%[%[") then
			article = get_placename_article(raw_term, placetypes)
		end
		local linked = "<span class='form-of-definition-link'>" ..
			m_links.full_link(termobj, "term", nil, "show qualifiers") .. "</span>"
		if termobj.eq then
			linked = linked .. " (= " .. m_links.full_link {term = termobj.eq, lang = enlang} .. ")"
		end
		if article then
			linked = article .. " " .. linked
		end
		insert(linked_terms, linked)
	end

	-- The directive text may be a literal, a function of the overall spec, or "+" meaning the directive name itself.
	local spec = directive_terms.spec
	local text = spec.text
	if type(text) == "function" then
		text = text(overall_place_spec)
	end
	if text == "+" then
		text = directive_terms.directive
	end
	if ucfirst then
		text = m_strutils.ucfirst(text)
	end

	if not from_tcl then
		local tracking_base = "form-of/" .. directive_terms.directive
		track(tracking_base)
		local lang = overall_place_spec.lang
		local code = lang:getCode()
		local full_code = lang:getFullCode()
		track(tracking_base .. "/" .. code)
		if full_code ~= code then
			track(tracking_base .. "/" .. full_code)
		end
		if full_code ~= "en" then
			track(tracking_base .. "/non-english")
		end
	end

	local conjunction = directive_terms.conj or spec.conjunction or "and"
	return (require(form_of_module).format_form_of {
		text = text,
		lemmas = m_table.serialCommaJoin(linked_terms, {conj = conjunction}),
		lemma_classes = false,
		-- text_classes = "place-text",
	})
end

-- Format the terms for one kind of extra information that is sometimes added to a definition (capital, largest city,
-- modern name, official name, etc.). `overall_place_spec` is the overall parsed {{tl|place}} spec (see comment at top
-- of file); `extra_info_terms` is the terms spec for this kind of extra info (as returned by
-- `parse_extra_info_arg`); `sentence_style` indicates whether a sentence-style definition is being generated (as
-- suitable for an English-language term without a translation specified using t=). The result begins with ". "
-- (sentence style, if the spec opts in through `match_sentence_style`) or "; " followed by the label and the joined
-- terms.
local function format_extra_info(overall_place_spec, extra_info_terms, sentence_style)
	local linked_terms = {}
	for _, termobj in ipairs(extra_info_terms.terms) do
		insert(linked_terms, m_links.full_link(termobj, nil, nil, "show qualifiers"))
	end

	-- The label may be a literal, a function of the overall spec, or "+" meaning the argument name itself.
	local spec = extra_info_terms.spec
	local label = spec.text
	if type(label) == "function" then
		label = label(overall_place_spec)
	end
	if label == "+" then
		label = spec.arg
	end
	if spec.auto_plural and linked_terms[2] then
		label = pluralize(label)
	end
	if spec.with_colon then
		label = label .. ":"
	end

	local lead
	if sentence_style and spec.match_sentence_style then
		lead = ". " .. m_strutils.ucfirst(label)
	else
		lead = "; " .. label
	end

	-- FIXME: Use joinSegments when available.
	-- return lead .. " " ..
	--	m_table.joinSegments(linked_terms, {conj = extra_info_terms.conj or spec.conjunction or "and"})
	local conjunction = extra_info_terms.conj or spec.conjunction or "and"
	return lead .. " " .. m_table.serialCommaJoin(linked_terms, {conj = conjunction})
end

-- Format an old-style place description (with separate arguments for the placetype and each holonym) for display and
-- return the resulting string.
-- Parameters, as used by the code below:
-- * `args`: the parsed template arguments; only `args.a` (an explicit article) is read here.
-- * `place_desc`: the place description; its `placetypes` and (optional) `holonyms` are read.
-- * `desc_index`: index of this description; for index 1 the article comes from `args.a`, for later descriptions an
--   article is only kept if at least one holonym has a placetype.
-- * `with_article`: whether to try to prefix an article to the gloss.
-- * `ucfirst`: passed on to `get_placetype_article`, and used to capitalize the gloss when no article is added.
local function format_old_style_place_desc_for_display(args, place_desc, desc_index, with_article, ucfirst)
	-- The placetype used to determine whether "in" or "of" follows is the last placetype if there are
	-- multiple slash-separated placetypes, but ignoring "and", "or" and parenthesized notes
	-- such as "(one of 254)".
	local entry_placetype = nil
	local placetypes = place_desc.placetypes

	local function is_and_or(item)
		return item == "and" or item == "or"
	end

	-- Output is accumulated as a list of string pieces and concatenated at the end.
	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end
	-- Insert a space unless nothing has been output yet.
	local function ins_space()
		if #parts > 0 then
			ins(" ")
		end
	end

	-- Find the position of the last "and"/"or" among the placetypes, if any.
	local and_or_pos
	for i, placetype in ipairs(placetypes) do
		if is_and_or(placetype) then
			and_or_pos = i
			-- no break here; we want the last in case of more than one
		end
	end

	local remaining_placetype_index
	if and_or_pos then
		track("multiple-placetypes-with-and")
		if and_or_pos == #placetypes then
			error("Conjunctions 'and' and 'or' cannot occur last in a set of slash-separated placetypes: " ..
				concat(placetypes, "/"))
		end
		-- Everything up to and including the placetype right after the last conjunction is joined into a single
		-- serial-comma list using that conjunction.
		local items = {}
		for i = 1, and_or_pos + 1 do
			local pt = placetypes[i]
			if is_and_or(pt) then
				-- skip
			elseif i > 1 and pt:find("^%(") then
				-- append placetypes beginning with a paren to previous item
				items[#items] = items[#items] .. " " .. pt
			else
				entry_placetype = pt
				insert(items, get_placetype_description(pt))
			end
		end
		ins(m_table.serialCommaJoin(items, {conj = placetypes[and_or_pos]}))
		remaining_placetype_index = and_or_pos + 2
	else
		remaining_placetype_index = 1
	end

	for i = remaining_placetype_index, #placetypes do
		local pt = placetypes[i]
		-- Check for and, or and placetypes beginning with a paren (so that things like
		-- "{{place|en|county/(one of 254)|s/Texas}}" work).
		if m_placetypes.placetype_is_ignorable(pt) then
			ins_space()
			ins(pt)
		else
			entry_placetype = pt
			-- Join multiple placetypes with comma unless placetypes are already
			-- joined with "and". We allow "the" to precede the second placetype
			-- if they're not joined with "and" (so we get "city and county seat of ..."
			-- but "city, the county seat of ...").
			if i > 1 then
				ins(", ")
				local article = m_placetypes.get_placetype_article(pt)
				if article ~= "the" and i > remaining_placetype_index then
					-- Track cases where we are comma-separating multiple placetypes without the second one starting
					-- with "the", as they may be mistakes. The occurrence of "the" is usually intentional, e.g.
					-- {{place|zh|municipality/state capital|s/Rio de Janeiro|c/Brazil|t1=Rio de Janeiro}}
					-- for the city of [[Rio de Janeiro]], which displays as "a municipality, the state capital of ...".
					track("multiple-placetypes-without-and-or-the")
				end
				if article then
					ins(article)
					ins(" ")
				end
			end
			ins(get_placetype_description(pt))
		end
	end

	-- Append every holonym, each with its contextual delimiter; `entry_placetype` is the last non-ignorable
	-- placetype seen above.
	if place_desc.holonyms then
		for holonym_index, _ in ipairs(place_desc.holonyms) do
			ins(format_holonym_in_context(entry_placetype, place_desc, holonym_index))
		end
	end

	local gloss = concat(parts)

	if with_article then
		local article
		if desc_index == 1 then
			article = args.a
		else
			if not place_desc.holonyms then
				-- there isn't a following holonym; the place type given might be raw text as well, so don't add
				-- an article.
				with_article = false
			else
				local saw_placetype_holonym = false
				for _, holonym in ipairs(place_desc.holonyms) do
					if holonym.placetype then
						saw_placetype_holonym = true
						break
					end
				end
				if not saw_placetype_holonym then
					-- following holonym(s) is/are just raw text; the place type given might be raw text as well,
					-- so don't add an article.
					with_article = false
				end
			end
			if with_article then
				track("second-or-higher-description-with-added-article")
			else
				track("second-or-higher-description-suppressed-article")
			end
		end
		-- `with_article` may have been cleared above, so it is checked again here.
		if with_article then
			article = article or m_placetypes.get_placetype_article(place_desc.placetypes[1], ucfirst)
			if article then
				gloss = article .. " " .. gloss
			elseif ucfirst then
				gloss = m_strutils.ucfirst(gloss)
			end
		end
	end

	return gloss
end

--[==[
Get the full gloss (English description) of a new-style place description. New-style place descriptions are
specified with a single string containing raw text interspersed with placetypes and holonyms surrounded by `<<...>>`.
The segments are taken from `place_desc.order`; any holonyms with an index higher than the highest one referenced in
`place_desc.order` are appended afterwards, the first of them preceded by just a space. If `with_article` is set and
`args.a` is given, that article is output first. Exported for use by [[Module:demonyms]].
]==]
function export.format_new_style_place_desc_for_display(args, place_desc, with_article)
	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end

	if with_article and args.a then
		ins(args.a .. " ")
	end

	-- Highest holonym index explicitly referenced by a "holonym" segment.
	local max_holonym = 0
	for _, order in ipairs(place_desc.order) do
		local segment_type, segment = order.type, order.value
		if segment_type == "raw" then
			ins(segment)
		elseif segment_type == "placetype" then
			ins(get_placetype_description(segment))
		elseif segment_type == "qualifier" then
			ins(get_qualifier_description(segment))
		elseif segment_type == "holonym" then
			-- For holonym segments, `segment` is the index of the holonym in `place_desc.holonyms`.
			ins(format_holonym(place_desc, segment, false))
			if segment > max_holonym then
				max_holonym = segment
			end
		else
			internal_error("Unrecognized segment type %s", segment_type)
		end
	end

	if place_desc.holonyms and max_holonym < #place_desc.holonyms then
		-- Only the first trailing holonym is formatted without a prefix.
		local holonym_no_prefix = true
		for holonym_index = max_holonym + 1, #place_desc.holonyms do
			ins(format_holonym_in_context(nil, place_desc, holonym_index, holonym_no_prefix))
			holonym_no_prefix = false
		end
	end

	return concat(parts)
end

-- Return a string with the gloss (the description of the place itself, as opposed to translations). If `ucfirst` is
-- given, the gloss's first letter is made upper case. If `sentence_style` is given, the "extra info" (modern name,
-- capital, largest city, etc.) is displayed as separated sentences; otherwise, it is displayed separated from the main
-- definition by semicolons.
-- `data` is a table with the fields `overall_place_spec`, `ucfirst`, `sentence_style`, `drop_extra_info`,
-- `extra_info_overridden_set` and `from_tcl`. The result is the concatenation, in order, of: the form-of directives
-- (with their pre/post text), the definition proper (either `def=` or the formatted place descriptions), the `addl=`
-- text, and the extra info. If `def=-` is given, only the directive portion is returned.
local function get_display_form(data)
	local overall_place_spec, ucfirst, sentence_style, drop_extra_info, extra_info_overridden_set, from_tcl =
		data.overall_place_spec, data.ucfirst, data.sentence_style, data.drop_extra_info,
		data.extra_info_overridden_set, data.from_tcl
	local args = overall_place_spec.args
	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end

	if overall_place_spec.directives and overall_place_spec.directives[1] then
		for i, directive_terms in ipairs(overall_place_spec.directives) do
			ins(directive_terms.pretext)
			-- Any pretext means the directive text no longer begins the definition, so don't capitalize it.
			if directive_terms.pretext ~= "" then
				ucfirst = false
			end
			if not args.def or args.def == "-" then
				ins(format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl))
				ucfirst = false
				-- Only the last directive's posttext is output.
				if i == #overall_place_spec.directives and directive_terms.posttext then
					ins(directive_terms.posttext)
				end
			end
		end
	end

	if args.def == "-" then
		-- Definition display suppressed; note that this also skips addl= and the extra info.
		return concat(parts)
	end
	if args.def then
		if args.def:find("<<") then
			-- def= in new-style format; format it without an article.
			local def_desc = export.parse_new_style_place_desc(args.def, args[1])
			ins(export.format_new_style_place_desc_for_display({}, def_desc, false))
		else
			ins(args.def)
		end
	else
		local include_article = true
		for n, desc in ipairs(overall_place_spec.descs) do
			if desc.order then
				ins(export.format_new_style_place_desc_for_display(args, desc, n == 1))
			else
				ins(format_old_style_place_desc_for_display(args, desc, n, include_article, ucfirst))
			end
			if desc.joiner then
				ins(desc.joiner)
			end
			include_article = desc.include_following_article
			ucfirst = false
		end
	end

	-- addl= is appended directly if it begins with ; or :, after a space if it begins with _ (which is removed),
	-- and otherwise after a comma and space.
	-- (A stray `posttext = posttext or ""` used to live here; it wrote to an undeclared global that nothing read.)
	local addl = args.addl
	if addl then
		if addl:find("^[;:]") then
			ins(addl)
		elseif addl:find("^_") then
			ins(" " .. addl:sub(2))
		else
			ins(", " .. addl)
		end
	end

	for _, extra_info_terms in ipairs(overall_place_spec.extra_info) do
		-- Include a given extra info term either when
		-- (1) drop_extra_info not set (it's set by {{tcl}}), or
		-- (2) the extra info term is marked as "display even when dropped" (e.g. modern= or full=, to help understand
		--     the term's sense), or
		-- (3) the term was overridden by a `place_*=` setting in {{tcl}}.
		if not drop_extra_info or extra_info_terms.spec.display_even_when_dropped or
			extra_info_overridden_set and extra_info_overridden_set[extra_info_terms.spec.arg] then
			ins(format_extra_info(overall_place_spec, extra_info_terms, sentence_style))
		end
	end

	return concat(parts)
end

-- Return the definition line. `data` is a table with the fields `overall_place_spec`, `from_tcl`, `drop_extra_info`,
-- `extra_info_overridden_set` and `translation_follows`. Sentence style (and hence initial capitalization, unless
-- nocap= is given) is used only when the language code is "en" and no translation is given with t=. With
-- translations, the gloss is either put before them followed by a colon (if `translation_follows`) or after them in
-- parentheses.
local function get_def(data)
	local overall_place_spec, from_tcl, drop_extra_info, extra_info_overridden_set, translation_follows =
		data.overall_place_spec, data.from_tcl, data.drop_extra_info, data.extra_info_overridden_set,
		data.translation_follows
	local args = overall_place_spec.args
	local sentence_style = overall_place_spec.lang:getCode() == "en"
	local ucfirst = sentence_style and not args.nocap
	if #args.t > 0 then
		local gloss = get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = false,
			sentence_style = false,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
		if from_tcl and not args.tcl_nolc then
			gloss = m_strutils.lcfirst(gloss)
		end
		if translation_follows then
			return (gloss == "" and "" or gloss .. ": ") .. get_translations(args.t, args.tid)
		else
			return get_translations(args.t, args.tid) .. (gloss == "" and "" or " (" .. gloss .. ")")
		end
	else
		return get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = ucfirst,
			sentence_style = sentence_style,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
	end
end

---------- Functions for the category wikicode

-- The code in this section finds the categories to which a given place belongs. See comment at top of file.

--[=[
Find the appropriate category specs for a given place description and placetype.
For example, for the template invocation {{tl|place|en|city/and/county|s/Pennsylvania|c/US}}, which results in the place description ``` { placetypes = {"city", "and", "county"}, holonyms = { {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, {placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, } ``` the call ``` find_placetype_cat_specs { entry_placetype = "city", place_desc = { placetypes = {"city", "and", "county"}, holonyms = { {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, {placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, }, } ``` might produce the return value ``` { entry_placetype = "city", cat_specs = {"Cities in Pennsylvania, USA"}, triggering_holonym = {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, triggering_holonym_index = 1, } ``` See the comment at the top of the section for a description of category specs and the overall algorithm. 
On entry, `data` is an object with the following fields: * `entry_placetype`: the entry placetype (or equivalent) used to look up the category data in placetype_data, which must have already been resolved to a placetype with an entry in `placetype_data`; * `place_desc`: the full place description as documented at the top of the file (used only for its holonyms); * `first_holonym_index`: the index of the first holonym to consider when iterating through the holonyms (used to implement the `:also` holonym placetype modifier); * `overriding_holonym`: an optional overriding holonym to use, in place of iterating through the holonyms (used to implement categorizing other holonyms of the same type as the triggering holonym, so that e.g. {{tl|place|en|river|s/Kansas,Nebraska}}, or equivalently {{tl|place|en|river|s/Kansas|and|s/Nebraska}}, works); * `from_demonym`: we are called from {{tl|demonym-noun}} or {{tl|demonym-adj}} instead of {{tl|place}}, and should generate categories appropriate to those templates. * `form_of_directive`: A form-of directive prefix such as `FORMER_NAME_OF`. If specified, use that type prefix to generate categories appropriate to the form-of directive (in addition to the regular categories generated for the {{tl|place}} invocation, which happens in a separate call). 
The return value is {nil} if no category specs could be located, otherwise an object with the following fields:
* `entry_placetype`: the placetype that should be used to construct categories when `true` is one of the returned
  category specs (normally the same as the `entry_placetype` passed in, but will be different when a "fallback" key
  exists and is used);
* `cat_specs`: list of category specs as described above;
* `triggering_holonym`: the triggering holonym (see the comment at the top of the section), or nil if there was no
  triggering holonym;
* `triggering_holonym_index`: the index of the triggering holonym in the list of holonyms in `place_desc`, or nil if
  an overriding holonym was passed in or there was no triggering holonym.
]=]
local function find_placetype_cat_specs(data)
	local entry_placetype, place_desc, first_holonym_index, overriding_holonym, from_demonym =
		data.entry_placetype, data.place_desc, data.first_holonym_index, data.overriding_holonym, data.from_demonym
	local form_of_directive = data.form_of_directive

	-- Try to find category specs for a single holonym. Three lookups are attempted in order, returning at the first
	-- one that yields a non-empty list: (1) `political_division_cat_handler`; (2) the `cat_handler` of the entry
	-- placetype's data, if any; (3) unless `no_fallback` is set, a wildcard key `HOLONYM_PLACETYPE/*` in the entry
	-- placetype's data. Returns the category specs and the placetype of the equivalent entry placetype under which
	-- they were found, or nil.
	local function fetch_cat_specs(holonym_to_match, index, no_fallback)
		local holonym_placetype = holonym_to_match.placetype
		if not holonym_placetype then
			-- raw text in place of holonym
			return nil
		end
		local holonym_placename = holonym_to_match.unlinked_placename
		if not holonym_placename then
			internal_error("Missing unlinked_placename in holonym (index %s): %s", index, holonym_to_match)
		end
		-- (1) Political division handler, tried over equivalent entry and holonym placetypes.
		local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
					return m_placetypes.political_division_cat_handler {
						entry_placetype = equiv_entry_pt,
						holonym_placetype = equiv_holonym_pt,
						holonym_placename = holonym_placename,
						holonym_index = index,
						place_desc = place_desc,
						from_demonym = from_demonym,
					}
				end)
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_specs and cat_specs[1] then
			return cat_specs, equiv_entry_placetype_and_qualifier.placetype
		end
		-- (2) Placetype-specific category handler. Note that `equiv_entry_placetype_and_qualifier` is redeclared
		-- here and shadows the one above.
		local cat_handler, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
				if entry_placetype_data and entry_placetype_data.cat_handler then
					return entry_placetype_data.cat_handler
				end
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_handler then
			local cat_specs = m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
				return cat_handler {
					entry_placetype = equiv_entry_placetype_and_qualifier.placetype,
					holonym_placetype = equiv_holonym_pt,
					holonym_placename = holonym_placename,
					holonym_index = index,
					place_desc = place_desc,
					from_demonym = from_demonym,
				}
			end)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end
		-- (3) Wildcard spec such as `country/*`; skipped in the no-fallback pass.
		if not no_fallback then
			local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
				function(equiv_entry_pt)
					local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
					if entry_placetype_data then
						return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
							return entry_placetype_data[equiv_holonym_pt .. "/*"]
						end)
					end
				end, {form_of_directive = form_of_directive}
			)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end
		return nil
	end

	if overriding_holonym then
		-- FIXME, change the algorithm to eliminate overriding_holonym
		local cat_specs, fetched_entry_placetype = fetch_cat_specs(overriding_holonym, nil)
		if cat_specs and cat_specs[1] then
			return {
				entry_placetype = fetched_entry_placetype,
				cat_specs = cat_specs,
				triggering_holonym = overriding_holonym,
				-- no triggering_holonym_index
			}
		end
	else
		-- We loop twice over holonyms, the first time setting `no_fallback` so that we process only category specs for
		-- the specifically given entry placetype (possibly with preceding qualifiers). The reason for this is to
		-- correctly handle cases like [[Poblacion IX]]:
		-- {{place|en|barangay|mun/Roxas|p/Capiz|c/Philippines}}.
		-- "barangay" falls back to "neighborhood", and without the `no_fallback` loop, the neighborhood cat handler run
		-- on the mun/Roxas holonym will take precedence over the barangay-specific setting for p/Capiz because we
		-- check, for each holonym in turn, first for a matching spec through political_division_cat_handler, then a cat
		-- handler, then a wildcard spec like country/*. During the first no-fallback loop, we disable checking for
		-- wildcard specs because it seems a fallback matching exactly or through a cat handler on an earlier holonym
		-- would be better than a wildcard match for the exact entry placetype at a later holonym. (FIXME: But I don't
		-- know for sure; maybe we should check wildcard holonyms on the exact entry placetype first, or contrariwise
		-- maybe we should check only exact-match holonyms through political_division_cat_handler on the exact entry
		-- placetype first, not even checking other cat handlers.)
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i, "no_fallback")
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
		-- Second pass, with fallbacks and wildcard specs enabled.
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i)
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
	end

	return nil
end

-- Turn a list of category specs (see comment at section top) into the corresponding categories (minus the language
-- code prefix). The function is given the following arguments:
-- (1) `place_desc`: the place description (passed on when looking up the location of the triggering holonym);
-- (2) `cat_data`: an object in the format returned by find_placetype_cat_specs(), with the fields `cat_specs`,
--     `entry_placetype`, `triggering_holonym` (nil if no triggering holonym) and `triggering_holonym_index`.
-- A category spec of `true` stands for the pluralized entry placetype, followed (when there is a triggering holonym)
-- by the entry placetype's preposition and `+++`. With a triggering holonym, `+++` in a spec is replaced by the
-- prefixed key of the matching known location; if no location matches, the spec is logged, tracked and dropped.
-- Without a triggering holonym, a spec containing `+++` is an internal error.
local function cat_specs_to_categories(place_desc, cat_data)
	local all_cats = {}
	local cat_specs, entry_placetype, triggering_holonym, triggering_holonym_index =
		cat_data.cat_specs, cat_data.entry_placetype, cat_data.triggering_holonym, cat_data.triggering_holonym_index
	if triggering_holonym then
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst") .. " " ..
					m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " +++"
			else
				cat = cat_spec
			end
			if cat:find("%+%+%+") then
				local group, key, spec, container_trail = m_placetypes.find_matching_holonym_location {
					holonym_placetype = triggering_holonym.placetype,
					holonym_placename = triggering_holonym.unlinked_placename,
					holonym_index = triggering_holonym_index,
					place_desc = place_desc,
				}
				if group then
					-- The replacement is escaped so that any % in the key is taken literally by gsub.
					cat = cat:gsub("%+%+%+", m_strutils.replacement_escape(m_placetypes.get_prefixed_key(key, spec)))
					insert(all_cats, cat)
				else
					mw.log(("Unable to insert category for cat spec '%s' because holonym '%s/%s' did not match a " ..
						"known location"):format(cat, triggering_holonym.placetype, triggering_holonym.unlinked_placename))
					track("cant-match-holonym-for-category-spec")
				end
			else
				insert(all_cats, cat)
			end
		end
	else
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst")
			else
				cat = cat_spec
				if cat:find("%+%+%+") then
					internal_error("Category %s contains +++ but there is no holonym to substitute", cat)
				end
			end
			insert(all_cats, cat)
		end
	end
	return all_cats
end

-- Return the categories (without initial lang code) that should be added to the entry, given the place description
-- (which specifies the entry placetype(s) and holonym(s); see top of file) and a particular entry placetype (e.g.
-- "city"). Note that only the holonyms from the place description are looked at, not the entry placetypes in the place
-- description. `from_demonym` and `form_of_directive` are passed through to find_placetype_cat_specs(). If no
-- categories result from the holonyms, the `default` category specs of the entry placetype (if any) are used.
local function get_placetype_cats(place_desc, entry_placetype, from_demonym, form_of_directive)
	local cats = {}

	local first_holonym_index = 1

	while first_holonym_index <= #place_desc.holonyms do
		-- Find the category specs (see top-of-file comment) corresponding to the holonym(s) in the place description.
		local cat_data = find_placetype_cat_specs {
			entry_placetype = entry_placetype,
			place_desc = place_desc,
			first_holonym_index = first_holonym_index,
			from_demonym = from_demonym,
			form_of_directive = form_of_directive,
		}

		-- Check if no category spec could be found.
		if not cat_data then
			break
		end
		local triggering_holonym = cat_data.triggering_holonym
		if not triggering_holonym then
			internal_error("find_placetype_cat_specs should have returned a triggering holonym: %s", cat_data)
		end

		-- Generate categories for the category specs found.
		extend(cats, cat_specs_to_categories(place_desc, cat_data))

		-- Also generate categories for other holonyms of the same placetype, so that e.g.
		-- {{place|en|city|s/Kansas|and|s/Missouri|c/USA}} generates both [[:Category:en:Cities in Kansas, USA]] and
		-- [[:Category:en:Cities in Missouri, USA]].
		first_holonym_index = cat_data.triggering_holonym_index
		-- Loop over non-fallback equivalent placetypes to the triggering holonym's placetype, in case it is
		-- non-canonical (e.g. `cities/San Francisco`). This matches the loop over equivalent places in
		-- key_holonym_into_place_desc().
		local equiv_triggering_placetypes =
			m_placetypes.get_placetype_equivs(triggering_holonym.placetype, {no_fallback = true})
		for _, equiv in ipairs(equiv_triggering_placetypes) do
			local other_holonyms_of_same_type = place_desc.holonyms_by_placetype[equiv.placetype]
			if other_holonyms_of_same_type then
				for _, other_placename_of_same_type in ipairs(other_holonyms_of_same_type) do
					if other_placename_of_same_type ~= triggering_holonym.unlinked_placename then
						local overriding_holonym = {
							placetype = triggering_holonym.placetype,
							unlinked_placename = other_placename_of_same_type,
						}
						local other_cat_data = find_placetype_cat_specs {
							entry_placetype = entry_placetype,
							place_desc = place_desc,
							overriding_holonym = overriding_holonym,
							from_demonym = from_demonym,
							form_of_directive = form_of_directive,
						}
						if other_cat_data then
							extend(cats, cat_specs_to_categories(place_desc, other_cat_data))
						end
					end
				end
			end
		end

		-- If there are any later-specified holonyms that had the modifier :also, try to produce categories for them
		-- as well. This advances to the next holonym with `continue_cat_loop` set; if there is none, the index runs
		-- past the end of the holonyms and the outer loop terminates.
		first_holonym_index = first_holonym_index + 1
		while first_holonym_index <= #place_desc.holonyms do
			if place_desc.holonyms[first_holonym_index].continue_cat_loop then
				break
			end
			first_holonym_index = first_holonym_index + 1
		end
	end

	if cats[1] then
		return cats
	end

	-- Nothing was generated from the holonyms; fall back to the entry placetype's `default` category specs.
	local entry_pt_default, equiv_entry_placetype_and_qualifier =
		m_placetypes.get_equiv_placetype_prop(entry_placetype, function(pt)
			return m_placetypes.placetype_data[pt] and m_placetypes.placetype_data[pt].default
		end, {form_of_directive = form_of_directive})
	if entry_pt_default then
		return cat_specs_to_categories(place_desc, {
			cat_specs = entry_pt_default,
			entry_placetype = equiv_entry_placetype_and_qualifier.placetype,
			-- no triggering holonym
		})
	end
	return {}
end

--[==[
Iterate through each type of place and return a list of the categories that need to be added to the entry.
The returned categories need to be formatted using `format_cats`, as they can be either topic-style categories (by
default) or langname-style categories (if prefixed with `cln:`). The function is passed the parsed arguments (needed
for arguments not parsed by `parse_overall_place_spec` and used primarily to add "bare categories" corresponding to
toponyms for known locations), the overall place spec, which contains all the parsed info on the {{tl|place}} call
(see comment at top of file), and `from_demonym`, which is true if we're being called from {{tl|demonym-noun}} or
{{tl|demonym-adj}} (in this case, we only want certain categories added: as the code stands, the categories of any
form-of directives, the generic `*` placetype categories for the specified holonym(s), and any categories in
{{para|cat}}; the per-placetype categories and the result of `get_bare_categories` are skipped).
]==]
function export.get_cats(args, overall_place_spec, from_demonym)
	local cats = {}

	local place_descriptions = overall_place_spec.descs
	-- Both calls are made before any categories are computed and receive the descriptions themselves.
	handle_category_implications(place_descriptions, m_placetypes.cat_implications)
	m_placetypes.augment_holonyms_with_container(place_descriptions)

	if overall_place_spec.directives then -- not necessarily when called from [[Module:demonym]]
		for _, directive_terms in ipairs(overall_place_spec.directives) do
			-- Categories given directly in the directive spec; a single string is treated as a one-item list.
			local spec_cats = directive_terms.spec.cat
			if spec_cats then
				if type(spec_cats) == "string" then
					spec_cats = {spec_cats}
				end
				for _, spec_cat in ipairs(spec_cats) do
					insert(cats, spec_cat)
				end
			end
			-- Placetype categories specific to the directive's type prefix.
			if directive_terms.spec.type_prefix then
				for _, place_desc in ipairs(place_descriptions) do
					for _, placetype in ipairs(place_desc.placetypes) do
						if not m_placetypes.placetype_is_ignorable(placetype) then
							extend(cats, get_placetype_cats(place_desc, placetype, from_demonym,
								directive_terms.spec.type_prefix))
						end
					end
				end
			end
		end
	end

	if not from_demonym then
		local bare_categories = m_placetypes.get_bare_categories(args, overall_place_spec)
		extend(cats, bare_categories)
	end

	for _, place_desc in ipairs(place_descriptions) do
		if not from_demonym then
			for _, placetype in ipairs(place_desc.placetypes) do
				if not m_placetypes.placetype_is_ignorable(placetype) then
					extend(cats, get_placetype_cats(place_desc, placetype))
				end
			end
		end
		-- Also add generic place categories for the holonyms listed (e.g. a category like
		-- [[Category:Places in Merseyside, England]]). This is handled through the special placetype "*".
		extend(cats, get_placetype_cats(place_desc, "*", from_demonym))
	end

	if args.cat then -- not necessarily when called from [[Module:demonym]]
		for _, cat in ipairs(args.cat) do
			local split_cats = split_on_comma(cat)
			extend(cats, split_cats)
		end
	end

	return cats
end

-- Return the formatted category text for a list of categories `cats` (as returned by `get_cats`), given the language
-- object `lang` and an optional sort key. Categories prefixed with `cln:` become "LANGNAME CATEGORY"; all others
-- become "LANGCODE:CATEGORY".
local function format_cats(lang, cats, sort_key)
	local full_cats = {}
	local langcode = lang:getFullCode()
	for _, cat in ipairs(cats) do
		-- 'cln' corresponds to {{cln}}, which generates lang-name categories like [[:Category:English abbreviations]]
		-- (as opposed to topic categories like [[:Category:en:Abbreviations of states of the United States]]).
		local cln_cat = cat:match("^cln:(.*)$")
		if cln_cat then
			insert(full_cats, lang:getFullName() .. " " .. cln_cat)
		else
			insert(full_cats, langcode .. ":" .. cat)
		end
	end
	return require(utilities_module).format_categories(full_cats, lang, sort_key, nil,
		force_cat or m_placetypes.get_force_cat())
end

----------- Main entry point

--[==[
Implementation of {{tl|place}}. Meant to be callable from another module (specifically, [[Module:transclude]]).
The single argument `data` is an object with the following fields:
* `template_args`: Raw arguments specified by {{tl|place}}, possibly modified by {{tl|tcl}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `drop_extra_info`: True if we should drop most of the "extra info" specified using extra info arguments (capital,
  largest city, etc.). Usually true when invoked from {{tl|tcl}}. Note that some extra info is still displayed even
  when `drop_extra_info` is set in order to establish the context (e.g. {{para|full}} and {{para|modern}}), and any
  extra info overridden at the {{tl|tcl}} level is displayed regardless.
* `extra_info_overridden_set`: Set of booleans specifying, for each extra info arg, whether it was overridden at the
  {{tl|tcl}} level. This means, for example, that the values are interpreted according to the language in {{para|1}}
  instead of always defaulting to English, as is the case when {{tl|place}} is called directly.
* `form_of_overridden_args`: Set of objects of the form `{new_directive = ``directive``, new_value = ``value``}` for
  overriding a given form-of directive (the key) with new directive ``directive`` and new unparsed value ``value``.
  Both the key and the replacing directive should be canonical. ``value`` will be parsed in the same way as a regular
  form-of directive except that all specified terms are interpreted in the language specified in {{para|1}}, never in
  English. This is present so that {{tl|tcl}} can be used on abbreviations like [[GDR]] and [[FYROM]], whose
  equivalents in a foreign language have language-specific expansions but where the rest of the call should stay the
  same.
* `translation_follows`: If true, any translation specified using t= should follow the definition, after a colon,
  rather than preceding, with the definition in parens.

Note that the function stores the processed arguments in `data.args` and the parsed spec in
`data.overall_place_spec`, i.e. it modifies the `data` object passed in.
]==]
function export.format(data)
	local template_args = data.template_args
	local list_param = {list = true}
	local boolean_param = {type = "boolean"}
	local params = {
		[1] = {required = true, type = "language", default = "und"},
		[2] = {required = true, list = true},
		["t"] = list_param,
		["tid"] = {list = true, allow_holes = true},
		["cat"] = list_param,
		["nocat"] = boolean_param,
		["nocap"] = boolean_param,
		["sort"] = true,
		["pagename"] = true, -- for testing or documentation purposes

		["a"] = true,
		["addl"] = true,
		["def"] = true,

		-- params that are only used when transcluding using {{tcl}}/{{transclude}}, to transmit information to {{tcl}}.
		["tcl"] = true,
		["tcl_t"] = list_param,
		["tcl_tid"] = list_param,
		["tcl_nolb"] = true,
		["tcl_nolc"] = boolean_param,
		["tcl_noextratext"] = boolean_param,
	}

	-- add "extra info" parameters
	for _, extra_arg_spec in ipairs(export.extra_info_args) do
		params[extra_arg_spec.arg] = list_param
	end

	-- FIXME, once we've flushed out any uses, delete the following clause. That will cause def= to be ignored.
	if template_args.def == "" then
		error("Cannot currently pass def= as an empty parameter; use def=- if you want to suppress the definition display")
	end

	local args = require("Module:parameters").process(template_args, params)

	-- a= is restricted to "a", "an" or "the" (with optional initial capital); anything else is an error.
	if args.a then
		track("a")
		if args.a:find("^[Aa]n?$") or args.a:find("^[Tt]he$") then
			track("a/article")
		else
			error("a= can only be used to specify a definite or indefinite article (and preferably use |nocap=1 instead to get the initial letter lowercase); see especially the documentation on the [[Template:place#Mixed format|mixed format]], which can be used to add arbitrary text before the placetype")
		end
	end

	data.args = args
	local overall_place_spec = parse_overall_place_spec(data)
	data.overall_place_spec = overall_place_spec

	-- The definition text is followed by the formatted categories unless nocat= is given.
	return get_def(data) .. (
		args.nocat and "" or format_cats(args[1], export.get_cats(args, overall_place_spec), args.sort))
end

--[==[
Actual entry point of {{tl|place}}.
]==]
function export.show(frame)
	-- Hand the arguments of the parent frame to export.format().
	local parent_args = frame:getParent().args
	return export.format({template_args = parent_args})
end

return export
14006m3e32f16t6msgvk7fsloksykq2 231543 231542 2026-04-16T11:55:43Z Lee 19 [[:en:Module:place]] වෙතින් එක් සංශෝධනයක් 231542 Scribunto text/plain local export = {} local force_cat = false -- set to true for testing local m_placetypes = require("Module:place/placetypes") local m_links = require("Module:links") local memoize = require("Module:memoize") local m_strutils = require("Module:string utilities") local m_table = require("Module:table") local debug_track_module = "Module:debug/track" local en_utilities_module = "Module:en-utilities" local form_of_module = "Module:form of" local languages_module = "Module:languages" local parse_interface_module = "Module:parse interface" local parse_utilities_module = "Module:parse utilities" local parameter_utilities_module = "Module:parameter utilities" local utilities_module = "Module:utilities" local enlang = require(languages_module).getByCode("en") local rmatch = m_strutils.match local rfind = m_strutils.find local ulen = m_strutils.len local split = m_strutils.split local dump = mw.dumpObject local insert = table.insert local concat = table.concat local pluralize = require(en_utilities_module).pluralize local extend = m_table.extend local unpack = unpack or table.unpack -- Lua 5.2 compatibility local internal_error = m_placetypes.internal_error local process_error = m_placetypes.process_error local placetype_data = m_placetypes.placetype_data --[==[ intro: ===Introduction=== This module implements {{tl|place}}, which is a template for standardizing the description and categorization of toponyms (terms that refer to locations such as cities, countries, rivers, etc.). The following modules support this template: * [[Module:place]]: The main module.
* [[Module:place/placetypes]]: A module containing data on placetypes, as well as utilities for working with placetypes; category generation handlers for adding categories based on placetypes; and display handlers for displaying holonyms (i.e. containing locations) of a specific type. FIXME: Maybe split out the code from the data. * [[Module:place/locations]]: A module containing data on known locations, as well as utilities for working with such locations. FIXME: Maybe split out the code from the data. * [[Module:category tree/topic/Places]]: A category tree module for generating the descriptions of all categories generated by {{tl|place}}. * [[Module:place doc]]: A module that generates documentation tables describing known placetypes and locations. ===Basic terminology=== The basic terminology used in this and associated {{tl|place}} modules is: * A ''location'' (or equivalently, a ''place'') is any geographic feature (either natural or geopolitical), either on the surface of the Earth or elsewhere. Examples of types of natural places are rivers, mountains, seas and moons; examples of types of geopolitical places are cities, countries, neighborhoods and roads. A ''known location'' is specifically a location whose properties are specified in the {{tl|place}} modules; more on them below. * Specific places are identified by names, referred to as ''toponyms'' or ''placenames''. A given place will often have multiple names, and a given toponym may be ambiguous, referring to multiple possible locations. Specifically: ** There may be names including different amounts of disambiguating information (`Tucson` vs. `Tucson, Arizona` vs. `Tucson, Arizona, USA` or `New York` vs. `New York City` vs. `New York, New York`); abbreviations (`NYC` for `New York City`, `USA` for `United States of America`); ''official'' vs. ''short'' names (e.g. `Union of Soviet Socialist Republics` vs. `Soviet Union`); spelling variations (`Cracow` vs. `Krakow` vs. `Kraków`); current vs. 
former names (`Saint Petersburg` vs. `Leningrad` vs. `Petrograd`); [[exonym]]s vs. [[endonym]]s (e.g. `Tavastia Proper` vs. `Kanta-Häme`, both referring to the same administrative region in Finland); alternative names not due to any of the above reasons (`Bashkiria` vs. `Bashkortostan`); etc. In addition, each language that has an opportunity to refer to the place will have its own name, with the same sorts of variations as exist in English. ** Examples of ambiguous toponyms are `New York` (either a city or a state); `Georgia` (either a state of the US or an independent country in the Caucasus Mountains); `Paris` (either the capital of France or various small cities and towns in the US); `Mexico` (either a country, a state of that country, or the capital city of that country); and `San Antonio` (besides being a major city in Texas, it is the name of dozens of settlements of all sorts throughout the US and Latin America, and at least 181 distinct [[barangay]]s in the Philippines). * A ''placetype'' is the (or a) type that a location belongs to (e.g. `city`, `state`, `river`, `administrative region`, `[[regional county municipality]]`, etc.). ** It is common for locations to be described using multiple placetypes, and even sometimes known locations have multiple placetypes that they may be identified by (e.g. American Samoa can be identified either as an `unincorporated territory`, an `overseas territory` or just a `territory`). Both the {{tl|place}} template and the known location data allow a given location to be identified by multiple placetypes. When in doubt as to the correct placetype or placetypes for a given location, generally follow how Wikipedia describes the place. ** Some placetypes themselves are ambiguous; e.g. an ''area'' can variously refer to a top-level administrative division (specifically of Kuwait); a geographic region, generally without unambiguously defined borders; or a section of a city, similar to a neighborhood.
The term ''district'' is similarly ambiguous. A ''[[prefecture]]'' in the context of Japan is similar to a province, but a prefecture in France is the capital of a ''[[department]]'' (which is similar to a county). Some of this ambiguity is currently handled automatically; e.g. the ambiguity of areas and districts is handled by looking at the ''holonyms'', or containing locations, specified for a given place. But sometimes it is necessary to use a qualifier before the placetype to disambiguate; for example to refer to a French prefecture, use the placetype `French prefecture` instead of just `prefecture`. (FIXME: Handle this automatically.) * A ''holonym'', in the context of a description of a place, is a placename that refers to a larger-sized entity that contains the location being described. For example, `Arizona` and `United States` are holonyms of `Tucson`, and `United States` is a holonym of `Arizona`. * A ''place invocation'' consists of the invocation of {{tl|place}}, including all its parameters. Place invocations may contain one or more ''place descriptions'', each of which provides a description of the location, including its placetype or types, any holonyms, and any additional raw text needed to properly explain the place in context. Place invocations may also contain named parameters specifying zero or more English ''glosses'' or translations (for foreign-language toponyms) and any attached ''extra information'' such as the capital, largest city, official name, modern name or full name. Multiple place descriptions in a single invocation are separated by a numbered parameter starting with a semicolon, and are used when it is necessary to provide two or more definitions of a single location for proper categorization. 
For example, [[Vatican City]] is defined both as a city-state in Southern Europe and as an enclave within the city of Rome, as follows: : {{tl|place|en|city-state|r/Southern Europe|;,|an <<enclave>> within the city of [[Rome]], [[Italy]]|cat=Places in Rome|official=Vatican City State}}. Similar things need to be done for places like [[Crimea]] that are claimed by two different countries with different definitions and administrative structures. ** There are two types of place descriptions, ''new-style'' and ''old-style''. (The use of the terms "new" and "old" indicates chronological precedence in the development of {{tl|place}}, but is not meant to pass any value judgments on the two types, and does not indicate any intent to deprecate old-style descriptions. Both types of descriptions are useful; for example, old-style descriptions are generally more succinct but less flexible.) The above invocation shows both types: an old-style description followed by a new-style description. Old-style descriptions use multiple numbered parameters, where the first parameter (after the language code) specifies the placetype or types, and following parameters specify either holonyms (which are always of the form ` ``placetype``/``placename`` `) or raw text (which is identifiable by not having a slash in it). New-style descriptions use a single parameter, where both placetypes and holonyms are surrounded by double angle brackets, and all remaining text is raw (displayed as-is). In both types of descriptions, holonyms include a slash in them to separate the placetype (which is mandatory and often abbreviated) from the placename. ** In the context of a place description, there are two types of placetypes. The ''entry placetypes'' are the placetypes of the place being described, while the ''holonym placetypes'' are the placetypes of the holonyms that the place being described is located within. Currently, a given place can have multiple placetypes specified (e.g.
[[Normandy]] is specified using the ''compound placetype'' `administrative region/former province/and/medieval kingdom`) while a given holonym can have only one placetype associated with it. Holonym placetypes are frequently abbreviated (e.g. `r` for `region`, `s` for `state`, `co` for `county`, etc.), while stylistically it is preferred to spell out the entry placetype (except for some long placetypes with well-known abbreviations, such as `CDP` or `cdp` for `[[census-designated place]]`). ** All holonyms in place descriptions are automatically linked as if surrounded by {{tl|l|en|...}}; i.e. if double brackets do not occur in the holonym, the entire holonym will be linked to the corresponding Wiktionary article. For this reason, the holonym should generally be in the same format as the canonical Wiktionary article describing the location (see below). * A ''known location'' is a location whose properties are specifically defined in the {{tl|place}} modules. Generally each such location has an associated category, and known locations exist in a containment hierarchy, where the immediately containing known location is known as the ''container'' of the location and the chain of successive containing locations is known as the ''container trail''. Generally the location's container corresponds to the first parent of its category. Note that some known locations belong to more than one immediate container; for example, Russia belongs to both Europe and Asia. ===More about placetypes=== # The following general categories of placetypes exist: ## ''Natural features'' such as lakes, mountains, mountain ranges, islands, archipelagoes, moons, stars, asteroids, etc. ## ''Continents'', ''supercontinents'' (groupings of continents where it makes sense, such as `America` and `Eurasia`) and ''continent-level regions'' (grouping of countries in a given continent, such as `Central America` and `Polynesia`).
## ''Political entities'', which are generally classified as either ''polities'' (top-level entities such as countries), ''subpolities'' or ''political divisions'' (non-sovereign divisions, often specifically ''administrative divisions'', of a polity, where an administrative division has a governmental or statistical function and almost always has unambiguously defined boundaries), or ''settlements'' (e.g. cities; towns; villages; and divisions of a city such as neighborhoods, wards, [[barrio]]s and [[barangay]]s, which may or may not be formal administrative divisions and may or may not have unambiguous boundaries). ## ''Geographic regions'', which refer to recognized areas of the Earth (either with a natural geographic, political or cultural significance, often of a historical nature). Such regions can be of greatly varying size, may exist either within a single country or spanning multiple countries or (more often) parts of multiple countries, and may not have well-defined boundaries. They should be distinguished from ''administrative regions'', which exist within a single country and have well-defined boundaries and a political or administrative function. Geographic regions are categorized using the generic term ''geographic and cultural areas'' to emphasize that (a) they have no administrative significance; (b) they may vary greatly in size; and (c) their cohesion is due either to natural geographic boundaries, such as rivers or mountain ranges, or to sharing some cultural characteristics. ## ''Man-made structures'' below the level of a settlement or neighborhood, such as airports, roads, individual buildings, and the like. (Note that such structures, even if named, often do not meet the [[WT:CFI]] criteria; this is particularly the case for roads.) # Placetypes support aliases, and the mapping to canonical form happens early on in the processing. 
For example, `state` can be abbreviated as `s`; `administrative region` as `adr`; `regional county municipality` as `rcomun`; etc. Some placetype aliases handle alternative spellings rather than abbreviations. For example, `departmental capital` maps to `department capital`, and `home-rule city` maps to `home rule city`. Placetype abbreviations are particularly useful in holonym specs, because every holonym must be accompanied by its placetype, for disambiguation purposes. # A ''placetype qualifier'' is an adjective prepended to the placetype to give additional information about the place being described. For example, a given place may be described as a `small city`; logically this is still a city, but the qualifier `small` gives additional information about the place. Multiple qualifiers can be stacked, e.g. `small affluent beachfront unincorporated community`, where `unincorporated community` is a recognized placetype and `small`, `affluent` and `beachfront` are qualifiers. (As shown here, it may not always be obvious where the qualifiers end and the placetype begins.) For the most part, placetype qualifiers do not affect categorization; a `small city` is still a city and an `affluent beachfront unincorporated community` is still an unincorporated community, and both should still be categorized as such. But some qualifiers do change the categorization. In particular, a `former province` is no longer a province and should not be categorized in e.g. [[:Category:Provinces of Italy]], but instead in a different set of categories, e.g. [[:Category:Historical political subdivisions]]. There are several terms treated as equivalent for this purpose: `abandoned`, `ancient`, `extinct`, `historic(al)`, `medi(a)eval` and `traditional`. Another set of qualifiers that change categorization are `fictional` and `mythological`, which cause any term using the qualifier to be categorized respectively into [[:Category:Fictional locations]] and [[:Category:Mythological locations]].
===More about toponyms=== # Toponyms may be: ## ''simple'' (not including any containing location in its name, such as `Tucson`) or ''multipart'' (including one or more containing locations, such as `Tucson, Arizona` or `Tucson, USA` or even `Tucson, Arizona, USA`); ## ''bare'' (not including the word `the` if the location normally requires this article when following a preposition, such as `United States`, `Gambia` or `Community of Madrid`) or ''prefixed'' (including the word `the` as needed, such as `the United States`, `the Gambia` or `the Community of Madrid`); ## ''elliptical'' (just the placename without any disambiguating placetype, such as `Durham`, `New York` or `Mexico`) or ''full'' (containing a disambiguating placetype or similar identifier if one is commonly included, such as the city of `Durham` (in England) vs. its containing county `County Durham`; the US city `New York City` vs. its containing state `New York`; or the three-way distinction between `Mexico` (the country), `Mexico City` (the capital of this country) and `(the) State of Mexico` (one of the states of the country Mexico, mostly surrounding but not including Mexico City)). # The ''canonical Wiktionary article'' is the main article on Wiktionary where a location is described. Canonical articles, per the above terminology, are generally ''simple'' and ''bare'', but may be either ''full'' or ''elliptical''. The fact that a given article is canonical is often identifiable by the fact that translations are housed there and not somewhere else. For example, most counties of the US and Canada include the word `County` in their canonical article name, but most counties elsewhere do not. `Washington, D.C.` is one of the few cases where a non-simple toponym is used as the canonical article; this is based on common usage, especially by residents of the city in question (who commonly refer to it as "D.C." but rarely just as "Washington").
===More about known locations=== # The following types of known locations are defined in this module: ## Continents, supercontinents and continent-level regions, into which countries are grouped. Specifically: ### At the top level below `Earth` are the supercontinents `America` and `Eurasia` and the continents `Africa`, `Oceania` and `Antarctica`. ### `America` is further broken down into the continents `North America` (in turn containing the continental regions `Central America` and `Caribbean`, with the United States, Canada and Mexico directly under North America) and `South America`. ### `Eurasia` is further broken down into the continents `Europe` and `Asia`. ### `Oceania` is further broken down into the continental regions `Melanesia`, `Micronesia` and `Polynesia`, with `Australia` directly under `Oceania`. ### Under the above-specified divisions are countries. Some countries are placed in more than one continent or continent-level region, either because they actually span two continents (e.g. Russia, Turkey, Kazakhstan, Egypt) or because they are politically considered to belong to a continent different from the one they are geographically in (Cyprus, Georgia, Armenia, etc.). ## Political entities, including: ### Top-level political entities, which includes: #### Countries, with a fairly liberal definition, notably including all UN-recognized countries plus some others that are commonly considered countries, even if not all other countries recognize them as such or consider them completely independent (notably, Kosovo, Palestine, Taiwan, Western Sahara, Niue and the Cook Islands). #### Pseudo-countries, which include areas calling themselves countries that are de-facto not under the control of the country that they are internationally considered part of (e.g. Abkhazia, South Ossetia, Transnistria); dependent/external/etc. territories of countries (e.g.
American Samoa [US], Bermuda [UK], Christmas Island [Australia], Easter Island [Chile]); constituent countries, autonomous territories and the like (Aruba, Curaçao and Sint Maarten of the Netherlands; Greenland and the Faroe Islands of Denmark; etc.; but notably not including England, Scotland, Northern Ireland and Wales, which are treated as regular countries); and a grab bag of other entities that have a semi-independent existence, such as Hong Kong, Macau, Guadeloupe, Martinique and the like. Currently, the actual distinction in treatment between "countries" and "country-like entities" is minimal, but in the future we might restrict the sorts of subcategories of country-like entities more than regular countries. #### Former countries, e.g. the Soviet Union, Yugoslavia, West Germany and the Roman Empire. These are much more limited in the sorts of subcategories allowed, because generally locations, especially cities, should be described from the perspective of which political entity they are currently located in (e.g. "an ancient Roman town in modern Syria") and categorized as such. ### Subpolities. Generally we only list top-level administrative divisions of countries (and only fairly major countries are usually included), but sometimes we list second-level administrative divisions, as in the case of the United Kingdom (where the top-level administrative divisions of the four constituent countries are listed) and China (where major prefecture-level cities are listed, and are considered administrative divisions rather than cities). ### Cities. Only major cities get categories, with the definition of "major" varying by country but often including those where the city population itself (sometimes the metro area) is >= 1,000,000 people. # A distinction should be made in the {{tl|place}} modules between ''keys'' and ''placenames''. 
Placenames are as the location appears in a holonym, and are generally in the same format as the canonical Wiktionary article describing the location so that when formatted as a link, the link goes to the right article; i.e. they are simple and bare, and may be full or elliptical according to Wiktionary conventions. The ''canonical key'' of a location is how the location's category is named, and always uniquely identifies the location from among the known locations in this module (but not necessarily among all possible locations). In particular, subpolities usually have multipart keys that include the containing location, such as `Anhui, China` (not just `Anhui`); `Arizona, USA` (not just `Arizona`, and also not `Arizona, United States`); and `Herefordshire, England` (not just `Herefordshire`, and also in this case not `Herefordshire, UK` or `Herefordshire, England, UK` or any other possible variation). Cities are normally simple, but some cities are multipart for disambiguation purposes (e.g. `Newcastle, New South Wales` for the city in Australia vs. `Newcastle upon Tyne` for the identically-named city in England). Canonical keys may have ''key aliases'', other ways of referring to the location that are not necessarily unique (e.g. `Newcastle` is a key alias for both of the above-mentioned cities), and city keys with diacritics generally have diacriticless aliases, such as canonical key `Düsseldorf` vs. key alias `Dusseldorf`, or canonical key `Łódź` vs. key alias `Lodz`. # Known locations are gathered into ''groups'' with similar properties, such as all the states of the United States; all the (ceremonial) counties of England (see below); and all the "sufficiently major" prefecture-level cities in China (where a prefecture-level city is a prefecture surrounding a major city with a unified government and is more like a prefecture, i.e. 
a major administrative division just underneath a province, than like a city, and where "sufficiently major" is defined according to the population of either the total prefecture or the urban area of the city). Note that there are multiple types of counties in England, with overlapping but non-identical names and boundaries; there are, in particular, ''ceremonial counties'', ''local government counties'' and ''historic counties''; ''ceremonial counties'' have only ceremonial administrative functionality but unlike local government counties (a) don't frequently change their boundaries or nature, (b) correspond more closely to historic county boundaries and names, and (c) are what Englanders usually identify themselves with, and so they are used as top-level divisions rather than local government counties. # Some known locations have ''aliases'' defined, which are of two types. ''Display aliases'' map holonyms to their canonical form near the beginning of processing (in particular before the displayed output is formatted). For example, `US`, `U.S.`, `USA`, `U.S.A.` and `United States of America` are all canonicalized to `United States` (if identified as a country), and display as `United States`. Similarly, the foreign forms `Occitanie` (as a region or administrative region) and `Noord-Brabant` (as a province) are mapped to `Occitania` and `North Brabant` for display purposes. There are also ''category aliases'', so that if e.g. `Republic of Macedonia` is encountered, it will display as such but categorize as `North Macedonia`. (This is because, among other reasons, `Republic of Macedonia` is normally preceded by `"the"` while `North Macedonia` is not, so a call {{tl|place|en|a <<city>> in the <<c/Republic of Macedonia>>}} would look wrong if `Republic of Macedonia` were converted to `North Macedonia` during display, as the result would be `a city in the North Macedonia`. There are also frequently political connotations to different category aliases, e.g. `Burma` vs. 
`Myanmar`.) All of these aliases are sensitive to the placetype specified. For example, `Mexico` as a state is categorized under `State of Mexico, Mexico` but `Mexico` the country is categorized as just `Mexico`. ===Categories=== There are two main types of categories: # Categories for known locations, divided into: ## Top-level polity categories (e.g. [[:Category:United States]], [[:Category:Taiwan]], [[:Category:South Ossetia]], [[:Category:Bermuda]], [[:Category:Soviet Union]], [[:Category:West Germany]]). ## Subpolity categories ([[:Category:Arizona, USA]], [[:Category:Hunan]], [[:Category:Kagoshima Prefecture]], [[:Category:Cluj County, Romania]]). For historical reasons, different formats are used for the subpolities of different polities. Increasingly, we are moving towards always including the polity name in the subpolity category, but whether the subpolity type is included and where it is included (cf. [[:Category:Cluj County, Romania]] vs. [[:Category:County Cork, Ireland]]) is still inconsistent and will probably remain that way, based on how the subpolity is normally referred to. ## City categories ([[:Category:Tokyo]], [[:Category:New York City]], [[:Category:Jaipur]]). Normally these do not include the containing subpolity, but may do so in order to disambiguate. # Categories for placetypes, divided into: ## "Immediate" political and non-political division categories ([[:Category:States of the United States]], [[:Category:Municipalities of Tocantins, Brazil]], [[:Category:Ghost towns in Arizona, USA]]). These are name categories, whose purpose is to contain locations of the specified type. "Immediate" here refers to the fact that the location in the category name is the immediately-containing polity. Usually these categories use the preposition "of", but sometimes "in".
(Specifically, "of" typically implies that the placetype in question has an official or semi-official status, whereas "in" implies there is no such official status, but common usage may override this.) The form of the toponym appearing in these categories is always the same as that of the corresponding toponym category except that the word "the" may appear (e.g. [[:Category:States of the United States]]), whereas it doesn't appear in the toponym category itself ([[:Category:United States]], no "the"). ## "Skip-polity" categories for second-level political and non-political divisions of a country or other top-level polity (e.g. [[:Category:Counties of the United States]], [[:Category:Municipalities of Brazil]] and [[:Category:Subprefectures of Japan]]). These have several purposes: * They group the immediate division categories mentioned previously. * They categorize "straggler" toponyms that (often improperly) fail to mention the subpolity they belong to, but only the top-level polity. * If categories do not exist for the first-level divisions of a country (and sometimes even when they do), they group all toponyms of the specified type for the specified country. For example, Lithuania is divided into first-level counties and second-level municipalities, but since we don't currently have categories for Lithuanian counties, all municipalities go under [[:Category:Municipalities of Lithuania]] rather than under a category for a specific county. In addition, even though we do have categories for Japanese prefectures (a first-level division), all subprefectures (a second-level division) go under [[:Category:Subprefectures of Japan]] because there aren't very many of them (see below).
## "Generic placetype" categories, both of the immediate and skip-polity type (immediate [[:Category:Cities in California, USA]] and [[:Category:Neighborhoods of the Bronx]]; skip-polity [[:Category:Villages in Ivory Coast]], [[:Category:Geographic and cultural areas of England]], [[:Category:Rivers in Egypt]] and [[:Category:Places in the Philippines]]). As mentioned above, "generic" placetypes occur in every polity (although the set of generic placetypes allowed for cities is a subset of those allowed for top-level polities and subpolities). Usually these categories use the preposition "in", but sometimes "of". As above, skip-polity categories group immediate categories, and in addition there are various reasons a toponym entry is categorized into a skip-polity category. (For example, as a general rule, geographic and cultural areas only categorize at the country level, not the subpolity level, both because there often aren't very many in a given country and because they often span multiple subpolities.) The parent categories of a given category depend on its type. Generally, location categories have placetype categories as their first parent, and vice-versa. Specifically: # Top-level country categories have as their parent e.g. [[:Category:Countries in Europe]], [[:Category:Countries in Central America]] or [[:Category:Countries in Polynesia]], using the most specific continental-level region the country is contained in. # Pseudo-countries are under [[:Category:Country-like entities]] as a neutral designation. There aren't enough of them to subcategorize under continent-level regions. # Former countries are under [[:Category:Former countries and country-like entities]]. # Subpolity categories are usually under a placetype category whose placetype is the canonical (first-listed) placetype of the subpolity and whose toponym is the immediately containing polity, but there are exceptions. 
Specifically, sometimes if a polity has multiple types of subpolities, they are combined (e.g. [[:Category:States and territories of Australia]], [[:Category:Federal subjects of Russia]]). In addition, sometimes a less specific but more identifiable placetype is used instead of the canonical one (e.g. [[:Category:Regions of France]] when the canonical placetype is "administrative region"). The same rules and exceptions generally apply when categorizing subpolities themselves; e.g. both the Australian state of Queensland and territory of Northern Territory go under [[:Category:en:States and territories of Australia]] rather than separately under [[:Category:en:States of Australia]] and [[:Category:en:Territories of Australia]]. In addition, sometimes subpolities may "skip a level" if there aren't very many. For example, there are only 26 subprefectures of Japan (14 under Hokkaido and 12 more scattered under five other prefectures). Rather than have e.g. [[:Category:en:Subprefectures of Kagoshima Prefecture]] containing at most two entries and [[:Category:en:Subprefectures of Miyazaki Prefecture]] containing at most one, they are all grouped under the so-called "skip-subpolity category" [[:Category:en:Subprefectures of Japan]]. # City categories are always under e.g. [[:Category:Cities in the United States]] (e.g. [[:Category:New York City]] is so-placed, even though [[:Category:Cities in New York, USA]] exists). However, they may have a second, more-specific parent (e.g. [[:Category:Cities in New York, USA]] in the case of New York City). The city entries themselves will go under the more specific parent if it exists. # Immediate placetype categories for second-level divisions of a country generally have, respectively, a "toponym parent" that is the toponym mentioned in the category and a "skip-polity parent" that groups all subpolity placetype categories of a specific type and containing polity. 
For example, [[:Category:Counties of Arizona, USA]] has toponym parent [[:Category:en:Arizona, USA]] and skip-polity parent [[:Category:en:Counties of the United States]]. Sometimes the default skip-polity parent is overridden or disabled entirely. For example, in the US, most states are divided into counties but Louisiana is divided into parishes and Alaska into boroughs. It would make no sense to put [[:Category:Parishes of Louisiana, USA]] under [[:Category:Parishes of the United States]] (which would only have one subcategory), so we include them under [[:Category:Counties of the United States]]. An alternative would be to name the skip-polity category to explicitly include parishes and boroughs; this would get awkward here but is done in some cases. Similarly, [[:Category:Regional county municipalities of Quebec]] is placed under [[:Category:Regional municipalities of Canada]] since that name is used in other provinces. Meanwhile, [[:Category:Regional districts of British Columbia]] disables its skip-polity category since no other province or territory of Canada has regional districts or comparable subpolities under a different name (an alternative would be to place them under [[:Category:Counties of Canada]], since they are sort of comparable to counties). # Placetype categories for first-level divisions of a country similarly (e.g. [[:Category:States of the United States]]) have a toponym parent (in this case [[:Category:United States]]), but in place of the skip-polity parent they have two other parents: a "bare placetype" parent (in this case [[:Category:States]]) and the "generic" parent [[:Category:Political divisions of specific countries]]. (There is also a bare [[:Category:Political divisions]] that groups "bare placetype" categories.) Skip-polity placetype categories for second-level divisions of a country (e.g. [[:Category:Counties of the United States]]) work the same. 
Placetype categories for countries work likewise except they are missing the generic parent. ===Place descriptions=== A given place description is defined internally in a table of the following form: ```{ placetypes = {"``placetype``", "``placetype``", ...}, holonyms = { { -- holonym object; see below placetype = "``placetype``" or nil, display_placename = "``placename``", unlinked_placename = "``placename``", langcode = "``langcode``" or nil, no_display = BOOLEAN, needs_article = BOOLEAN, force_the = BOOLEAN, affix_type = "``affix_type``" or nil, pluralize_affix = BOOLEAN, suppress_affix = BOOLEAN, continue_cat_loop = BOOLEAN, }, ... }, order = { ``order_item``, ``order_item``, ... }, -- (only for new-style place descriptions), joiner = "``joiner_string``" or nil, holonyms_by_placetype = { ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ... }, }``` Holonym objects have the following fields: * `placetype`: The canonicalized placetype if specified as e.g. `c/Australia`; nil if no slash is present (in which case the placename in `display_placename` refers to raw text). * `display_placename`: The placename or raw text, in the format to be displayed. Placename display aliases have already been resolved. It is raw text if `placetype` is nil. * `unlinked_placename`: Same as `display_placename` but with links and HTML removed. * `langcode`: The language code prefix if specified as e.g. `c/fr:Australie`; otherwise nil. * `no_display`: If true (holonym prefixed with !), don't display the holonym but use it for categorization. * `needs_article`: If true, prepend an article if the placename needs one (e.g. `United States`). * `force_the`: If true, always prepend the article `the`. Example use: holonym 'city:pref:the/Gold Coast', which gets formatted as `(the) city of the [[Gold Coast]]`. * `affix_type`: Type of affix to prepend (values `pref` or `Pref`) or append (values `suf` or `Suf`).
The actual affix added is the placetype (capitalized if values `Pref` or `Suf` are given), or its plural if `pluralize_affix` is given. Note that some placetypes (e.g. `district` and `department`) have inherent affixes displayed after (or sometimes before) them. * `pluralize_affix`: Pluralize any displayed affix. Used for holonyms like `c:pref/Canada,US`, which displays as `the countries of Canada and the United States`. * `suppress_affix`: Don't display any affix even if the placetype has an inherent affix. Used for the non-last placenames when there are multiple and a suffix is present, and for the non-first placenames when there are multiple and a prefix is present. * `continue_cat_loop`: If true (holonym used :also), continue producing categories starting with this holonym when preceding holonyms generated categories. Note that new-style place descs (those specified as a single argument using <<...>> to denote placetypes, placetype qualifiers and holonyms) have an additional `order` field to properly capture the raw text surrounding the items denoted in double angle brackets. The ``order_item`` items in the `order` field are objects of the following form: ```{ type = "``order_type``", value = "STRING" or INDEX, }``` Here, the ``order_type`` is one of `"raw"`, `"qualifier"`, `"placetype"` or `"holonym"`: * `"raw"` is used for raw text surrounding `<<...>>` specs. * `"qualifier"` is used for `<<...>>` specs without slashes in them that consist only of qualifiers (e.g. the spec `<<former>>` in `<<former>> French <<colony>>`). * `"placetype"` is used for `<<...>>` specs without slashes that do not consist only of qualifiers. * `"holonym"` is used for holonyms, i.e. `<<...>>` specs with a slash in them. For all types but `"holonym"`, the value is a string, specifying the text in question. For `"holonym"`, the value is a numeric index into the `holonyms` field.
It should be noted that placetypes and placenames occurring inside the holonyms structure are canonicalized, but placetypes inside the placetypes structure are as specified by the user. Stripping off of qualifiers and canonicalization of qualifiers and bare placetypes happens later. The information under `holonyms_by_placetype` is redundant to the information in holonyms but makes categorization easier. The holonym placenames listed here already have category aliases applied. For example, the call {{tl|place|en|city|s/Pennsylvania|c/US}} will result in the return value ```{ placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania" }, { placetype = "country", display_placename = "United States", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, }``` Here, the placetype aliases `s` and `c` have been expanded into `state` and `country` respectively, and the placename display alias `US` has been expanded into `United States`. PLACETYPES is a list because there may be more than one. For example, the call {{tl|place|en|city/and/municipality|p/[[Kwango]] Province|c/Congo}} will result in the return value ``` { placetypes = {"city", "and", "municipality"}, holonyms = { { placetype = "province", display_placename = "[[Kwango]] Province", unlinked_placename = "Kwango Province" }, { placetype = "country", display_placename = "Congo", unlinked_placename = "Congo" }, }, holonyms_by_placetype = { country = {"Congo"}, }, }``` Here, the `unlinked_placename` field has removed links from `display_placename`. The value in the key/value pairs is likewise a list; e.g. 
the call {{tl|place|en|city|s/Kansas|and|s/Missouri}} will return ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, }, } ``` Note that in `get_cats()` (which runs after the display form has been generated), further changes to the holonym structure are made to aid in categorization. For example, after `handle_category_implications()` and `augment_holonyms_with_container()` are called, the above structure will look more like ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { placetype = "country", unlinked_placename = "United States" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, { placetype = "country", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, country = {"United States"} }, } ``` ===Overall place specs=== The overall place spec parsed by `parse_overall_place_spec` has the following fields: * `lang`: The language object (from {{para|1}}). * `args`: The parsed arguments from the {{tl|place}} call. * `directives`: List of form-of directives (starting with `@`) parsed from the numeric args beginning with {{para|2}}. Each directive contains fields `directive` (the directive as specified by the user, e.g. 
`"former name of"`); `terms` (list of term objects for the terms specified by the user); `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}); `spec` (the corresponding directive spec from `all_form_of_directives`); `pretext` (the text to display directly before the directive); `posttext` (the text to display directly after the directive; {nil} except for the last directive). * `descs`: List of one or more place description objects parsed from the numeric args beginning with {{para|2}}, as described above. * `extra_info`: List of extra-info objects for extra info specified using arguments such as {{para|capital}}, {{para|modern}}, etc. Objects are in the order they should be displayed, and each object contains fields `spec` (the spec for the type of extra info, taken from `export.extra_info_args`), `terms` (list of term objects for the terms specified by the user); and `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}). ===Category determination=== The algorithm to find the categories to which a given place belongs works off of a place description (which specifies the entry placetype(s) and holonym(s); see above). If there are multiple place descriptions, each is processed independently to generate categories. Likewise, if there are multiple entry placetypes in a given place description, each is processed independently with all the holonyms of the description to generate categories. Furthermore, before the category-generation algorithm runs, earlier steps have modified the holonyms of the place description (inserting containing polities whenever possible; see the description above of `handle_category_implications()` and `augment_holonyms_with_container()`). Given a single entry placetype and a place description, the algorithm to generate categories processes holonyms from left to right until it finds one that "matches" in that it produces one or more categories. 
At that point it attempts to generate categories for all other holonyms in the place description of the same placetype. Normally, it then stops processing holonyms, but if a holonym is marked using the `:also` modifier, the category generation process starts over starting with that holonym (or the leftmost such remaining holonym, if there is more than one marked with `:also`). This makes it possible, for example, to specify the description of a river that passes through two different types of political divisions (e.g. Alberta and the Northwest Territories), or categorize a geographic region at both the continent and country level, such as this: <pre> {{place|en|historical region|r/Eastern Europe|located in southeastern|c:also/Poland|*and western|c/Ukraine}} </pre> Here, `r/Eastern Europe` has a category implication that adds `cont/Europe` as a holonym directly after it, which causes the page to be categorized into [[:Category:en:Geographic and cultural areas of Europe]]. The category generation process would normally stop at this point, but the presence of `:also` causes it to restart with `c/Poland` and generate the category [[:Category:en:Geographic and cultural areas of Poland]]. After doing this, it looks for other holonyms of the same placetype as `c/Poland` (i.e. other countries), which causes it to process `c/Ukraine` and generate the category [[:Category:en:Geographic and cultural areas of Ukraine]]. The category generation process works off of the `placetype_data` table, which specifies various properties for placetypes, such as how to display a holonym of that placetype as well as how to categorize certain pages where the {{tl|place}} call contains the specified placetype as an entry placetype. 
For example, the entry for `city-state` in [[Module:place/placetypes]] might look like ``` ["city-state"] = { link = true, category_link = "[[sovereign]] [[microstate]]s consisting of a single [[city]] and [[w:dependent territory|dependent territories]]", has_neighborhoods = true, class = "settlement", ["continent/*"] = {"City-states", "Cities", "Countries", "Countries in +++", "National capitals"}, default = {"City-states", "Cities", "Countries", "National capitals"}, }, ``` Here, the keys specify, respectively: # If `city-state` occurs as an entry placetype, link it to the corresponding Wiktionary entry (that is what `true` means in `link = true`). # Use the specified `category_link` text for categories such as [[:Category:City-states]]. # City-states are "city-like", i.e. they have neighborhoods; this controls the handling of entry placetypes such as `neighborhood`, `district`, `area`, etc. # City-states should be treated as settlements for determining how to handle the placetype `former city-state` and for categorizing the bare category [[:Category:City-states]] and language-specific equivalents such as [[:Category:en:City-states]]. # When the entry placetype `city-state` occurs along with a continent holonym, categorize into the specified categories under `continent/*`. Here, `+++` stands for the holonym in question. # When the entry placetype `city-state` occurs in any other context, categorize into the specified categories under `default`. It's important to realize that the only categorization keys under a given placetype entry that are specified explicitly in [[Module:place/placetypes]] are certain wildcard keys such as `continent/*` above (i.e. containing a slash followed by `*`) and under the key `default`. All the remaining categorization happens through category handlers, based on the information on known locations in [[Module:place/locations]]. 
For example, [[Module:place/locations]] has an "England group" specified similarly to the following: ``` export.england_group = { default_container = {key = "England", placetype = "constituent country"}, default_placetype = "county", default_divs = { "districts", {type = "local government districts", cat_as = "districts"}, { type = "local government districts with borough status", cat_as = {"districts", "boroughs"}, }, {type = "boroughs", cat_as = {"districts", "boroughs"}}, "civil parishes", }, default_british_spelling = true, data = export.england_counties, } ``` The `default_divs` key here specifies the divisions that exist for each of the counties listed under the `data` key (unless the key overrides them). Here, the entry `{type = "boroughs", cat_as = {"districts", "boroughs"}}` directs the category handler `political_division_cat_handler` in [[Module:place/placetypes]] (which is one of two category handlers that run for all entry placetypes, along with `generic_place_cat_handler`) to categorize boroughs specified under any of the counties listed under `data` as both districts and boroughs. Now, the categorization process proceeds as follows, given an entry placetype and place description, which specifies a set of holonyms (the code to do this is in `get_placetype_cats()`): # First, look up the entry placetype and any equivalent placetypes in `placetype_data`, which is defined in [[Module:place/placetypes]]. Note that the entry in `placetype_data` that specifies the placetype information that is used to determine the category or categories may not directly correspond to the entry placetype as specified in the place description. For example, if the entry placetype is `small town`, the placetype whose data is fetched will be `town` since `small` is a recognized qualifier and there is no entry in `placetype_data` for `small town`. 
As another example, if the entry placetype is `administrative capital`, the code will first look up `administrative capital` and then look up `capital city`, which is where the category handler is found, because `administrative capital` specifies `capital city` as its fallback. # Then, iterate over holonyms from left to right, as described above. For each holonym, we proceed as follows: ## First, call `political_division_cat_handler` to check if the entry placetype and holonym match a division in the `locations` data in [[Module:place/locations]], as in the example above. Note that when doing this, holonyms are canonicalized so that e.g. `co/Bedfordshire` gets mapped to `county/Bedfordshire` (because there is an entry in `placetype_aliases` in [[Module:place/placetypes]] that maps `co` to `county`) and `c/USA` gets mapped to `country/United States` (because there is an entry in the location data for the list of countries that maps `country/USA` to `country/United States` for both display and categorization purposes). This category handler, as with all such handlers, is passed the entry placetype and holonym being processed, but is also passed the entire place description, so it can look at other specified holonyms (particularly those that follow). It either returns {nil} or a list of category specs (which are the actual categories minus the preceding language code). ## If `political_division_cat_handler` doesn't generate any categories, check if there is a category handler defined using the `cat_handler` key for the entry placetype. If so, call it to generate the categories (if any). ## If the category handler returns {nil}, or there is no category handler, look for a ''wildcard key'' of the format e.g. `country/*`, which matches any holonym of placetype `country`. If found, the value is a list of category specs, which are processed as above. ## If we get this far without generating any categories, move to the next holonym. 
## If we do generate any categories, process all other holonyms of the same placetype. For example, if the user says {{tl|place|en|city|s/Kansas|and|s/Missouri}}, when we get to the holonym `s/Kansas`, we generate the category [[:Category:en:Cities in Kansas, USA]]. This causes us to look for other holonyms of the same placetype `state`, and process them accordingly, generating a category [[:Category:en:Cities in Missouri, USA]] as well. The same thing happens in an invocation like {{tl|place|pl|river|c/Poland,Ukraine,Belarus}}. # Once we generate categories for a holonym and any other holonyms of the same placetype, we normally stop processing holonyms. But if a holonym has the `:also` modifier, we restart the left-to-right loop at that holonym. For example, in the invocation {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr/Northwest Territories}}, we will generate a category [[:Category:en:Rivers in Alberta, Canada]] as well as [[:Category:en:Rivers in British Columbia, Canada]] (because British Columbia is of the same placetype as Alberta); but no category will be generated for the Northwest Territories, which is of a different placetype. To fix this, write {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr:also/Northwest Territories}}. The use of `:also` will cause holonym processing to resume at `Northwest Territories` after `Alberta` is processed, leading to an additional category [[:Category:en:Rivers in the Northwest Territories, Canada]]. (The presence of `the` in this last category is because `Northwest Territories` is a known location with a spec indicating that it should be preceded by `the`; it has nothing to do with the raw text `and the` in the invocation.) # Finally, if we process all holonyms and don't end up producing any categories, we check the entry placetype's data for a `default` key. If found, it lists category specs, which are processed to generate categories. 
This is used, for example, in the placetype `city-state`, as described above. # It should be noted that the above process runs independently for each combination of entry placetype and place description. Thus, for example, an invocation {{tl|place|en|city/and/county|s/Kansas,Missouri|c/USA}} will generate categories for both cities and counties in both Kansas and Missouri. # Two additional sources of categories are ''bare location'' categories and ''generic place'' categories. These categories are added by appropriate calls in the outer function `get_cats`, which iterates over placetypes and place descriptions, calling `get_placetype_cats` on each combination. ## Bare location categories are categories like [[:Category:Arizona, USA]] that are related-to categories containing terms related to the specified location. The bare location code, for example, adds the term [[Arizona]], and its equivalents in other languages, to [[:Category:Arizona, USA]]. When looking for terms to consider, it checks the pagename, the glosses specified using {{para|t}}, and the terms specified using {{para|modern}}, {{para|short}} and {{para|full}}. 
It looks to see if any of these parameters match any known locations, but only adds them to a bare location category if (a) the specified entry placetype matches, so that for example Russian `[[Джорджия]]` goes into [[:Category:Georgia, USA]] while `[[Грузия]]` goes into [[:Category:Georgia]] (the country), even though both have a gloss `Georgia`; and (b) there are no conflicting holonyms, so that for example the Old English term [[Munucceaster]] if defined similarly to {{tl|place|ang|city|in modern|cc/England|t=Newcastle}} won't get added to [[:Category:Newcastle, New South Wales]] (even though it is also a city) because the latter city is known to be in Australia, which conflicts with the country `United Kingdom` (added internally to the Old English place description through the holonym augmentation process, based on the holonym `cc/England`). ## Generic place categories are categories like [[:Category:Places in Kansas, USA]] and [[:Category:Places in England]] that contain places of arbitrary placetype. These are added through a special category handler that operates like other category handlers but is run for all placetypes, rather than only for the specified one(s). ]==] --[=[ TODO/FIXME: 1. [DONE] Neighborhoods should categorize at the city level. Categories like [[:Category:Places in Los Angeles]] exist but not [[:Category:Neighborhoods in Los Angeles]]; we can refactor the code in generic_cat_handler() to support this use case. 2. Display handlers should be smarter. For example, 'co/Travis' as a holonym should display as 'Travis County' in the United States, but (I think) display handlers don't currently have the full context of holonyms passed in to allow this to happen. 3. Connected to this, we have various display handlers that add the name of the holonym after or (sometimes) before the placename if it's not already there. 
An example is the county_display_handler() in [[Module:place/placetypes]], which adds "County" before Ireland and Northern Ireland counties and after Taiwan and Romania counties. This should be integrated into the polity group for these respective polities through a setting rather than requiring a separate handler that has special casing for various polities. 4. Placetypes for toponyms should also have display handlers rather than just fixed text. This should allow us to dispense with the need for special types for "fpref" = "French prefecture" (which displays as "prefecture" but links to the appropriate Wikipedia article on French prefectures, which are completely different from the more general concept of prefecture). Similarly for "Polish colony" and "Welsh community". ("Israeli settlement" should probably stay as-is because it displays as "Israeli settlement" not just "settlement".) 5. [DONE] Currently, categories for e.g. states and territories of Australia go into [[:Category:States and territories of Australia]] but terms for states and territories of Australia go into (respectively) [[:Category:States of Australia]] and [[:Category:Territories of Australia]]. We should fix this; maybe this is as easy as setting cat_as in the respective divs definitions. 6. Probably cat_as should support raw categories as well as category types; raw categories would be indicated by being prefixed with "Category:". 7. [MOSTLY DONE] Update documentation. 8. [DONE] Rename remaining political division categories to include name of country in them. 9. [DONE] Add Pakistan provinces and territories. 10. [DONE] Add a polity group for continents and continent-level regions instead of special-casing. This should make it possible e.g. to have Jerusalem as a city under "Asia". 11. [DONE] Add better handling of cities that are their own states, like Mexico City. 12. [DONE] Breadcrumb for e.g. [[Category:Aguascalientes, Mexico]] is "Aguascalientes, Mexico" instead of just "Aguascalientes".
13. [DONE] Unify aliasing system; cities have a completely different mechanism (alias_of) vs. polities/subpolities (which use `placename_cat_aliases` and `placename_display_aliases` in [[Module:place/placetypes]]). 14. [DONE] More generally, cities should be unified into the polity grouping system to the extent possible; this would allow for divs of cities (see #17 below). 15. [DONE] We have `no_containing_polity_cat` set for Lebanon, Malta and Saudi Arabia to prevent country-level implications from being added due to generically-named divisions like "North Governorate", "Central Region" and "Eastern Province" but (a) this setting seems to do multiple things and should be split, (b) it should be possible to set this at the division level instead of the country level. 16. Split out the data from the handlers so we can use loadData() on the data because it's becoming very big. 17. [DONE] Cities like Tokyo have special wards; "prefecture-level cities" like Wuhan (which aren't really cities but we treat them as such) have districts, subdistricts, etc. We need to support divs for cities and even named divisions of cities (such as we already have for boroughs of New York City). 18. [DONE] It should be allowed to set 'true' to any qualifier (which links it) and have it work correctly; qualifier lookup in [[Module:place]] needs to remove links first. 19. [DONE] Categories 'Historical polities' and 'Historical political subdivisions' should be renamed 'Former ...' since "historic(al)" is ambiguous (cf. "historic counties" in England which are not former, but still have a legal definition). 20. [PARTLY DONE; SUPPORT IS THERE BUT FORMER PROVINCES NOT YET CATEGORIZED] It should be possible to categorize former subpolities of certain polities; cf. [[:Category:ja:Provinces of Japan]], which contains former provinces. 21.
[DONE] In subpolity_keydesc(), we need to generate the correct indefinite article and have a huge hack to check specifically for "union territory", which is the only placetype that shows up in this function where the default indefinite article generating function fails. To fix this properly, we need to separate out the non-category placetype data from `cat_data` in [[Module:place/placetypes]] and move it to [[Module:place/locations]], because we don't have access to the data in [[Module:place/placetypes]], and that data indicates the correct article for placetypes like "union territory". 22. [DONE] Simplify the specs in `cat_data`, eliminating the distinction between "inner" and "outer" matching. There should not be two levels, just one. For example, in "district", instead of ["country/Portugal"] = { ["itself"] = {"Districts and autonomous regions of +++"}, } we should just have ["country/Portugal"] = {"Districts and autonomous regions of +++"}, And in "dependent territory", instead of ["default"] = { ["itself"] = {true}, ["country"] = {true}, }, we should just have ["itself"] = {true}, ["country/*"] = {true}, It appears the only remaining spec that can't be easily converted in this fashion is for "subdistrict": ["country/Indonesia"] = { ["municipality"] = {true}, }, This seems to be specifically for Jakarta and doesn't seem to work anyway, as the two entries in [[:Category:en:Subdistricts of Jakarta]] and the one entry in [[:Category:id:Subdistricts of Jakarta]] are manually categorized. 23. [DONE] Consolidate the remaining stuff in [[Module:category tree/topic cat/data/Earth]] into [[Module:category tree/topic cat/data/Places]]. 24. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city, but doesn't do the same for larger-level divisions. Likewise for the `city_type_cat_handler`. 
There are some sufficiently generically-named divisions that this issue can occur; for example, [[Koforidua]], the capital city of Eastern Region, Ghana, is incorrectly categorized under [[:Category:en:Cities in Eastern Region, Malta]] and [[:Category:en:Places in Eastern Region, Malta]]. Note that the function `augment_holonyms_with_container` ''DOES'' do such checks, so we should be able to refactor the code out of that function and use it elsewhere. 25. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city; but how smart is it? It will successfully avoid categorizing a neighborhood in e.g. [[Columbus]], [[Georgia]] that doesn't explicitly mention the US (only `s/Georgia`) into [[:Category:en:Places in Columbus]], which is for Columbus, Ohio, but will it do the same for a hypothetical neighborhood of Columbus in say Merseyside, England? This should be investigated. It will probably work for a hypothetical Columbus in [[Canada]] because `augment_holonyms_with_container` would auto-add Canada as an additional holonym once say `p/Ontario` is mentioned, but I think there's a setting preventing this augmentation from happening for the UK. (This relates to FIXME #15. `no_containing_polity_cat` is set on England, Scotland, etc. to prevent the toponyms from being added to [[:Category:en:Places in the United Kingdom]], but this same setting is used to prevent augmentation, which it should not be; there should be different settings.) 26. [DONE] The `generic_cat_handler` (or more specifically `find_holonym_keys_for_categorization`) checks for city holonyms by looking specifically for holonym type `city`. But some cities (particularly those in China) can be specified using different holonym types, e.g. `prefecture-level city`, `subprovincial city`, etc. 
We should allow these when appropriate (which means the cities in China need to have a `placetype` set that indicates their regional-level status as well as just `city`). I'm not sure if cities support specifying a custom `placetype` at the moment; this relates to FIXME #14 above concerning unifying cities and political divisions internally. 27. [DONE] The bare category handler (`get_bare_categories` in [[Module:place/placetypes]]) is not smart enough to avoid overcategorizing cities or other divisions that are of the right placetype but in the wrong containing polity. For example, Asturian [[Llión]] "León (city in Spain)" gets put in [[:Category:ast:León]] even though the latter is supposed to refer to a city in Mexico. We can borrow the check-containing-polity code from `generic_cat_handler`. 28. [DONE] Redo handling of singular and plural to respect overrides specified in placetype_data. Check more carefully for things that may not singularize correctly, e.g. 'passes' -> 'passe'? Definitely 'headquarters' and variants. 29. [DONE] Combine placetype_equivs and other placetype data into `placetype_data`. Figure out if we need the distinction between `placetype_equivs` and `fallback`. 30. `has_neighborhoods` may need to be a function that can look at the containing holonyms to determine whether the entity in question is city-like. 31. [DONE] Bare placenames as they appear in holonyms (e.g. `Riau Islands`) instead of category keys (e.g. `the Riau Islands, Indonesia`) should appear in the polity data tables. As a first pass, the word "the" should not appear but should instead be a property of the polity. 32. [DONE] `capital_city_cat_handler` should use `get_holonyms_to_check()`. 33. [PARTLY DONE] The code to generate and parse the correct preposition ("in" or "of") is very convoluted, and the actual preposition used is specified in various locations with various defaults, sometimes hardcoded. This should be simplified. 
It is made more difficult by the fact that the in/of distinction occurs in several places: (a) when generating the {{place}} text in old-style descriptions where the preposition isn't explicitly given, which uses the `preposition` setting in placetype_data, defaulting to "in"; (b) when generating categories based on explicit category specs in placetype_data (which are gradually being deprecated), which likewise uses the `preposition` setting in placetype_data, defaulting to "in"; (c) when generating categories based on political_division_cat_handler, originating in the `divs` placetypes for specific known locations in [[Module:place/locations]], which uses the `prep` setting embedded in the `divs` specifications, defaulting to "of"; (d) when generating categories based on category handlers specified using the `cat_handler` property of entries in placetype_data, which tend to hardcode "in" or "of" depending on the specific category handler; (e) when generating category descriptions in [[Module:category tree/topic/Places]] for `divs` categories generated in (c), which (correctly) uses the same `prep` setting embedded in the `divs` settings that is used when generating the categories themselves; (f) when generating category descriptions for categories generated in (b) and (d) above, which relies on the `generic_before_non_cities` and `generic_before_cities` settings in placetype_data, which need to match the corresponding prepositions hardcoded in the category generation handlers. Instead of the hardcoding, the category generation handler should respect the `generic_before_*` settings. 34. [[Krakow]] defined as {{place|en|A <<city>> on the [[Vistula]] River, the <<capital>> of the <<voi/Lesser Poland Voivodeship>> in southern <<c/Poland>>}} categorizes under [[:Category:Voivodeship capitals]] when it should probably instead be under [[:Category:Voivodeship capitals of Poland]]. 
Possibly this is because the various voivodeships haven't yet been entered as known locations, but this should happen regardless of that. 35. {{tcl}} bugs: a. [DONE] Lowercase initial letter in new-style {{place}} descriptions in {{tcl}}. Maybe we can have a setting tcl_nolc=1 to prevent this from happening. b. [DONE] tcl= and probably new-style {{place}} descriptions in general should recognize ;; to separate distinct {{place}} descriptions, and similarly ;;and as the equivalent of regular `;and`, etc. c. [DONE] The value supplied in `modern=` should be displayed in {{tcl}} descriptions regardless of the setting that normally disables this, so that e.g. the foreign-language equivalent of [[British Honduras]] doesn't just say it's a former British colony in Central America but specifically identifies it as modern Belize. If the user gives, place_modern= in {{tcl}}, that should override the modern= value and still display. d. [DONE] The page supplied to {{tcl}} should be used for generating bare categories even if t= is supplied and overrides the English term displayed. [DONE] e. [DONE] If text follows {{place}} and begins with a semicolon, the semicolon isn't copied into {{tcl}}. 36. County boroughs used as holonyms currently display 'borough county borough' because there's an affix setting for 'county borough' and a fallback display handler for 'borough'. We need to rethink this; maybe merge the affix setting and display handlers. 37. Implement known-location groups and specs in a more standardly object-oriented way using metatables. 38. Implement caching of known location lookup in the holonym. This may have to be keyed by placetype, but we can have a special field for when the lookup placetype is the same as the user-specified placetype of the holonym. Use this known location in place of looking up known locations and store the appropriate known location there in `augment_holonyms_with_container()` instead of calling `key_to_placename`. 39. 
Bug fixes with 'the': (a) [DONE] [[Kazaň]] defined as {{place|cs|caplc|rep:Pref/Tatarstan|c/Russia|t1=Kazan}} displays as "Republic of the Tatarstan". (b) [[Valday]] defined as {{place|en|town/administrative center|dist:Suf/Valdaysky|obl/Novgorod|c/Russia}} displays as "a town, the administrative center of the Valdaysky District". Changing to `dist:suf/Valdaysky` displays as "... of Valdaysky district". 40. [DONE] Bug fix with 'the': [[Verkhoyansk]] defined as {{place|en|town|rep/Sakha|c/Russia}} displays as "a town in the Sakha". 41. [DONE] [[Category:Cities in Asia]] has [[Category:Cities in Eurasia]] as a parent, which in turn has [[Category:Cities in the Earth]] as a parent. Continents should not have the second parent like this. 42. [DONE] When checking `british_spelling`, it should check all containers as well; otherwise it's too hard to keep this in sync across cities, administrative divisions and countries. 43. [DONE] `skip_polity_parent_type` should be renamed to container_parent_type or similar. 44. There should be a flag to allow e.g. departments of France that are currently categorized as departments of their region to also be categorized as departments of France. 45. [DONE] Aliases are causing iterate_matching_holonym_location() to fail, e.g. if [[براق]] "Prague" is specified as {{place|acw|capital city|c/Czechia|t1=Prague}}, this fails to add a bare category [[Category:acw:Prague]] because the code in iterate_matching_holonym_location() isn't resolving aliases when comparing the known container 'Czech Republic'. Probably we want to build an alias table to speed up these sorts of lookups. 46. [DONE; DUE TO TYPO IN HANDLER] The district cat handler is failing to work right, e.g. in [[Saint-Gaudérique]] defined as {{place|fr|district|city/Perpignan|in|dept/Pyrénées-Orientales|r/Occitania|c/France|t=Saint-Gaudérique}}, only the 'Places in ...' categories are getting triggered. 47.
Suburbs of a given city aren't generally in the city and may not even be in the same country or country division,
    so they should not categorize as "Places in ..." based on the city and specified country and division. Same goes
    for "enclave" (within somewhere) and "exclave".
48. When converting display aliases, we should automatically convert full placenames to full placenames and elliptical
    placenames to elliptical placenames instead of always either doing elliptical or full placenames depending on the
    value of `display_as_full`.
49. `@obsolete form of` and `@archaic form of` should automatically trigger nocat=1.
50. The handler that adds bare categories should pick up values in <eq:...>.
]=]

--[==[ var:
List specifying the allowed form-of directives, used for former names, official names, abbreviations, etc. of places.
The key is the form-of directive and the value is an object with the following properties:
* `text`: The actual text displayed before the terms. If the value is `+`, the key is used as the text. If the value is
  a function, it is passed a single argument, the overall place spec (see comment at top of file) and should return the
  text to be displayed.
* `type_prefix`: The prefix used to generate the placetype for looking up the appropriate category or categories in the
  placetype data structure. Can be omitted if there are no categories associated with the directive.
* `conjunction`: The conjunction used to join multiple terms, defaulting to `and`.
* `cat`: Additional category or categories to add the term to, whenever this particular directive is used. Normally the
  value is a topic-style category minus the langcode prefix, but if prefixed with `cln:`, it is a langname-style
  category. For example, the value `"Abbreviations"` would correspond to a category [[:Category:en:Abbreviations]]
  (assuming the language of the {{tl|place}} call is English), while the value `"cln:abbreviations"` corresponds to a
  category [[:Category:English abbreviations]]. Use a list of such specs for multiple categories.
* `default_foreign`: If specified, the default language of terms given along with this directive is the language in
  {{para|1}}; otherwise it is English.
* `alias_of`: If specified, the entry is only an alias; the value is the key of the canonical directive whose
  properties are used instead. An alias must point directly at a non-alias entry.
]==]
export.all_form_of_directives = {
	["former name of"] = {text = "+", type_prefix = "FORMER_NAME_OF"},
	["fmr of"] = {alias_of = "former name of"},
	["ancient name of"] = {text = "+", type_prefix = "FORMER_NAME_OF"},
	["official name of"] = {text = "+", type_prefix = "OFFICIAL_NAME_OF"},
	["former official name of"] = {text = "+", type_prefix = "FORMER_OFFICIAL_NAME_OF"},
	["long form of"] = {text = "+", type_prefix = "LONG_FORM_OF"},
	["former long form of"] = {text = "+", type_prefix = "FORMER_LONG_FORM_OF"},
	["nickname for"] = {text = "+", type_prefix = "NICKNAME_FOR"},
	["official nickname for"] = {text = "+", type_prefix = "OFFICIAL_NICKNAME_FOR"},
	["former nickname for"] = {text = "+", type_prefix = "FORMER_NICKNAME_FOR"},
	["derogatory name for"] = {text = "[[Appendix:Glossary#derogatory|derogatory]] name for", type_prefix = "DEROGATORY_NAME_FOR"},
	["synonym of"] = {text = "+"},
	["syn of"] = {alias_of = "synonym of"},
	["abbreviation of"] = {text = "[[Appendix:Glossary#abbreviation|abbreviation]] of", type_prefix = "ABBREVIATION_OF", cat = "cln:abbreviations", default_foreign = true},
	["abbr of"] = {alias_of = "abbreviation of"},
	["abbrev of"] = {alias_of = "abbreviation of"},
	["initialism of"] = {text = "[[Appendix:Glossary#initialism|initialism]] of", type_prefix = "ABBREVIATION_OF", cat = "cln:initialisms", default_foreign = true},
	["init of"] = {alias_of = "initialism of"},
	["acronym of"] = {text = "[[Appendix:Glossary#acronym|acronym]] of", type_prefix = "ABBREVIATION_OF", cat = "cln:acronyms", default_foreign = true},
	["syllabic abbreviation of"] = {text = "[[Appendix:Glossary#syllabic abbreviation|syllabic abbreviation]] of", type_prefix = "ABBREVIATION_OF", cat = "cln:syllabic abbreviations", default_foreign = true},
	["sylabbr of"] = {alias_of = "syllabic abbreviation of"},
	["sylabbrev of"] = {alias_of = "syllabic abbreviation of"},
	["ellipsis of"] = {text = "[[Appendix:Glossary#ellipsis|ellipsis]] of", type_prefix = "ELLIPSIS_OF", cat = "cln:ellipses", default_foreign = true},
	["ellip of"] = {alias_of = "ellipsis of"},
	["clipping of"] = {text = "[[Appendix:Glossary#clipping|clipping]] of", type_prefix = "CLIPPING_OF", cat = "cln:clippings", default_foreign = true},
	["clip of"] = {alias_of = "clipping of"},
	["alternative form of"] = {text = "+", default_foreign = true},
	["alt form"] = {alias_of = "alternative form of"},
	["alternative spelling of"] = {text = "+", default_foreign = true},
	["alt spell"] = {alias_of = "alternative spelling of"},
	["alt sp"] = {alias_of = "alternative spelling of"},
	["dated form of"] = {text = "[[Appendix:Glossary#dated|dated]] form of", type_prefix = "DATED_FORM_OF", cat = "cln:dated forms", default_foreign = true},
	["dated form"] = {alias_of = "dated form of"},
	["dated spelling of"] = {text = "[[Appendix:Glossary#dated|dated]] spelling of", type_prefix = "DATED_FORM_OF", cat = "cln:dated forms", default_foreign = true},
	["dated spell"] = {alias_of = "dated spelling of"},
	["dated sp"] = {alias_of = "dated spelling of"},
	["archaic form of"] = {text = "[[Appendix:Glossary#archaic|archaic]] form of", type_prefix = "ARCHAIC_FORM_OF", cat = "cln:archaic forms", default_foreign = true},
	["arch form"] = {alias_of = "archaic form of"},
	["archaic spelling of"] = {text = "[[Appendix:Glossary#archaic|archaic]] spelling of", type_prefix = "ARCHAIC_FORM_OF", cat = "cln:archaic forms", default_foreign = true},
	["arch spell"] = {alias_of = "archaic spelling of"},
	["arch sp"] = {alias_of = "archaic spelling of"},
	["obsolete form of"] = {text = "[[Appendix:Glossary#obsolete|obsolete]] form of", type_prefix = "OBSOLETE_FORM_OF", cat = "cln:obsolete forms", default_foreign = true},
	["obs form"] = {alias_of = "obsolete form of"},
	["obsolete spelling of"] = {text = "[[Appendix:Glossary#obsolete|obsolete]] spelling of", type_prefix = "OBSOLETE_FORM_OF", cat = "cln:obsolete forms", default_foreign = true},
	["obs spell"] = {alias_of = "obsolete spelling of"},
	["obs sp"] = {alias_of = "obsolete spelling of"},
}


local get_seat_text
do
	-- Placetypes (singular and plural) whose seat has a placetype-specific name.
	local seat_text_by_placetype = {
		["county"] = "county seat",
		["counties"] = "county seat",
		["parish"] = "parish seat",
		["parishes"] = "parish seat",
		["borough"] = "borough seat",
		["boroughs"] = "borough seat",
	}

	-- Return the text to display before the value of the `seat` extra-info argument. Only the first placetype of the
	-- first place description in `overall_place_spec` is consulted; anything other than a county, parish or borough
	-- yields plain "seat".
	function get_seat_text(overall_place_spec)
		local first_placetype = overall_place_spec.descs[1].placetypes[1]
		return seat_text_by_placetype[first_placetype] or "seat"
	end
end


--[==[ var:
List specifying the allowed arguments containing extra information that is sometimes added to a definition, such as the
capital, largest city, modern name, official name, etc., along with associated properties; displayed in the order given.
Each element is an object with the following properties:
* `arg`: The argument name.
* `text`: The actual text displayed before the terms. If the value is `+`, the argument name is used as the text. If the
  value is a function, it is passed a single argument, the overall place spec (see the comment at the top of the file)
  and should return the text to be displayed.
* `conjunction`: The conjunction used to join multiple terms, defaulting to `and`.
* `display_even_when_dropped`: Display this piece of extra info even when it would normally be dropped (e.g. in
  {{tl|tcl}} when the language is other than English).
* `match_sentence_style`: If true, the text will be capitalized and preceded by a period when ''sentence style'' is in
  effect (essentially, when the language is English and there is no translation specified using {{para|t}} or similar
  parameter); otherwise, the text will be displayed as-is and preceded by a semicolon. If false, the semicolon style
  will always be used.
* `auto_plural`: If true, pluralize the text when there is more than one term.
* `with_colon`: If true, follow the text with a colon.
(This colon cannot easily be included in the text itself because if pluralized, the pluralized text goes before the
  colon.)
]==]
export.extra_info_args = {
	{arg = "modern", text = "+", conjunction = "or", display_even_when_dropped = true},
	{arg = "now", text = "now,", conjunction = "or", display_even_when_dropped = true},
	{arg = "full", text = "in full,", conjunction = "or", display_even_when_dropped = true},
	{arg = "short", text = "short form", conjunction = "or"},
	{arg = "abbr", text = "abbreviation", conjunction = "or"},
	{arg = "former", text = "formerly,"},
	{arg = "official", text = "official name", match_sentence_style = true, auto_plural = true, with_colon = true},
	{arg = "capital", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true},
	{arg = "largest city", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true},
	{arg = "caplc", text = "capital and largest city", match_sentence_style = true, auto_plural = false, with_colon = true},
	{arg = "seat", text = get_seat_text, match_sentence_style = true, auto_plural = true, with_colon = true},
	{arg = "shire town", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true},
	{arg = "headquarters", text = "+", match_sentence_style = true, auto_plural = false, with_colon = true},
	{arg = "center", text = "administrative center", match_sentence_style = true, auto_plural = false, with_colon = true},
	{arg = "centre", text = "administrative centre", match_sentence_style = true, auto_plural = false, with_colon = true},
}

-- Index of the specs in `export.extra_info_args`, keyed by argument name, for direct lookup.
export.extra_info_arg_map = {}
for _, extra_info_spec in ipairs(export.extra_info_args) do
	local arg_name = extra_info_spec.arg
	export.extra_info_arg_map[arg_name] = extra_info_spec
end


----------- Wikicode utility functions


-- Return a wikilink {{l|language|text}} to `text` in the language with code `langcode`, optionally with sense ID `id`.
-- If `langcode` is nil or false, `text` is returned unchanged.
local function link(text, langcode, id)
	if langcode then
		local full_link = m_links.full_link
		local lang = require(languages_module).getByCode(langcode, true, "allow etym")
		return full_link({term = text, lang = lang, id = id}, nil, "allow self link")
	end
	return text
end
---------- Basic utility functions


-- Add the page to a tracking "category". To see the pages in the "category",
-- go to [[Wiktionary:Tracking/place/PAGE]] and click on "What links here".
-- Always returns true.
local function track(page)
	local do_track = require(debug_track_module)
	do_track("place/" .. page)
	return true
end


-- Uppercase the first letter of every space-separated word in `text`.
local function ucfirst_all(text)
	if not text:find(" ") then
		return m_strutils.ucfirst(text)
	end
	local words = split(text, " ", true)
	for word_index, word in ipairs(words) do
		words[word_index] = m_strutils.ucfirst(word)
	end
	return concat(words, " ")
end


-- Lowercase `text` using the wiki's content language.
local function lc(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lc(text)
end


---------- Argument parsing functions and utilities


-- Split an argument on comma, but not comma followed by whitespace. Always returns a list; an argument without any
-- comma is returned as a single-element list without loading the parsing module.
local function split_on_comma(val)
	if not val:find(",") then
		return {val}
	end
	return require(parse_interface_module).split_on_comma(val)
end


-- Split an argument on slash, but not slash occurring inside of HTML tags like </span> or <br />.
local function split_on_slash(arg)
	if not arg:find("<") then
		-- No angle brackets at all, so a plain split is enough.
		return split(arg, "/", true)
	end

	local m_parse_utilities = require(parse_utilities_module)
	-- We implement this by parsing balanced segment runs involving <...>, and splitting on slash in the remainder.
	-- The result is a list of lists, so we have to rejoin the inner lists by concatenating.
	local bracket_runs = m_parse_utilities.parse_balanced_segment_run(arg, "<", ">")
	local pieces = m_parse_utilities.split_alternating_runs(bracket_runs, "/")
	for piece_index, run in ipairs(pieces) do
		pieces[piece_index] = concat(run)
	end
	return pieces
end


-- Implement "implications", i.e. where the presence of a given holonym causes additional holonym(s) to be added.
-- Implications apply only to categorization. There used to be support for "general implications" that applied to both
-- display and categorization, but there ended up not being any such implications, so we've removed the support. It is
-- a bad idea in any case to have such implications; the user might purposely leave out a higher-level polity to avoid
-- redundancy in several successive definitions, and we wouldn't want to override that. Note that in practice the
-- mechanism implemented by this function is used specifically for non-administrative geographic regions such as
-- Eastern Europe and the West Bank; there is a similar mechanism for administrative regions handled by
-- `augment_holonyms_with_containing_polity` in [[Module:place/placetypes]].
--
-- `place_descriptions` is a list of place descriptions (see top of file, collectively describing the data passed to
-- {{place}}). `implication_data` is the data used to implement the implications, i.e. a table indexed by holonym
-- placetype, each value of which is a table indexed by holonym placename, each value of which is a list of
-- "PLACETYPE/PLACENAME" holonyms to be added to the end of the list of holonyms.
--
-- Each description's `holonyms` list is replaced by a new list in which every implied holonym directly follows the
-- holonym that triggered it.
local function handle_category_implications(place_descriptions, implication_data)
	for _, desc in ipairs(place_descriptions) do
		if desc.holonyms then
			local augmented_holonyms = {}
			for _, holonym in ipairs(desc.holonyms) do
				insert(augmented_holonyms, holonym)
				-- Look up the implications under the holonym's placetype or any placetype equivalent to it.
				local implied_specs = m_placetypes.get_equiv_placetype_prop(holonym.placetype, function(pt)
					local by_placename = implication_data[pt]
					local found = by_placename and by_placename[holonym.unlinked_placename]
					if found then
						return found
					end
				end)
				if implied_specs then
					for _, implied_spec in ipairs(implied_specs) do
						local spec_parts = split_on_slash(implied_spec)
						if #spec_parts ~= 2 then
							internal_error("Invalid holonym in implications: %s", implied_spec)
						end
						-- By the time we run, the display has already been generated so we don't need to set
						-- display_placename.
						local implied_holonym = {
							placetype = spec_parts[1],
							unlinked_placename = spec_parts[2],
						}
						insert(augmented_holonyms, implied_holonym)
						m_placetypes.key_holonym_into_place_desc(desc, implied_holonym)
					end
				end
			end
			desc.holonyms = augmented_holonyms
		end
	end
end


-- Split a holonym (e.g. "continent/Europe" or "country/en:Italy" or "in southern" or "r:suf/O'Higgins" or
-- "c/Austria,Germany,Czech Republic") into its components. Return a list of holonym objects (see top of file). Note
-- that if there isn't a slash in the holonym (e.g. "in southern"), the `placetype` field of the holonym will be nil.
-- Placetype aliases (e.g. "r" for "region") and placename aliases (e.g. "US" or "USA" for "United States") will be
-- expanded.
local function split_holonym(raw)
	local no_display, combined_holonym = raw:match("^(!)(.*)$")
	no_display = not not no_display
	combined_holonym = combined_holonym or raw
	local suppress_comma, combined_holonym_without_comma = combined_holonym:match("^(%*)(.*)$")
	suppress_comma = not not suppress_comma
	combined_holonym = combined_holonym_without_comma or combined_holonym
	local holonym_parts = split_on_slash(combined_holonym)
	if #holonym_parts == 1 then
		-- `unlinked_placename` should not be used.
		return {{display_placename = combined_holonym, no_display = no_display, suppress_comma = suppress_comma}}
	end

	-- Rejoin further slashes in case of slash in holonym placename, e.g. Admaston/Bromley.
	local placetype = holonym_parts[1]
	local placename = concat(holonym_parts, "/", 2)

	-- Check for modifiers after the holonym placetype.
local split_holonym_placetype = split(placetype, ":", true)
	-- The first colon-separated piece is the placetype proper; every later piece is a modifier.
	placetype = split_holonym_placetype[1]
	local affix_type
	local saw_also
	local saw_the
	for i = 2, #split_holonym_placetype do
		local modifier = split_holonym_placetype[i]
		if modifier == "also" then
			if saw_also then
				error(("Modifier ':also' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_also = true
		elseif modifier == "the" then
			if saw_the then
				error(("Modifier ':the' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_the = true
		elseif modifier == "pref" or modifier == "Pref" or modifier == "suf" or modifier == "Suf" or
			modifier == "noaff" then
			-- At most one affix-type modifier is allowed per holonym.
			if affix_type then
				error(("Affix-type modifier ':%s' occurs twice in holonym '%s'"):format(modifier, combined_holonym))
			end
			affix_type = modifier
		else
			error(("Unrecognized holonym placetype modifier '%s', should be one of " ..
				"'pref', 'Pref', 'suf', 'Suf', 'noaff', 'also' or 'the'"):format(modifier))
		end
	end

	placetype = m_placetypes.resolve_placetype_aliases(placetype)
	-- A single holonym spec may name several comma-separated places sharing one placetype.
	local holonyms = split_on_comma(placename)
	local pluralize_affix = #holonyms > 1
	-- Index of the one holonym that carries the affix: the first for prefix types, none (index 0 never matches) for
	-- `noaff`, and the last in all other cases, including when no affix type was given.
	local affix_holonym_index = (affix_type == "pref" or affix_type == "Pref") and 1 or affix_type == "noaff" and 0 or
		#holonyms
	for i, placename in ipairs(holonyms) do
		-- Check for langcode before the holonym placename, but don't get tripped up by Wikipedia links, which begin
		-- "[[w:...]]" or "[[wikipedia:]]".
		local langcode, placename_without_langcode = rmatch(placename, "^([^%[%]]-):(.*)$")
		if langcode then
			placename = placename_without_langcode
		end
		placename = m_placetypes.resolve_placename_display_aliases(placetype, placename)
		-- Replace the raw placename string in the list with the full holonym object.
		holonyms[i] = {
			placetype = placetype,
			display_placename = placename,
			unlinked_placename = m_placetypes.remove_links_and_html(placename),
			langcode = langcode,
			affix_type = i == affix_holonym_index and affix_type or nil,
			pluralize_affix = i == affix_holonym_index and pluralize_affix,
			suppress_affix = i ~= affix_holonym_index,
			no_display = no_display,
			suppress_comma = suppress_comma,
			continue_cat_loop = saw_also,
			-- `:the` applies only to the first of several comma-separated placenames.
			force_the = i == 1 and saw_the,
		}
	end
	return holonyms
end


-- Return the inline-modifier specs accepted on terms in form-of directives and extra-info arguments: the `link`, `q`,
-- `l` and `ref` modifier groups, `eq`, and an overall `conj` restricted to "and", "or" or "and/or". The builder is
-- wrapped in memoize(), presumably so that the specs are only constructed once -- confirm against memoize's definition.
local get_param_mods = memoize(function()
	local m_param_utils = require(parameter_utilities_module)
	return m_param_utils.construct_param_mods {
		{group = {"link", "q", "l", "ref"}},
		{param = "eq"},
		-- FIXME: Finish [[Module:format utilities]].
		--{param = "conj", set = require(format_utilities_module).allowed_conjs_for_join_segments, overall = true},
		{param = "conj", set = {["and"] = true, ["or"] = true, ["and/or"] = true}, overall = true},
	}
end)


-- Parse `term`, a comma-separated list of terms with optional inline modifiers and language prefixes, as found in
-- parameter `paramname` (used when reporting errors). Terms without an explicit language get `default_lang`.
local function parse_term_with_inline_modifiers(term, paramname, default_lang)
	-- FIXME: Finish changes to [[Module:parameter utilities]] and [[Module:parse utilities]] that support continuations
	-- and new-format generate_obj().
--local function generate_obj(data)
	--	local m_param_utils = require(parameter_utilities_module)
	--	data.parse_lang_prefix = true
	--	data.special_continuations = m_param_utils.default_special_continuations
	--	data.default_lang = default_lang
	--	return m_param_utils.generate_obj_maybe_parsing_lang_prefix(data)
	--end
	local function generate_obj(raw_term, parse_err)
		local obj = require(parameter_utilities_module).generate_obj_maybe_parsing_lang_prefix {
			term = raw_term,
			parse_err = parse_err,
			parse_lang_prefix = true,
		}
		obj.lang = obj.lang or default_lang
		return obj
	end
	return require(parse_interface_module).parse_inline_modifiers(term, {
		paramname = paramname,
		param_mods = get_param_mods(),
		generate_obj = generate_obj,
		-- FIXME: See above.
		--generate_obj_new_format = true,
		splitchar = ",",
		outer_container = {},
	})
end


-- Parse a form-of directive argument of the form `@DIRECTIVE:TERMS`, e.g. `@former name of:...`. `lang` is the default
-- language of the terms when the directive's spec has `default_foreign` set (or when the terms were overridden);
-- otherwise the terms default to English. `form_of_overridden_args`, if given, is a table keyed by canonical directive
-- whose values have fields `new_value` (replacement for the raw terms) and `new_directive` (canonical directive to use
-- instead). Directive aliases are resolved to their canonical directive. Returns an object with fields `directive`
-- (the canonical directive), `terms`, `conj` and `spec` (the entry in `export.all_form_of_directives`). Throws an error
-- on a misformatted or unrecognized directive.
local function parse_form_of_directive(arg, lang, form_of_overridden_args)
	local all_directives = export.all_form_of_directives

	local form_of_directive, raw_terms = arg:match("^@([a-z -]+):(.*)$")
	if not form_of_directive then
		error("Misformatted @-directive: " .. dump(arg))
	end

	local spec = all_directives[form_of_directive]
	if not spec then
		-- List all known directives (sorted, quoted) in the error message.
		local quoted_names = {}
		for name in pairs(all_directives) do
			insert(quoted_names, '"' .. name .. '"')
		end
		table.sort(quoted_names)
		error(("Unrecognized form-of directive %s in @-directive %s; recognized directives are %s"):format(
			dump(form_of_directive), dump(arg), concat(quoted_names, ", ")))
	end

	-- Resolve an alias to its canonical directive; aliases may not be chained.
	local canonical_directive = form_of_directive
	local alias_target = spec.alias_of
	if alias_target then
		canonical_directive = alias_target
		spec = all_directives[alias_target]
		if not spec then
			internal_error("Form-of directive alias %s points to %s, which is not a directive",
				"@" .. form_of_directive, canonical_directive)
		elseif spec.alias_of then
			internal_error("Form-of directive alias %s points to %s, which is also an alias",
				"@" .. form_of_directive, canonical_directive)
		end
	end

	local default_foreign = spec.default_foreign
	local directive_param = "@" .. form_of_directive

	local override = form_of_overridden_args and form_of_overridden_args[canonical_directive]
	if override then
		raw_terms = override.new_value
		local new_directive = override.new_directive
		local new_spec = all_directives[new_directive]
		if not new_spec then
			error(("Internal error: [[Module:transclude]] passed in unrecognized replacement directive '@%s'"):
				format(new_directive))
		end
		if new_spec.alias_of then
			error(("Internal error: [[Module:transclude]] passed in replacement directive alias '@%s', " ..
				"should be canonical"):format(new_directive))
		end
		if new_directive ~= canonical_directive then
			directive_param = directive_param .. (" (replaced with @%s)"):format(new_directive)
			canonical_directive = new_directive
			spec = new_spec
		end
		-- Overridden terms always default to the foreign language.
		default_foreign = true
	end

	local term_lang = default_foreign and lang or enlang
	local parsed = parse_term_with_inline_modifiers(raw_terms, directive_param, term_lang)
	return {
		directive = canonical_directive,
		terms = parsed.terms,
		conj = parsed.conj,
		spec = spec,
	}
end


-- Parse an argument containing extra information that is sometimes added to a definition, such as the capital, largest
-- city, modern name, official name, etc. `args` is the value from the parsed argument structure and can be either nil,
-- a string or a list (depending on whether it was declared as a single parameter or a list). `spec` is the extra info
-- spec corresponding to the type of extra info. Each value in `args` can be a comma-separated list of terms with inline
-- modifiers attached. [FIXME: we should switch to always using the comma-separated format and disallow list parameters
-- such as |capital=, |capital2=, etc.]
-- The return value is a structure containing fields `terms` (a list of term objects, each of which is in the format
-- expected by full_link() in [[Module:links]]), `conj` (an explicit conjunction to join multiple terms, or nil if no
-- explicit conjunction was given) and `spec` (the passed-in spec).
--
-- Returns nil if `args` is nil or an empty list. A non-table `args` is treated as a single-element list. `default_lang`
-- is the language used for terms that don't specify one. The i-th value is parsed under the parameter name `spec.arg`
-- (for the first value) or `spec.arg` followed by the index (for later ones), which is what appears in parse errors.
-- Throws an error if two values specify different explicit conjunctions.
local function parse_extra_info_arg(args, spec, default_lang)
	if not args then
		return nil
	end
	if type(args) ~= "table" then
		args = {args}
	end
	if not args[1] then
		return nil
	end
	local terms = nil
	local conj
	for i, arg in ipairs(args) do
		local this_terms = parse_term_with_inline_modifiers(arg, spec.arg .. (i == 1 and "" or i), default_lang)
		local thisconj = this_terms.conj
		if not conj then
			conj = thisconj
		elseif thisconj and conj ~= thisconj then
			-- The message has three placeholders; the third is the parameter name. (Previously only two values were
			-- supplied, so string.format itself threw "bad argument #3" instead of this message.)
			error(("Two different conjunctions '%s' and '%s' specified for |%s=; you only need to specify the " ..
				"conjunction once"):format(conj, thisconj, spec.arg))
		end
		if not terms then
			terms = this_terms.terms
		else
			-- Later values append their terms to the list produced by the first value.
			m_table.extend(terms, this_terms.terms)
		end
	end
	return {
		spec = spec,
		terms = terms,
		conj = conj,
	}
end


--[==[
Parse a "new-style" place description, with placetypes and holonyms surrounded by `<<...>>` amid otherwise raw text.
Return value is a place description object as documented at the top of the file. Exported for use by
[[Module:demonyms]].
]==]
function export.parse_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	-- `text` is the description; `lang` and `form_of_overridden_args` are passed on to parse_form_of_directive().
	-- `form_of_directives` is either nil (form-of directives are then not allowed in `text`) or a list to which any
	-- directives found in `text` are appended.
	local placetypes = {}
	-- Splitting on a pattern with a capture yields alternating pieces: odd-numbered ones are the raw text between
	-- `<<...>>` and even-numbered ones are the contents of `<<...>>`.
	local segments = split(text, "<<(.-)>>")
	local retval = {holonyms = {}, order = {}}
	-- True if the caller already collected directives from separate parameters before this description.
	local form_of_directives_already_present = form_of_directives and not not form_of_directives[1]
	for i, segment in ipairs(segments) do
		if i % 2 == 1 then
			insert(retval.order, {type = "raw", value = segment})
		elseif segment:find("@") then
			-- Form-of directive, e.g. <<@former name of:...>>.
			if not form_of_directives then
				error(("Form-of directive '%s' not allowed in this context"):format(segment))
			elseif form_of_directives_already_present then
				error(("Saw form-of directive '%s' in new-style place desc followed by direct (separate-parameter) form-of directives; not allowed"):format(
					segment))
			elseif placetypes[1] or retval.holonyms[1] then
				error(("Form-of directive '%s' must come first, before placetypes and holonyms"):format(segment))
			else
				local form_of_directive = parse_form_of_directive(segment, lang, form_of_overridden_args)
				if not retval.order[1] or retval.order[1].type ~= "raw" or retval.order[2] then
					internal_error("`retval.order` should have a single raw element: %s", retval.order)
				end
				-- The raw text preceding the directive is moved out of `retval.order` and becomes the directive's
				-- pretext, leaving `retval.order` empty again.
				form_of_directive.pretext = retval.order[1].value
				retval.order[1] = nil
				insert(form_of_directives, form_of_directive)
			end
		elseif segment:find("/") then
			-- Holonym spec, possibly expanding to several holonyms (comma-separated placenames).
			local holonyms = split_holonym(segment)
			for j, holonym in ipairs(holonyms) do
				if j > 1 then
					-- Join displayed holonyms with ", " and put " and " before the last one.
					if not holonym.no_display then
						if j == #holonyms then
							insert(retval.order, {type = "raw", value = " and "})
						else
							insert(retval.order, {type = "raw", value = ", "})
						end
					end
					-- All but the first in a multi-holonym need an article. For the first one, the article is
					-- specified in the raw text if needed. (Currently, needs_article is only used when displaying the
					-- holonym, so it wouldn't matter when no_display is set, but we set it anyway in case we need it
					-- for something else.)
					holonym.needs_article = true
				end
				insert(retval.holonyms, holonym)
				if not holonym.no_display then
					-- The order entry refers to the holonym by its index in `retval.holonyms`.
					insert(retval.order, {type = "holonym", value = #retval.holonyms})
				end
				m_placetypes.key_holonym_into_place_desc(retval, holonym)
			end
		else
			-- Placetype or qualifier, optionally written TREAT_AS:DISPLAY; without a colon the segment is used for
			-- both purposes.
			local treat_as, display = segment:match("^(..-):(.+)$")
			if treat_as then
				segment = treat_as
			else
				display = segment
			end
			-- see if the placetype segment is just qualifiers
			local only_qualifiers = true
			local split_segments = split(segment, " ", true)
			for _, split_segment in ipairs(split_segments) do
				if m_placetypes.placetype_qualifiers[split_segment] == nil then
					only_qualifiers = false
					break
				end
			end
			insert(placetypes, {placetype = segment, only_qualifiers = only_qualifiers})
			if only_qualifiers then
				insert(retval.order, {type = "qualifier", value = display})
			else
				insert(retval.order, {type = "placetype", value = display})
			end
		end
	end
	-- If this description contributed the directives, give the last one an empty posttext.
	if not form_of_directives_already_present and form_of_directives and form_of_directives[1] then
		form_of_directives[#form_of_directives].posttext = ""
	end

	-- Merge each qualifier-only segment into the segment that follows it, so that a run of qualifiers followed by a
	-- placetype becomes a single space-separated placetype string.
	local final_placetypes = {}
	for i, placetype in ipairs(placetypes) do
		if i > 1 and placetypes[i - 1].only_qualifiers then
			final_placetypes[#final_placetypes] = final_placetypes[#final_placetypes] .. " " .. placetypes[i].placetype
		else
			insert(final_placetypes, placetypes[i].placetype)
		end
	end
	retval.placetypes = final_placetypes
	return retval
end


--[==[
Parse one or more "new-style" place descriptions, with placetypes and holonyms surrounded by `<<...>>` amid otherwise
raw text. Multiple descriptions are separated by two semicolons in a row. Return value is a list of place description
objects as documented at the top of the file.
]==]
local function parse_conjoined_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	-- The capture in the split pattern means odd-numbered items are place descriptions and even-numbered items are
	-- the separators between them (minus the leading semicolon).
	local separate_specs = split(text, ";(;[^ ]*)")
	local descs = {}
	for i = 1, #separate_specs do
		if i % 2 == 1 then
			insert(descs, export.parse_new_style_place_desc(separate_specs[i], lang, form_of_directives,
				form_of_overridden_args))
			-- Form-of directives are only passed to the first description.
			form_of_directives = nil
		else
			descs[#descs].separator = separate_specs[i]
		end
	end
	return descs
end

--[=[
Process numeric and "extra info" arguments into an overall place spec, as described at the top of the file. `data` is
an object with the following fields:
* `args`: The parsed arguments of {{tl|place}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `extra_info_overridden_set`, `form_of_overridden_args`: Same as the corresponding fields in the `data` object passed
  to `export.format`.

Returns a table with fields `lang`, `args`, `directives`, `descs` and `extra_info`.
]=]
local function parse_overall_place_spec(data)
	local args, from_tcl, extra_info_overridden_set, form_of_overridden_args =
		data.args, data.from_tcl, data.extra_info_overridden_set, data.form_of_overridden_args
	local descs = {}
	local this_desc
	-- Index of separate (semicolon-separated) place descriptions within `descs`.
	local desc_index = 1
	-- Index of separate holonyms within a place description. 0 means we've seen no holonyms and have yet to process
	-- the placetypes that precede the holonyms. 1 means we've seen no holonyms but have already processed the
	-- placetypes.
	local holonym_index = 0
	local in_place_desc = false
	local form_of_directives = {}

	-- Set the `joiner` (and possibly `include_following_article`) field of `desc` according to `separator`, which is
	-- a semicolon optionally followed by more text. This writes to the `desc` argument rather than relying on the
	-- enclosing `this_desc` upvalue.
	local function set_desc_joiner(desc, separator)
		if separator == ";" then
			desc.joiner = "; "
			desc.include_following_article = true
		elseif separator == ";;" then
			desc.joiner = " "
		else
			local joiner = separator:sub(2)
			if rfind(joiner, "^%a") then
				-- Joiner beginning with a letter (e.g. a conjunction): surround it with spaces.
				desc.joiner = " " .. joiner .. " "
			else
				desc.joiner = joiner .. " "
			end
		end
	end

	for _, arg in ipairs(args[2]) do
		if arg:find("^@") then
			if not (desc_index == 1 and holonym_index == 0) then
				error("@-directives cannot follow place descriptions")
			end
			local form_of_directive = parse_form_of_directive(arg, args[1], form_of_overridden_args)
			if form_of_directives[1] then
				form_of_directive.pretext = ", "
			else
				form_of_directive.pretext = ""
			end
			insert(form_of_directives, form_of_directive)
		elseif arg == ";" or arg:find("^;[^ ]") then
			if not this_desc then
				error("Saw semicolon joiner without preceding place description")
			end
			set_desc_joiner(this_desc, arg)
			desc_index = desc_index + 1
			holonym_index = 0
			in_place_desc = false
		elseif arg:find("<<") then
			if in_place_desc then
				error("New-style place description must come first or following a separator (semicolon or similar), not directly following another description")
			end
			in_place_desc = true
			local this_descs = parse_conjoined_new_style_place_desc(arg, args[1], form_of_directives,
				form_of_overridden_args)
			for j, desc in ipairs(this_descs) do
				this_desc = desc
				if holonym_index > 0 then
					desc_index = desc_index + 1
					holonym_index = 0
				end
				if j < #this_descs then
					set_desc_joiner(this_desc, this_desc.separator)
				end
				descs[desc_index] = this_desc
				-- (An assignment to the never-read, undeclared global `last_was_new_style` used to live here.)
				holonym_index = #this_desc.holonyms + 1
			end
		else
			-- Old-style arguments can directly follow a new-style argument; they become additional holonyms
			-- tacked onto the end of the holonym list, and are displayed old-style except that there is no
			-- prefix before the first one following the new-style argument.
			in_place_desc = true
			if holonym_index == 0 then
				local entry_placetypes = split_on_slash(arg)
				this_desc = {placetypes = entry_placetypes, holonyms = {}}
				descs[desc_index] = this_desc
				holonym_index = holonym_index + 1
			else
				local holonyms = split_holonym(arg)
				for j, holonym in ipairs(holonyms) do
					if j > 1 then
						-- All but the first in a multi-holonym need an article. Not for the first one because e.g.
						-- {{place|en|city|s/Arizona|c/United States}} should not display as "a city in Arizona, the
						-- United States". The overall first holonym in the place description gets an article if
						-- needed regardless of our setting here.
						holonym.needs_article = true
						-- Insert "and" before the last holonym.
						if j == #holonyms then
							this_desc.holonyms[holonym_index] = {
								-- Use the no_display value from the first holonym; it should be the same for all
								-- holonyms. `unlinked_placename` should not be used.
								display_placename = "and",
								no_display = holonyms[1].no_display
							}
							holonym_index = holonym_index + 1
						end
					end
					this_desc.holonyms[holonym_index] = holonym
					m_placetypes.key_holonym_into_place_desc(this_desc, this_desc.holonyms[holonym_index])
					holonym_index = holonym_index + 1
				end
			end
		end
	end

	-- Give the last directive its `posttext` if the description parser didn't already set one: ": " when something
	-- follows the directives (an explicit def= other than "-", or at least one place description), otherwise "".
	if form_of_directives[1] and not form_of_directives[#form_of_directives].posttext then
		form_of_directives[#form_of_directives].posttext =
			(args.def and args.def ~= "-" or not args.def and descs[1]) and ": " or ""
	end

	-- Tracking code. This does nothing but add tracking for seen placetypes and qualifiers. The place will be linked to
	-- [[Wiktionary:Tracking/place/entry-placetype/PLACETYPE]] for all entry placetypes seen; in addition, if PLACETYPE
	-- has qualifiers (e.g. 'small city'), there will be links for the bare placetype minus qualifiers and separately
	-- for the qualifiers themselves:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/BARE_PLACETYPE]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/QUALIFIER]]
	-- Note that if there are multiple qualifiers, there will be links for each possible split. For example, for
	-- 'small maritime city', there will be the following links:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/small maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/small]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/maritime]]
	-- Finally, there are also links for holonym placetypes, e.g. if the holonym 'c/Italy' occurs, there will be the
	-- following link:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/holonym-placetype/country]]
	for _, desc in ipairs(descs) do
		for _, entry_placetype in ipairs(desc.placetypes) do
			local splits = m_placetypes.split_qualifiers_from_placetype(entry_placetype, "no canon qualifiers")
			-- Named `qualifier_split` so as not to shadow the file-level `split` function.
			for _, qualifier_split in ipairs(splits) do
				local _, this_qualifier, bare_placetype = unpack(qualifier_split, 1, 3)
				track("entry-placetype/" .. bare_placetype)
				if this_qualifier then
					track("entry-qualifier/" .. this_qualifier)
				end
			end
		end
		for _, holonym in ipairs(desc.holonyms) do
			if holonym.placetype then
				track("holonym-placetype/" .. holonym.placetype)
			end
		end
	end

	local extra_info = {}
	for _, extra_info_spec in ipairs(export.extra_info_args) do
		local extra_info_terms = parse_extra_info_arg(args[extra_info_spec.arg], extra_info_spec,
			-- If called from {{tcl}} and extra info argument was set by {{tcl}}, interpret the argument
			-- according to the language in 1=; otherwise interpret as English. To override this, prefix
			-- with the appropriate language.
			from_tcl and extra_info_overridden_set and extra_info_overridden_set[extra_info_spec.arg] and args[1]
				or enlang)
		if extra_info_terms then
			insert(extra_info, extra_info_terms)
		end
	end

	return {
		lang = args[1],
		args = args,
		directives = form_of_directives,
		descs = descs,
		extra_info = extra_info,
	}
end


-------- Definition-generating functions


-- Return a string with the wikilinks to the English translations of the word. `transl` is a list of translation
-- arguments, each a comma-separated string; `ids` is the parallel (possibly sparse) collection of comma-separated
-- sense IDs. Throws an error if an ID argument is given whose number of items differs from that of the corresponding
-- translation argument.
local function get_translations(transl, ids)
	local ret = {}
	for i, t in ipairs(transl) do
		local arg_transls = split_on_comma(t)
		local arg_ids = ids[i]
		if arg_ids then
			arg_ids = split_on_comma(arg_ids)
			if #arg_transls ~= #arg_ids then
				error(("Saw %s translation%s in t%s=%s but %s ID%s in tid%s=%s"):format(
					#arg_transls, #arg_transls > 1 and "s" or "", i == 1 and "" or i, t,
					#arg_ids, #arg_ids > 1 and "'s" or "", i == 1 and "" or i, ids[i]))
			end
		end
		for j, arg_transl in ipairs(arg_transls) do
			insert(ret, link(arg_transl, "en", arg_ids and arg_ids[j] or nil))
		end
	end
	return concat(ret, ", ")
end

-- Return the article (currently always `"the"`) to be prepended to the given placename, or nil. `decorated_placename`
-- is the placename as specified by the user along with any affix added to it. `placename` is the raw unlinked
-- placename, defaulting to the unlinked version of `decorated_placename` if not given. `placetypes` is a placetype or
-- list of placetypes for the placename. `suppress_holonym_use_the_check` suppresses checking the placetypes for
-- `holonym_use_the`.
local function get_placename_article(decorated_placename, placetypes, placename, suppress_holonym_use_the_check)
	local plain_name = m_placetypes.remove_links_and_html(decorated_placename)
	-- Nothing to add if the name already starts with an article.
	if plain_name:find("^the ") then
		return nil
	end
	placename = placename or plain_name
	if type(placetypes) == "string" then
		placetypes = {placetypes}
	end

	-- Does the unlinked decorated placename match any of the patterns in `patterns`?
	local function name_matches_any(patterns)
		for _, pattern in ipairs(patterns) do
			if plain_name:find(pattern) then
				return true
			end
		end
		return false
	end

	-- Step 1: an article explicitly recorded for this placename under the placetype or one of its equivalents.
	for _, placetype in ipairs(placetypes) do
		local explicit_article = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
			local by_placename = m_placetypes.placename_article[pt]
			local found = by_placename and by_placename[placename]
			if found then
				return found
			end
		end)
		if explicit_article then
			return explicit_article
		end
	end

	-- Step 2: known locations. Get equivalent placetypes of the specified placetype so that e.g.
	-- {{place|en|@official name of:Bahamas|island country|r/Caribbean}} put 'the' before Bahamas ("Bahamas" is just
	-- specified as a country but "island country" falls back to "country").
	local equiv_placetypes = {}
	for _, placetype in ipairs(placetypes) do
		for _, equiv in ipairs(m_placetypes.get_placetype_equivs(placetype)) do
			insert(equiv_placetypes, equiv.placetype)
		end
	end
	-- Look for a known location. We should be using find_matching_holonym_location() but that function doesn't
	-- currently work without alias resolution. Instead we check if any matching location has `the = true` set.
	-- In practice there aren't any cases where a given placename matches two locations, only one of which has
	-- `the = true` set.
	local location_query = {
		placetypes = equiv_placetypes,
		placename = placename,
		alias_resolution = "none",
	}
	for group, _, spec in m_placetypes.iterate_matching_location(location_query) do
		-- `iterate_holonym_location` doesn't initialize the spec if alias resolution is turned off, so check both
		-- the spec and group. Be careful in case `the = false` is explicitly given by the spec.
		local wants_the
		if spec.the ~= nil then
			wants_the = spec.the
		else
			wants_the = group.default_the
		end
		if wants_the then
			return "the"
		end
	end

	-- Step 3: See if the placetype requests an article to be placed before the placename. This occurs e.g. with
	-- 'sea'. But if the user specifies e.g. "sea:pref/Cortez", we'll wrongly get "the sea of the Cortez", so in that
	-- case we need to ignore the holonym article specified along with the placetype.
	if not suppress_holonym_use_the_check then
		for _, placetype in ipairs(placetypes) do
			local use_the = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
				return placetype_data[pt] and placetype_data[pt].holonym_use_the
			end)
			if use_the then
				return "the"
			end
		end
	end

	-- Step 4: patterns that apply regardless of placetype.
	if name_matches_any(m_placetypes.placename_the_re["*"]) then
		return "the"
	end

	-- Step 5: patterns specific to the placetype or one of its equivalents.
	for _, placetype in ipairs(placetypes) do
		local pattern_hit = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
			local patterns = m_placetypes.placename_the_re[pt]
			if not patterns then
				return nil
			end
			return name_matches_any(patterns) or nil
		end)
		if pattern_hit then
			return "the"
		end
	end

	return nil
end

-- Return the article (if any) needed before `decorated_placename` (the user-specified placename with any affix
-- added), where the underlying holonym object that generated the placename can be found at `holonym_index` in the
-- holonyms in `place_desc`. Returns nil if the holonym has no placetype (i.e. is raw text).
local function get_holonym_article(decorated_placename, place_desc, holonym_index)
	local holonym = place_desc.holonyms[holonym_index]
	local placetype = holonym.placetype
	if placetype then
		-- An explicit affix type on the holonym suppresses the `holonym_use_the` check.
		local has_affix_type = not not holonym.affix_type
		return get_placename_article(decorated_placename, placetype, holonym.unlinked_placename, has_affix_type)
	end
	return nil
end

-- Convert a holonym into display format. This adds wikilinks to holonyms and passes them through any display handlers,
-- which may (e.g.)
-- add the placetype to the holonym. If `needs_article` is true, prepend the article `"the"` if the
-- holonym requires it (e.g. if the holonym is `United States`). `needs_article` is set to true if we are processing
-- the first specified holonym in an old-style place description (i.e. the holonym directly following the entry
-- placetype, with no raw-text holonym in between).
--
-- Examples:
-- ({placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, true) returns
-- the template-expanded equivalent of "the {{l|en|United States}}".
-- ({placetype = "region", display_placename = "O'Higgins", unlinked_placename = "O'Higgins", affix_type = "suf"}, false)
-- returns the template-expanded equivalent of "{{l|en|O'Higgins}} region".
-- ({display_placename = "in the southern"}, false) returns "in the southern" (without wikilinking because .placetype
-- and .langcode are both nil).
local function format_holonym(place_desc, holonym_index, needs_article)
	local holonym = place_desc.holonyms[holonym_index]
	if holonym.no_display then
		return ""
	end

	local orig_needs_article = needs_article
	needs_article = needs_article or holonym.needs_article or holonym.force_the

	local output = holonym.display_placename
	local placetype = holonym.placetype
	-- `affix_pt_data` is declared here so it no longer leaks into the global environment.
	local affix_type_pt_data, affix_pt_data, affix_type, affix_is_prefix, affix, prefix, suffix, no_affix_strings
	local pt_equiv_for_affix_type, already_seen_affix, need_affix

	-- Implement display handlers.
	local display_handler = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
		return placetype_data[pt] and placetype_data[pt].display_handler
	end)
	if display_handler then
		output = display_handler(placetype, output)
	end

	if not holonym.suppress_affix then
		-- Implement adding an affix (prefix or suffix) based on the holonym's placetype. The affix will be
		-- added either if the placetype's placetype_data spec says so (by setting 'affix_type'), or if the
		-- user explicitly called for this (e.g. by using 'r:suf/O'Higgins'). Before adding the affix,
		-- however, we check to see if the affix is already present (e.g. the placetype is "district"
		-- and the placename is "Mission District"). The placetype can override the affix to add (by setting
		-- `prefix`, `suffix` or `affix`) and/or override the strings used for checking if the affix is already
		-- present (by setting 'no_affix_strings', which defaults to the affix explicitly given through `prefix`,
		-- `suffix` or `affix` if any are given). `prefix` and `suffix` take precedence over `affix` if both are
		-- set, but only when the appropriate type of affix is requested.

		-- Search through equivalent placetypes for a setting of `affix_type`, `affix`, `prefix` or `suffix`. If we
		-- find any, use them. If `affix_type` is given, it is overridden by the user's explicitly specified affix
		-- type. If either an `affix_type` is found or the user explicitly specified an affix type, the affix is
		-- displayed according to the following:
		-- 1. If `prefix`, `suffix` or `affix` is given by the placetype or equivalent placetypes, use it (e.g.
		--    placetype `administrative region` requests suffix "region" but doesn't set affix type; if the user
		--    explicitly specifies `administrative region` as the placetype for a holonym and specifies a suffixal
		--    affix type, use "region"). In this search, we stop looking if we find an explicit `affix_type`
		--    setting; if this is found without an associated affix setting, the assumption is the associated
		--    placetype was intended as the affix, not some explicit affix setting associated with a fallback
		--    placetype.
		-- 2. Otherwise, if the user explicitly requested an affix type, use the actual placetype (principle of
		--    least surprise).
		-- 3. Finally, fall back to the placetype associated with an explicit `affix_type` setting (which will
		--    always exist if we get this far).
		affix_type_pt_data, pt_equiv_for_affix_type = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and cdpt.affix_type and cdpt or nil
			end
		)
		-- Only the first return value is needed here; the second was previously stored in an unused global.
		affix_pt_data = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and (cdpt.affix_type or cdpt.affix or cdpt.prefix or cdpt.suffix) and cdpt or nil
			end
		)
		if affix_type_pt_data then
			affix_type = affix_type_pt_data.affix_type
			need_affix = true
		end
		if affix_pt_data then
			prefix = affix_pt_data.prefix or affix_pt_data.affix
			suffix = affix_pt_data.suffix or affix_pt_data.affix
			-- NOTE(review): this requests an affix even when only `affix`/`prefix`/`suffix` (and no
			-- `affix_type`) was found, which seems broader than the description above -- confirm intended.
			need_affix = true
		end
		no_affix_strings = affix_pt_data and affix_pt_data.no_affix_strings or
			affix_type_pt_data and affix_type_pt_data.no_affix_strings
		if holonym.affix_type and placetype then
			-- The user's explicit affix type wins; fall back to the placetype itself as the affix.
			affix_type = holonym.affix_type
			prefix = prefix or placetype
			suffix = suffix or placetype
			need_affix = true
		end
		if need_affix then
			-- At this point the affix_type has been determined and can't change any more, so we can figure out
			-- whether we need the calculated prefix or suffix.
			affix_is_prefix = affix_type == "pref" or affix_type == "Pref"
			if affix_is_prefix then
				affix = prefix
			else
				affix = suffix
			end
			if not affix then
				if not pt_equiv_for_affix_type then
					internal_error("Something wrong, `pt_equiv_for_affix_type` not set processing holonym: %s",
						holonym)
				end
				affix = pt_equiv_for_affix_type.placetype
				if not affix then
					internal_error("Something wrong, no affix could be located in `pt_equiv_for_affix_type` for " ..
						"holonym %s: %s", holonym, pt_equiv_for_affix_type)
				end
			end
			no_affix_strings = no_affix_strings or lc(affix)
			if holonym.pluralize_affix then
				affix = m_placetypes.pluralize_placetype(affix)
			end
			already_seen_affix = m_placetypes.check_already_seen_string(output, no_affix_strings)
		end
	end

	-- Link as English when the holonym has a placetype but no explicit language code; leave raw text unlinked.
	output = link(output, holonym.langcode or placetype and "en" or nil)

	-- Suffixal affix goes directly after the linked placename, before any article is prepended.
	if need_affix and not affix_is_prefix and not already_seen_affix then
		output = output .. " " .. (affix_type == "Suf" and ucfirst_all(affix) or affix)
	end

	if needs_article then
		local article = holonym.force_the and "the" or get_holonym_article(output, place_desc, holonym_index)
		if article then
			output = article .. " " .. output
		end
	end

	-- Prefixal affix goes before the (possibly article-prefixed) placename, joined with "of".
	if affix_is_prefix and not already_seen_affix then
		output = (affix_type == "Pref" and ucfirst_all(affix) or affix) .. " of " .. output
		if orig_needs_article then
			-- Put the article before the added affix if we're the first holonym in the place description. This is
			-- distinct from the article added above for the holonym itself; cf. "c:pref/United States,Canada" ->
			-- "the countries of the United States and Canada". We need to use the value of `needs_article` passed
			-- in from the function, which indicates whether we're processing the first holonym.
			output = "the " .. output
		end
	end

	return output
end

-- Format a holonym for display, taking into account the entry's placetype (specifically, the last placetype if there
-- are more than one, excluding conjunctions and parenthetical items); the holonym's index among the holonyms in the
-- template (which specifies what the previous holonym is and whether it is the first holonym); and the full place
-- description (which helps resolve ambiguities in holonyms when looking up known locations). This may involve putting a
-- preposition ("in" or "of") before the formatted holonym, particularly if it is the first one, and may involve
-- prepending a comma.
-- If `holonym_no_prefix` is specified, nothing except a space is put before the holonym; used
-- when formatting mixed new/old-style descriptions.
local function format_holonym_in_context(entry_placetype, place_desc, holonym_index, holonym_no_prefix)
	-- Work out the delimiter text that goes in front of the formatted holonym.
	local function leading_delimiter()
		if holonym_no_prefix then
			return " "
		end
		local holonym = place_desc.holonyms[holonym_index]
		if holonym.no_display then
			return ""
		end
		-- If holonym.placetype is nil, the holonym is just raw text, e.g. 'in southern'.
		if holonym_index == 1 then
			if holonym.placetype then
				-- First holonym with a placetype: preposition chosen according to the entry placetype.
				return " " .. m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " "
			end
			if holonym.display_placename:find("^,") then
				-- Raw text supplying its own leading comma gets no space.
				return ""
			end
			return " "
		end
		local delimiter = ""
		local previous = place_desc.holonyms[holonym_index - 1]
		if previous.placetype and not holonym.suppress_comma then
			local shown = holonym.display_placename
			-- No comma before these connecting words.
			if not (shown == "and" or shown == "in" or shown == "and the" or shown == "in the") then
				delimiter = delimiter .. ","
			end
		end
		if holonym.placetype or not holonym.display_placename:find("^,") then
			delimiter = delimiter .. " "
		end
		return delimiter
	end

	local is_first_prefixed_holonym = not holonym_no_prefix and holonym_index == 1
	return leading_delimiter() .. format_holonym(place_desc, holonym_index, is_first_prefixed_holonym)
end

-- Return the linked description of a placetype. This splits off any qualifiers and displays them separately.
local function get_placetype_description(placetype)
	local qualifier_prefix = ""
	local remaining = placetype
	for _, qualifier_split in ipairs(m_placetypes.split_qualifiers_from_placetype(placetype)) do
		local prev_qualifier, this_qualifier, bare_placetype = unpack(qualifier_split, 1, 3)
		if not this_qualifier then
			qualifier_prefix = ""
		elseif prev_qualifier then
			qualifier_prefix = prev_qualifier .. " " .. this_qualifier .. " "
		else
			qualifier_prefix = this_qualifier .. " "
		end
		local display_form = m_placetypes.get_placetype_display_form(bare_placetype)
		if display_form then
			return qualifier_prefix .. display_form
		end
		-- No display form at this split; remember the bare placetype as the fallback.
		remaining = bare_placetype
	end
	return qualifier_prefix .. remaining
end

-- Return the linked description of a qualifier (which may be multiple words).
local function get_qualifier_description(qualifier)
	-- Append a dummy placetype so the qualifier-splitting code can be reused, then take the last split it returns.
	local splits = m_placetypes.split_qualifiers_from_placetype(qualifier .. " foo")
	-- Named `last_split` so as not to shadow the file-level `split` function.
	local last_split = splits[#splits]
	local prev_qualifier, this_qualifier = unpack(last_split, 1, 2)
	return prev_qualifier and prev_qualifier .. " " .. this_qualifier or this_qualifier
end

-- Format a set of form-of directive terms. `overall_place_spec` is the overall parsed {{tl|place}} spec;
-- `directive_terms` is the parsed directive (with fields `terms`, `spec`, `directive` and optionally `conj`);
-- `ucfirst` requests upper-casing of the first letter of the directive text; `from_tcl` suppresses tracking.
-- Returns the formatted text produced by `format_form_of` in the form-of module.
local function format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl)
	local formatted_terms = {}

	-- Collect the entry placetypes of all place descriptions; they are used to decide whether a term needs an
	-- article. There may be no place descriptions at all (directive-only invocation), in which case the list is
	-- empty rather than an attempt to index a nonexistent first description.
	local descs = overall_place_spec.descs
	local placetypes
	if descs[1] and not descs[2] then
		placetypes = descs[1].placetypes
	else
		placetypes = {}
		for _, desc in ipairs(descs) do
			m_table.extend(placetypes, desc.placetypes)
		end
	end

	for _, termobj in ipairs(directive_terms.terms) do
		local placename_article
		-- Only look for an article when the term is displayed as-is (no alt text, no embedded links).
		if not termobj.alt and termobj.term and not termobj.term:find("%[%[") then
			placename_article = get_placename_article(termobj.term, placetypes)
		end
		local linked_term = m_links.full_link(termobj, "term", nil, "show qualifiers")
		linked_term = "<span class='form-of-definition-link'>" .. linked_term .. "</span>"
		if termobj.eq then
			linked_term = linked_term .. " (= " .. m_links.full_link {term = termobj.eq, lang = enlang} .. ")"
		end
		if placename_article then
			linked_term = placename_article .. " " .. linked_term
		end
		insert(formatted_terms, linked_term)
	end

	local spec = directive_terms.spec
	local text = spec.text
	if type(text) == "function" then
		text = text(overall_place_spec)
	end
	if text == "+" then
		-- "+" means: use the directive name itself as the text.
		text = directive_terms.directive
	end
	if ucfirst then
		text = m_strutils.ucfirst(text)
	end

	if not from_tcl then
		local tracking_prefix = "form-of/" .. directive_terms.directive
		track(tracking_prefix)
		local langcode = overall_place_spec.lang:getCode()
		local full_langcode = overall_place_spec.lang:getFullCode()
		track(tracking_prefix .. "/" .. langcode)
		if full_langcode ~= langcode then
			track(tracking_prefix .. "/" .. full_langcode)
		end
		if full_langcode ~= "en" then
			track(tracking_prefix .. "/non-english")
		end
	end

	-- Parenthesized to truncate the result to a single return value.
	return (require(form_of_module).format_form_of {
		text = text,
		lemmas = m_table.serialCommaJoin(formatted_terms,
			{conj = directive_terms.conj or spec.conjunction or "and"}),
		lemma_classes = false,
		-- text_classes = "place-text",
	})
end

-- Format a set of extra-info terms for extra information that is sometimes added to a definition, such as the capital,
-- largest city, modern name, official name, etc. `overall_place_spec` is the overall parsed {{tl|place}} spec (see
-- comment at top of file); `extra_info_terms` is the terms spec for this type of extra-info (as returned by
-- `parse_extra_info_arg`); and `sentence_style` indicates whether we're generating a sentence-style definition (as
-- suitable for an English-language term without a translation specified using t=).
local function format_extra_info(overall_place_spec, extra_info_terms, sentence_style)
	local formatted_terms = {}
	for _, termobj in ipairs(extra_info_terms.terms) do
		insert(formatted_terms, m_links.full_link(termobj, nil, nil, "show qualifiers"))
	end

	local spec = extra_info_terms.spec
	local text = spec.text
	if type(text) == "function" then
		text = text(overall_place_spec)
	end
	if text == "+" then
		-- "+" means: use the argument name itself as the text.
		text = spec.arg
	end
	if spec.auto_plural and formatted_terms[2] then
		text = pluralize(text)
	end
	if spec.with_colon then
		text = text .. ":"
	end
	if sentence_style and spec.match_sentence_style then
		text = ". " .. m_strutils.ucfirst(text)
	else
		text = "; " .. text
	end

	-- FIXME: Use joinSegments when available.
	-- return text .. " " ..
	--	m_table.joinSegments(formatted_terms, {conj = extra_info_terms.conj or spec.conjunction or "and"})
	return text .. " " ..
		m_table.serialCommaJoin(formatted_terms, {conj = extra_info_terms.conj or spec.conjunction or "and"})
end

-- Format an old-style place description (with separate arguments for the placetype and each holonym) for display and
-- return the resulting string.
local function format_old_style_place_desc_for_display(args, place_desc, desc_index, with_article, ucfirst)
	local placetypes = place_desc.placetypes
	-- The placetype used to determine whether "in" or "of" follows is the last placetype if there are
	-- multiple slash-separated placetypes, but ignoring "and", "or" and parenthesized notes
	-- such as "(one of 254)".
	local entry_placetype = nil
	local chunks = {}

	local function emit(txt)
		insert(chunks, txt)
	end

	local function is_conjunction(item)
		return item == "and" or item == "or"
	end

	-- Is at least one holonym of the description a real (placetype-bearing) holonym rather than raw text?
	local function has_placetype_holonym()
		if not place_desc.holonyms then
			return false
		end
		for _, holonym in ipairs(place_desc.holonyms) do
			if holonym.placetype then
				return true
			end
		end
		return false
	end

	-- Locate the last conjunction among the placetypes (deliberately no early exit; we want the last one in case
	-- of more than one).
	local last_conj_index
	for idx, pt in ipairs(placetypes) do
		if is_conjunction(pt) then
			last_conj_index = idx
		end
	end

	-- Placetypes up to and including the one just after the last conjunction are joined with that conjunction.
	local first_remaining
	if last_conj_index then
		track("multiple-placetypes-with-and")
		if last_conj_index == #placetypes then
			error("Conjunctions 'and' and 'or' cannot occur last in a set of slash-separated placetypes: " ..
				concat(placetypes, "/"))
		end
		local joined = {}
		for idx = 1, last_conj_index + 1 do
			local pt = placetypes[idx]
			if not is_conjunction(pt) then
				if idx > 1 and pt:find("^%(") then
					-- append placetypes beginning with a paren to previous item
					joined[#joined] = joined[#joined] .. " " .. pt
				else
					entry_placetype = pt
					insert(joined, get_placetype_description(pt))
				end
			end
		end
		emit(m_table.serialCommaJoin(joined, {conj = placetypes[last_conj_index]}))
		first_remaining = last_conj_index + 2
	else
		first_remaining = 1
	end

	for idx = first_remaining, #placetypes do
		local pt = placetypes[idx]
		-- Check for and, or and placetypes beginning with a paren (so that things like
		-- "{{place|en|county/(one of 254)|s/Texas}}" work).
		if m_placetypes.placetype_is_ignorable(pt) then
			if #chunks > 0 then
				emit(" ")
			end
			emit(pt)
		else
			entry_placetype = pt
			-- Join multiple placetypes with comma unless placetypes are already
			-- joined with "and". We allow "the" to precede the second placetype
			-- if they're not joined with "and" (so we get "city and county seat of ..."
			-- but "city, the county seat of ...").
			if idx > 1 then
				emit(", ")
				local pt_article = m_placetypes.get_placetype_article(pt)
				if idx > first_remaining and pt_article ~= "the" then
					-- Track cases where we are comma-separating multiple placetypes without the second one starting
					-- with "the", as they may be mistakes. The occurrence of "the" is usually intentional, e.g.
					-- {{place|zh|municipality/state capital|s/Rio de Janeiro|c/Brazil|t1=Rio de Janeiro}}
					-- for the city of [[Rio de Janeiro]], which displays as "a municipality, the state capital of ...".
					track("multiple-placetypes-without-and-or-the")
				end
				if pt_article then
					emit(pt_article)
					emit(" ")
				end
			end
			emit(get_placetype_description(pt))
		end
	end

	if place_desc.holonyms then
		for holonym_index in ipairs(place_desc.holonyms) do
			emit(format_holonym_in_context(entry_placetype, place_desc, holonym_index))
		end
	end

	local gloss = concat(chunks)
	if not with_article then
		return gloss
	end

	local article
	if desc_index == 1 then
		article = args.a
	else
		-- For the second and later descriptions, only add an article when there is a following non-raw-text
		-- holonym; otherwise the place type given might be raw text as well, so don't add an article.
		local add_article = has_placetype_holonym()
		if add_article then
			track("second-or-higher-description-with-added-article")
		else
			track("second-or-higher-description-suppressed-article")
			return gloss
		end
	end

	article = article or m_placetypes.get_placetype_article(place_desc.placetypes[1], ucfirst)
	if article then
		gloss = article .. " " .. gloss
	elseif ucfirst then
		gloss = m_strutils.ucfirst(gloss)
	end
	return gloss
end

--[==[
Get the full gloss (English description) of a new-style place description. New-style place descriptions are
specified with a single string containing raw text interspersed with placetypes and holonyms surrounded by `<<...>>`.
Exported for use by [[Module:demonyms]].
]==]
function export.format_new_style_place_desc_for_display(args, place_desc, with_article)
	local chunks = {}

	local function emit(txt)
		insert(chunks, txt)
	end

	if with_article and args.a then
		emit(args.a .. " ")
	end

	-- Highest holonym index displayed through the `order` list.
	local highest_shown = 0
	for _, item in ipairs(place_desc.order) do
		local kind, value = item.type, item.value
		if kind == "raw" then
			emit(value)
		elseif kind == "placetype" then
			emit(get_placetype_description(value))
		elseif kind == "qualifier" then
			emit(get_qualifier_description(value))
		elseif kind == "holonym" then
			-- For holonym entries, `value` is an index into `place_desc.holonyms`.
			emit(format_holonym(place_desc, value, false))
			if value > highest_shown then
				highest_shown = value
			end
		else
			internal_error("Unrecognized segment type %s", kind)
		end
	end

	-- Any holonyms beyond those referenced in `order` are formatted in context; only the first of them is
	-- formatted with `holonym_no_prefix` set.
	if place_desc.holonyms and highest_shown < #place_desc.holonyms then
		local first_extra = highest_shown + 1
		for holonym_index = first_extra, #place_desc.holonyms do
			emit(format_holonym_in_context(nil, place_desc, holonym_index, holonym_index == first_extra))
		end
	end

	return concat(chunks)
end

-- Return a string with the gloss (the description of the place itself, as opposed to translations). If `ucfirst` is
-- given, the gloss's first letter is made upper case. If `sentence_style` is given, the "extra info" (modern name,
-- capital, largest city, etc.) is displayed as separated sentences; otherwise, it is displayed separated from the main
-- definition by semicolons.
local function get_display_form(data)
	local overall_place_spec, ucfirst, sentence_style, drop_extra_info, extra_info_overridden_set, from_tcl =
		data.overall_place_spec, data.ucfirst, data.sentence_style, data.drop_extra_info,
		data.extra_info_overridden_set, data.from_tcl
	local args = overall_place_spec.args

	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end

	-- Form-of directives come first. Their text is only displayed when there is no def= override or when def=-.
	if overall_place_spec.directives and overall_place_spec.directives[1] then
		for i, directive_terms in ipairs(overall_place_spec.directives) do
			ins(directive_terms.pretext)
			if directive_terms.pretext ~= "" then
				-- Something precedes the directive, so it no longer starts the definition.
				ucfirst = false
			end
			if not args.def or args.def == "-" then
				ins(format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl))
				ucfirst = false
				if i == #overall_place_spec.directives and directive_terms.posttext then
					ins(directive_terms.posttext)
				end
			end
		end
	end

	-- def=- suppresses everything after the directives.
	if args.def == "-" then
		return concat(parts)
	end

	if args.def then
		if args.def:find("<<") then
			local def_desc = export.parse_new_style_place_desc(args.def, args[1])
			ins(export.format_new_style_place_desc_for_display({}, def_desc, false))
		else
			ins(args.def)
		end
	else
		local include_article = true
		for n, desc in ipairs(overall_place_spec.descs) do
			-- New-style descriptions carry an `order` list; old-style ones don't.
			if desc.order then
				ins(export.format_new_style_place_desc_for_display(args, desc, n == 1))
			else
				ins(format_old_style_place_desc_for_display(args, desc, n, include_article, ucfirst))
			end
			if desc.joiner then
				ins(desc.joiner)
			end
			include_article = desc.include_following_article
			ucfirst = false
		end
	end

	-- addl= text: a leading semicolon or colon is used as-is; a leading underscore is replaced by a space;
	-- otherwise the text is preceded by a comma. (An assignment to the never-read, undeclared global `posttext`
	-- used to live here.)
	local addl = args.addl
	if addl then
		if addl:find("^[;:]") then
			ins(addl)
		elseif addl:find("^_") then
			ins(" " .. addl:sub(2))
		else
			ins(", " .. addl)
		end
	end

	for _, extra_info_terms in ipairs(overall_place_spec.extra_info) do
		-- Include a given extra info term either when
		-- (1) drop_extra_info not set (it's set by {{tcl}}), or
		-- (2) the extra info term is marked as "display even when dropped" (e.g. modern= or full=, to help understand
		--     the term's sense), or
		-- (3) the term was overridden by a `place_*=` setting in {{tcl}}.
		if not drop_extra_info or extra_info_terms.spec.display_even_when_dropped or
			extra_info_overridden_set and extra_info_overridden_set[extra_info_terms.spec.arg] then
			ins(format_extra_info(overall_place_spec, extra_info_terms, sentence_style))
		end
	end

	return concat(parts)
end

-- Return the definition line. `data` has the fields `overall_place_spec`, `from_tcl`, `drop_extra_info`,
-- `extra_info_overridden_set` (all passed on to `get_display_form`) and `translation_follows` (if set, the gloss
-- precedes the translations and is separated from them by a colon; otherwise the gloss follows the translations in
-- parentheses).
local function get_def(data)
	local overall_place_spec, from_tcl, drop_extra_info, extra_info_overridden_set, translation_follows =
		data.overall_place_spec, data.from_tcl, data.drop_extra_info, data.extra_info_overridden_set,
		data.translation_follows
	local args = overall_place_spec.args
	-- English entries get sentence-style definitions, capitalized unless nocap= is given.
	local sentence_style = overall_place_spec.lang:getCode() == "en"
	local ucfirst = sentence_style and not args.nocap

	if #args.t > 0 then
		-- Translations were given, so the gloss is secondary: no capitalization, no sentence style.
		local gloss = get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = false,
			sentence_style = false,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
		if from_tcl and not args.tcl_nolc then
			gloss = m_strutils.lcfirst(gloss)
		end
		if translation_follows then
			return (gloss == "" and "" or gloss .. ": ") .. get_translations(args.t, args.tid)
		else
			return get_translations(args.t, args.tid) .. (gloss == "" and "" or " (" .. gloss .. ")")
		end
	else
		return get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = ucfirst,
			sentence_style = sentence_style,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
	end
end


---------- Functions for the category wikicode

-- The code in this section finds the categories to which a given place belongs. See comment at top of file.

--[=[
Find the appropriate category specs for a given place description and placetype.
For example, for the template invocation {{tl|place|en|city/and/county|s/Pennsylvania|c/US}}, which results in the
place description
```
{
	placetypes = {"city", "and", "county"},
	holonyms = {
		{placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
		{placetype = "country", display_placename = "United States", unlinked_placename = "United States"},
	},
	holonyms_by_placetype = {
		state = {"Pennsylvania"},
		country = {"United States"},
	},
}
```
the call
```
find_placetype_cat_specs {
	entry_placetype = "city",
	place_desc = {
		placetypes = {"city", "and", "county"},
		holonyms = {
			{placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
			{placetype = "country", display_placename = "United States", unlinked_placename = "United States"},
		},
		holonyms_by_placetype = {
			state = {"Pennsylvania"},
			country = {"United States"},
		},
	},
}
```
might produce the return value
```
{
	entry_placetype = "city",
	cat_specs = {"Cities in Pennsylvania, USA"},
	triggering_holonym = {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
	triggering_holonym_index = 1,
}
```
See the comment at the top of the section for a description of category specs and the overall algorithm.

On entry, `data` is an object with the following fields:
* `entry_placetype`: the entry placetype (or equivalent) used to look up the category data in placetype_data, which
  must have already been resolved to a placetype with an entry in `placetype_data`;
* `place_desc`: the full place description as documented at the top of the file (used only for its holonyms);
* `first_holonym_index`: the index of the first holonym to consider when iterating through the holonyms (used to
  implement the `:also` holonym placetype modifier);
* `overriding_holonym`: an optional overriding holonym to use, in place of iterating through the holonyms (used to
  implement categorizing other holonyms of the same type as the triggering holonym, so that e.g.
  {{tl|place|en|river|s/Kansas,Nebraska}}, or equivalently {{tl|place|en|river|s/Kansas|and|s/Nebraska}}, works);
* `from_demonym`: we are called from {{tl|demonym-noun}} or {{tl|demonym-adj}} instead of {{tl|place}}, and should
  generate categories appropriate to those templates.
* `form_of_directive`: A form-of directive prefix such as `FORMER_NAME_OF`. If specified, use that type prefix to
  generate categories appropriate to the form-of directive (in addition to the regular categories generated for the
  {{tl|place}} invocation, which happens in a separate call).

The return value is {nil} if no category specs could be located, otherwise an object with the following fields:
* `entry_placetype`: the placetype that should be used to construct categories when `true` is one of the returned
  category specs (normally the same as the `entry_placetype` passed in, but will be different when a "fallback" key
  exists and is used);
* `cat_specs`: list of category specs as described above;
* `triggering_holonym`: the triggering holonym (see the comment at the top of the section), or nil if there was no
  triggering holonym;
* `triggering_holonym_index`: the index of the triggering holonym in the list of holonyms in `place_desc`, or nil if an
  overriding holonym was passed in or there was no triggering holonym.
]=]
local function find_placetype_cat_specs(data)
	local entry_placetype, place_desc, first_holonym_index, overriding_holonym, from_demonym =
		data.entry_placetype, data.place_desc, data.first_holonym_index, data.overriding_holonym, data.from_demonym
	local form_of_directive = data.form_of_directive

	-- Look up the category specs for a single holonym. Returns a non-empty list of category specs plus the
	-- equivalent entry placetype under which they were found, or nil. Three sources are tried in order:
	-- (1) political_division_cat_handler; (2) the `cat_handler` of the entry placetype (or an equivalent);
	-- (3) unless `no_fallback` is set, a wildcard key `HOLONYM_PLACETYPE/*` in the entry placetype's data.
	-- `no_fallback` is also forwarded to get_equiv_placetype_prop() for lookups (1) and (2). `index` is the holonym's
	-- index in `place_desc.holonyms` (nil for an overriding holonym) and is passed on to the handlers.
	local function fetch_cat_specs(holonym_to_match, index, no_fallback)
		local holonym_placetype = holonym_to_match.placetype
		if not holonym_placetype then
			-- raw text in place of holonym
			return nil
		end
		local holonym_placename = holonym_to_match.unlinked_placename
		if not holonym_placename then
			internal_error("Missing unlinked_placename in holonym (index %s): %s", index, holonym_to_match)
		end
		-- (1) Try political_division_cat_handler on every combination of equivalent entry and holonym placetypes.
		local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
					return m_placetypes.political_division_cat_handler {
						entry_placetype = equiv_entry_pt,
						holonym_placetype = equiv_holonym_pt,
						holonym_placename = holonym_placename,
						holonym_index = index,
						place_desc = place_desc,
						from_demonym = from_demonym,
					}
				end)
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_specs and cat_specs[1] then
			return cat_specs, equiv_entry_placetype_and_qualifier.placetype
		end
		-- (2) Find the first equivalent entry placetype that defines its own cat_handler, and run that handler on
		-- each equivalent holonym placetype.
		local cat_handler, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
				if entry_placetype_data and entry_placetype_data.cat_handler then
					return entry_placetype_data.cat_handler
				end
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_handler then
			local cat_specs = m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
				return cat_handler {
					entry_placetype = equiv_entry_placetype_and_qualifier.placetype,
					holonym_placetype = equiv_holonym_pt,
					holonym_placename = holonym_placename,
					holonym_index = index,
					place_desc = place_desc,
					from_demonym = from_demonym,
				}
			end)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end
		-- (3) Wildcard specs such as `country/*`; skipped entirely when `no_fallback` is set.
		if not no_fallback then
			local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
				function(equiv_entry_pt)
					local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
					if entry_placetype_data then
						return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
							return entry_placetype_data[equiv_holonym_pt .. "/*"]
						end)
					end
				end, {form_of_directive = form_of_directive}
			)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end
		return nil
	end

	if overriding_holonym then
		-- FIXME, change the algorithm to eliminate overriding_holonym
		local cat_specs, fetched_entry_placetype = fetch_cat_specs(overriding_holonym, nil)
		if cat_specs and cat_specs[1] then
			return {
				entry_placetype = fetched_entry_placetype,
				cat_specs = cat_specs,
				triggering_holonym = overriding_holonym,
				-- no triggering_holonym_index
			}
		end
	else
		-- We loop twice over holonyms, the first time setting `no_fallback` so that we process only category specs for
		-- the specifically given entry placetype (possibly with preceding qualifiers). The reason for this is to
		-- correctly handle cases like [[Poblacion IX]]:
		-- {{place|en|barangay|mun/Roxas|p/Capiz|c/Philippines}}.
		-- "barangay" falls back to "neighborhood", and without the `no_fallback` loop, the neighborhood cat handler run
		-- on the mun/Roxas holonym will take precedence over the barangay-specific setting for p/Capiz because we
		-- check, for each holonym in turn, first for a matching spec through political_division_cat_handler, then a cat
		-- handler, then a wildcard spec like country/*. During the first no-fallback loop, we disable checking for
		-- wildcard specs because it seems a fallback matching exactly or through a cat handler on an earlier holonym
		-- would be better than a wildcard match for the exact entry placetype at a later holonym. (FIXME: But I don't
		-- know for sure; maybe we should check wildcard holonyms on the exact entry placetype first, or contrariwise
		-- maybe we should check only exact-match holonyms through political_division_cat_handler on the exact entry
		-- placetype first, not even checking other cat handlers.)
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				-- Any truthy value enables no-fallback mode; the string is self-documenting at the call site.
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i, "no_fallback")
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
		-- Second pass: same holonyms, this time with fallbacks and wildcard specs allowed.
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i)
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
	end

	return nil
end

-- Turn a list of category specs (see comment at section top) into the corresponding categories (minus the language
-- code prefix). The function is given the following arguments:
-- (1) `place_desc`: the place description, passed on to find_matching_holonym_location();
-- (2) `cat_data`: an object with the fields `cat_specs` (the category specs, e.g. as retrieved using
--     find_placetype_cat_specs()), `entry_placetype` (the entry placetype used to construct a category when a spec
--     is `true`), `triggering_holonym` (a holonym object; see comment at top of file; or nil if no triggering
--     holonym) and `triggering_holonym_index` (may be nil).
-- With a triggering holonym, a spec of `true` becomes "PLURAL_PLACETYPE PREPOSITION +++", and `+++` in any spec is
-- replaced with the prefixed key of the known location matching the holonym; if no known location matches, the
-- category is logged and tracked but not added. Without a triggering holonym, `true` becomes just the pluralized
-- placetype, and a spec containing `+++` is an internal error.
local function cat_specs_to_categories(place_desc, cat_data)
	local all_cats = {}
	local cat_specs, entry_placetype, triggering_holonym, triggering_holonym_index =
		cat_data.cat_specs, cat_data.entry_placetype, cat_data.triggering_holonym, cat_data.triggering_holonym_index
	if triggering_holonym then
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst") .. " " ..
					m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " +++"
			else
				cat = cat_spec
			end
			if cat:find("%+%+%+") then
				-- Only `group`, `key` and `spec` are used below; `container_trail` is ignored.
				local group, key, spec, container_trail = m_placetypes.find_matching_holonym_location {
					holonym_placetype = triggering_holonym.placetype,
					holonym_placename = triggering_holonym.unlinked_placename,
					holonym_index = triggering_holonym_index,
					place_desc = place_desc,
				}
				if group then
					cat = cat:gsub("%+%+%+", m_strutils.replacement_escape(m_placetypes.get_prefixed_key(key, spec)))
					insert(all_cats, cat)
				else
					mw.log(("Unable to insert category for cat spec '%s' because holonym '%s/%s' did not match a " ..
						"known location"):format(cat, triggering_holonym.placetype,
						triggering_holonym.unlinked_placename))
					track("cant-match-holonym-for-category-spec")
				end
			else
				insert(all_cats, cat)
			end
		end
	else
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst")
			else
				cat = cat_spec
				if cat:find("%+%+%+") then
					internal_error("Category %s contains +++ but there is no holonym to substitute", cat)
				end
			end
			insert(all_cats, cat)
		end
	end
	return all_cats
end

-- Return the categories (without initial lang code) that should be added to the entry, given the place description
-- (which specifies the entry placetype(s) and holonym(s); see top of file) and a particular entry placetype (e.g.
-- "city"). Note that only the holonyms from the place description are looked at, not the entry placetypes in the place
-- description. `from_demonym` and `form_of_directive` are passed through to find_placetype_cat_specs(). If no holonym
-- yields a category, the `default` category specs of the entry placetype (or an equivalent) are used; if there are
-- none, an empty list is returned.
local function get_placetype_cats(place_desc, entry_placetype, from_demonym, form_of_directive)
	local cats = {}

	local first_holonym_index = 1
	while first_holonym_index <= #place_desc.holonyms do
		-- Find the category specs (see top-of-file comment) corresponding to the holonym(s) in the place description.
		local cat_data = find_placetype_cat_specs {
			entry_placetype = entry_placetype,
			place_desc = place_desc,
			first_holonym_index = first_holonym_index,
			from_demonym = from_demonym,
			form_of_directive = form_of_directive,
		}

		-- Check if no category spec could be found.
		if not cat_data then
			break
		end
		local triggering_holonym = cat_data.triggering_holonym
		if not triggering_holonym then
			internal_error("find_placetype_cat_specs should have returned a triggering holonym: %s", cat_data)
		end

		-- Generate categories for the category specs found.
		extend(cats, cat_specs_to_categories(place_desc, cat_data))

		-- Also generate categories for other holonyms of the same placetype, so that e.g.
		-- {{place|en|city|s/Kansas|and|s/Missouri|c/USA}} generates both [[:Category:en:Cities in Kansas, USA]] and
		-- [[:Category:en:Cities in Missouri, USA]].
		first_holonym_index = cat_data.triggering_holonym_index
		-- Loop over non-fallback equivalent placetypes to the triggering holonym's placetype, in case it is
		-- non-canonical (e.g. `cities/San Francisco`). This matches the loop over equivalent places in
		-- key_holonym_into_place_desc().
		local equiv_triggering_placetypes = m_placetypes.get_placetype_equivs(triggering_holonym.placetype,
			{no_fallback = true})
		for _, equiv in ipairs(equiv_triggering_placetypes) do
			local other_holonyms_of_same_type = place_desc.holonyms_by_placetype[equiv.placetype]
			if other_holonyms_of_same_type then
				for _, other_placename_of_same_type in ipairs(other_holonyms_of_same_type) do
					if other_placename_of_same_type ~= triggering_holonym.unlinked_placename then
						-- Synthesize a holonym carrying the triggering holonym's placetype and the other placename.
						local overriding_holonym = {
							placetype = triggering_holonym.placetype,
							unlinked_placename = other_placename_of_same_type,
						}
						local other_cat_data = find_placetype_cat_specs {
							entry_placetype = entry_placetype,
							place_desc = place_desc,
							overriding_holonym = overriding_holonym,
							from_demonym = from_demonym,
							form_of_directive = form_of_directive,
						}
						if other_cat_data then
							extend(cats, cat_specs_to_categories(place_desc, other_cat_data))
						end
					end
				end
			end
		end

		-- If there are any later-specified holonyms that had the modifier :also, try to produce categories for them
		-- as well.
		first_holonym_index = first_holonym_index + 1
		while first_holonym_index <= #place_desc.holonyms do
			if place_desc.holonyms[first_holonym_index].continue_cat_loop then
				break
			end
			first_holonym_index = first_holonym_index + 1
		end
	end

	if cats[1] then
		return cats
	end

	-- No holonym produced a category: fall back to the `default` category specs of the entry placetype or of an
	-- equivalent placetype, if there are any.
	local entry_pt_default, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
		function(pt) return m_placetypes.placetype_data[pt] and m_placetypes.placetype_data[pt].default end,
		{form_of_directive = form_of_directive})
	if entry_pt_default then
		return cat_specs_to_categories(place_desc, {
			cat_specs = entry_pt_default,
			entry_placetype = equiv_entry_placetype_and_qualifier.placetype,
			-- no triggering holonym
		})
	end

	return {}
end

--[==[
Iterate through each type of place and return a list of the categories that need to be added to the entry.
The returned categories need to be formatted using `format_cats`, as they can be either topic-style categories (by
default) or langname-style categories (if prefixed with `cln:`). The function is passed the overall place spec, which
contains all the parsed info on the {{tl|place}} call (see comment at top of file), the parsed arguments (needed for
arguments not parsed by `parse_overall_place_spec` and used primarily to add "bare categories" corresponding to toponyms
for known locations), and `from_demonym`, which is true if we're being called from {{tl|demonym-noun}} or
{{tl|demonym-adj}} (in this case, we only want certain categories added, specifically bare categories corresponding to
the specified holonym(s)).
]==]
function export.get_cats(args, overall_place_spec, from_demonym)
	local result = {}
	local descs = overall_place_spec.descs

	handle_category_implications(descs, m_placetypes.cat_implications)
	m_placetypes.augment_holonyms_with_container(descs)

	-- Append to `result` the categories for every non-ignorable entry placetype of `desc`. The two trailing
	-- arguments are handed to get_placetype_cats() unchanged and may be omitted.
	local function add_entry_placetype_cats(desc, demonym_flag, type_prefix)
		for _, entry_pt in ipairs(desc.placetypes) do
			if not m_placetypes.placetype_is_ignorable(entry_pt) then
				extend(result, get_placetype_cats(desc, entry_pt, demonym_flag, type_prefix))
			end
		end
	end

	-- Categories contributed by form-of directives: the directive's own `cat` setting (a string or a list), plus,
	-- when the directive has a type prefix, the placetype categories generated with that prefix.
	local directives = overall_place_spec.directives
	if directives then -- not necessarily when called from [[Module:demonym]]
		for _, directive in ipairs(directives) do
			local directive_spec = directive.spec
			local directive_cats = directive_spec.cat
			if type(directive_cats) == "string" then
				insert(result, directive_cats)
			elseif directive_cats then
				for _, directive_cat in ipairs(directive_cats) do
					insert(result, directive_cat)
				end
			end
			local type_prefix = directive_spec.type_prefix
			if type_prefix then
				for _, desc in ipairs(descs) do
					add_entry_placetype_cats(desc, from_demonym, type_prefix)
				end
			end
		end
	end

	if not from_demonym then
		local bare_cats = m_placetypes.get_bare_categories(args, overall_place_spec)
		extend(result, bare_cats)
	end

	for _, desc in ipairs(descs) do
		if not from_demonym then
			add_entry_placetype_cats(desc)
		end
		-- Also add generic place categories for the holonyms listed (e.g. a category like
		-- [[Category:Places in Merseyside, England]]). This is handled through the special placetype "*".
		extend(result, get_placetype_cats(desc, "*", from_demonym))
	end

	-- Manually specified categories (cat=); each value may hold several comma-separated categories.
	local manual_cats = args.cat
	if manual_cats then -- not necessarily when called from [[Module:demonym]]
		for _, manual_cat in ipairs(manual_cats) do
			local pieces = split_on_comma(manual_cat)
			extend(result, pieces)
		end
	end

	return result
end

-- Format a list of categories for the language object `lang`, using the optional sort key `sort_key`. A category
-- prefixed with `cln:` becomes a lang-name category ("LANGNAME CATEGORY"); any other category becomes a topic
-- category ("LANGCODE:CATEGORY"). The resulting list is passed to format_categories() in the utilities module.
local function format_cats(lang, cats, sort_key)
	local lang_code = lang:getFullCode()
	local qualified = {}
	for _, cat in ipairs(cats) do
		-- 'cln' corresponds to {{cln}}, which generates lang-name categories like [[:Category:English abbreviations]]
		-- (as opposed to topic categories like [[:Category:en:Abbreviations of states of the United States]]).
		local langname_cat = cat:match("^cln:(.*)$")
		local full_cat
		if langname_cat then
			full_cat = lang:getFullName() .. " " .. langname_cat
		else
			full_cat = lang_code .. ":" .. cat
		end
		insert(qualified, full_cat)
	end
	return require(utilities_module).format_categories(qualified, lang, sort_key, nil,
		force_cat or m_placetypes.get_force_cat())
end

----------- Main entry point

--[==[
Implementation of {{tl|place}}. Meant to be callable from another module (specifically, [[Module:transclude]]). The
single argument `data` is an object with the following fields:
* `template_args`: Raw arguments specified by {{tl|place}}, possibly modified by {{tl|tcl}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `drop_extra_info`: True if we should drop most of the "extra info" specified using extra info arguments (capital,
  largest city, etc.). Usually true when invoked from {{tl|tcl}}. Note that some extra info is still displayed even when
  `drop_extra_info` is set in order to establish the context (e.g.
{{para|full}} and {{para|modern}}), and any extra info overridden at the {{tl|tcl}} level is displayed regardless.
* `extra_info_overridden_set`: Set of booleans specifying, for each extra info arg, whether it was overridden at the
  {{tl|tcl}} level. This means, for example, that the values are interpreted according to the language in {{para|1}}
  instead of always defaulting to English, as is the case when {{tl|place}} is called directly.
* `form_of_overridden_args`: Set of objects of the form `{new_directive = ``directive``, new_value = ``value``}` for
  overriding a given form-of directive (the key) with new directive ``directive`` and new unparsed value ``value``.
  Both the key and the replacing directive should be canonical. ``value`` will be parsed in the same way as a regular
  form-of directive except that all specified terms are interpreted in the language specified in {{para|1}}, never in
  English. This is present so that {{tl|tcl}} can be used on abbreviations like [[GDR]] and [[FYROM]], whose
  equivalents in a foreign language have language-specific expansions but where the rest of the call should stay the
  same.
* `translation_follows`: If true, any translation specified using t= should follow the definition, after a colon,
  rather than preceding, with the definition in parens.
]==]
function export.format(data)
	local raw_args = data.template_args

	-- Spec tables shared by every parameter of the same shape.
	local list_spec = {list = true}
	local flag_spec = {type = "boolean"}

	local param_specs = {
		[1] = {required = true, type = "language", default = "und"},
		[2] = {required = true, list = true},
		t = list_spec,
		tid = {list = true, allow_holes = true},
		cat = list_spec,
		nocat = flag_spec,
		nocap = flag_spec,
		sort = true,
		pagename = true, -- for testing or documentation purposes
		a = true,
		addl = true,
		def = true,

		-- params that are only used when transcluding using {{tcl}}/{{transclude}}, to transmit information to {{tcl}}.
		tcl = true,
		tcl_t = list_spec,
		tcl_tid = list_spec,
		tcl_nolb = true,
		tcl_nolc = flag_spec,
		tcl_noextratext = flag_spec,
	}

	-- add "extra info" parameters
	for _, extra_info_arg in ipairs(export.extra_info_args) do
		param_specs[extra_info_arg.arg] = list_spec
	end

	-- FIXME, once we've flushed out any uses, delete the following clause. That will cause def= to be ignored.
	if raw_args.def == "" then
		error("Cannot currently pass def= as an empty parameter; use def=- if you want to suppress the definition display")
	end

	local args = require("Module:parameters").process(raw_args, param_specs)

	-- a= is tracked, and accepted only when it is an article ("a", "an" or "the", optionally capitalized).
	local article = args.a
	if article then
		track("a")
		local is_article = article:find("^[Aa]n?$") or article:find("^[Tt]he$")
		if not is_article then
			error("a= can only be used to specify a definite or indefinite article (and preferably use |nocap=1 instead to get the initial letter lowercase); see especially the documentation on the [[Template:place#Mixed format|mixed format]], which can be used to add arbitrary text before the placetype")
		end
		track("a/article")
	end

	-- The parsed arguments and the parsed place spec are stored back into `data`, from which
	-- parse_overall_place_spec() and get_def() read them.
	data.args = args
	local overall_place_spec = parse_overall_place_spec(data)
	data.overall_place_spec = overall_place_spec

	local definition = get_def(data)
	local category_text = ""
	if not args.nocat then
		category_text = format_cats(args[1], export.get_cats(args, overall_place_spec), args.sort)
	end
	return definition .. category_text
end

--[==[
Actual entry point of {{tl|place}}.
]==]
function export.show(frame)
	-- All the work is done by export.format(); this wrapper only hands it the arguments of the parent frame.
	return export.format {
		template_args = frame:getParent().args,
	}
end

return export
-- NOTE(review): the next line is not Lua; it looks like wiki-dump revision metadata separating two page revisions.
14006m3e32f16t6msgvk7fsloksykq2 231544 231543 2026-04-16T11:56:16Z Lee 19 Lee විසින් යලියොමුවක් නොදමාම [[Module:place/Module:place]] පිටුව [[Module:place]] වෙත ගෙනයන ලදී 231542 Scribunto text/plain
local export = {}

local force_cat = false -- set to true for testing

local m_placetypes = require("Module:place/placetypes")
local m_links = require("Module:links")
local memoize = require("Module:memoize")
local m_strutils = require("Module:string utilities")
local m_table = require("Module:table")
local debug_track_module = "Module:debug/track"
local en_utilities_module = "Module:en-utilities"
local form_of_module = "Module:form of"
local languages_module = "Module:languages"
local parse_interface_module = "Module:parse interface"
local parse_utilities_module = "Module:parse utilities"
local parameter_utilities_module = "Module:parameter utilities"
local utilities_module = "Module:utilities"

local enlang = require(languages_module).getByCode("en")

local rmatch = m_strutils.match
local rfind = m_strutils.find
local ulen = m_strutils.len
local split = m_strutils.split

local dump = mw.dumpObject
local insert = table.insert
local concat = table.concat
local pluralize = require(en_utilities_module).pluralize
local extend = m_table.extend
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

local internal_error = m_placetypes.internal_error
local process_error = m_placetypes.process_error
local placetype_data = m_placetypes.placetype_data

--[==[ intro:
===Introduction===

This module implements {{tl|place}}, which is a template for standardizing the description and categorization of
toponyms (terms that refer to locations such as cities, countries, rivers, etc.). The following modules support this
template:
* [[Module:place]]: The main module.
* [[Module:place/placetypes]]: A module containing data on placetypes, as well as utilities for working with placetypes; category generation handlers for adding categories based on placetypes; and display handlers for displaying holonyms (i.e. containing locations) of a specific type. FIXME: Maybe split out the code from the data. * [[Module:place/locations]]: A module containing data on known locations, as well as utilities for working with such locations. FIXME: Maybe split out the code from the data. * [[Module:category tree/topic/Places]]: A category tree module for generating the descriptions of all categories generated by {{tl|place}}. * [[Module:place doc]]: A module that generates documentation tables describing known placetypes and locations. ===Basic terminology=== The basic terminology used in this and associated {{tl|place}} modules is: * A ''location'' (or equivalently, a ''place'') is any geographic feature (either natural or geopolitical), either on the surface of the Earth or elsewhere. Examples of types of natural places are rivers, mountains, seas and moons; examples of types of geopolitical places are cities, countries, neighborhoods and roads. A ''known location'' is specifically a location whose properties are specified in the {{tl|place}} modules; more on them below. * Specific places are identified by names, referred to as ''toponyms'' or ''placenames''. A given place will often have multiple names, and a given toponym may be ambiguous, referring to multiple possible locations. Specifically: ** There may be names including different amounts of disambiguating information (`Tucson` vs. `Tucson, Arizona` vs. `Tucson, Arizona, USA` or `New York` vs. `New York City` vs. `New York, New York`); abbreviations (`NYC` for `New York City`, `USA` for `United States of America`); ''official'' vs. ''short'' names (e.g. `Union of Soviet Socialist Republics` vs. `Soviet Union`); spelling variations (`Cracow` vs. `Krakow` vs. `Kraków`); current vs. 
former names (`Saint Petersburg` vs. `Leningrad` vs. `Petrograd`); [[exonym]]s vs. [[endonym]]s (e.g. `Tavastia Proper` vs. `Kanta-Häme`, both referring to the same administrative region in Finland); alternative names not due to any of the above reasons (`Bashkiria` vs. `Bashkortostan`); etc. In addition, each language that has an opportunity to refer to the place will have its own name, with the same sorts of variations as exist in English. ** Examples of ambiguous toponyms are `New York` (either a city or a state); `Georgia` (either a state of the US or an independent country in the Caucasus Mountains); `Paris` (either the capital of France or various small cities and towns in the US); `Mexico` (either a country, a state of that country, or the capital city of that country); and `San Antonio` (besides being a major city in Texas, it is the name of dozens of settlements of all sorts throughout the US and Latin America, and at least 181 distinct [[barangay]]s in the Philippines). * A ''placetype'' is the (or a) type that a location belongs to (e.g. `city`, `state`, `river`, `administrative region`, `[[regional county municipality]]`, etc.). ** It is common for locations to be described using multiple placetypes, and even sometimes known locations have multiple placetypes that they may be identified by (e.g. American Samoa can be identified either as an `unincorporated territory`, an `overseas territory` or just a `territory`). Both the {{tl|place}} template and the known location data allow a given location to be identified by multiple placetypes. When in doubt as to the correct placetype or placetypes for a given location, generally follow how Wikipedia describes the place. ** Some placetypes themselves are ambiguous; e.g. an ''area'' can variously refer to a top-level administrative division (specifically of Kuwait); a geographic region, generally without unambiguously defined borders; or a section of a city, similar to a neighborhood.
The term ''district'' is similarly ambiguous. A ''[[prefecture]]'' in the context of Japan is similar to a province, but a prefecture in France is the capital of a ''[[department]]'' (which is similar to a county). Some of this ambiguity is currently handled automatically; e.g. the ambiguity of areas and districts is handled by looking at the ''holonyms'', or containing locations, specified for a given place. But sometimes it is necessary to use a qualifier before the placetype to disambiguate; for example to refer to a French prefecture, use the placetype `French prefecture` instead of just `prefecture`. (FIXME: Handle this automatically.) * A ''holonym'', in the context of a description of a place, is a placename that refers to a larger-sized entity that contains the location being described. For example, `Arizona` and `United States` are holonyms of `Tucson`, and `United States` is a holonym of `Arizona`. * A ''place invocation'' consists of the invocation of {{tl|place}}, including all its parameters. Place invocations may contain one or more ''place descriptions'', each of which provides a description of the location, including its placetype or types, any holonyms, and any additional raw text needed to properly explain the place in context. Place invocations may also contain named parameters specifying zero or more English ''glosses'' or translations (for foreign-language toponyms) and any attached ''extra information'' such as the capital, largest city, official name, modern name or full name. Multiple place descriptions in a single invocation are separated by a numbered parameter starting with a semicolon, and are used when it is necessary to provide two or more definitions of a single location for proper categorization. 
For example, [[Vatican City]] is defined both as a city-state in Southern Europe and as an enclave within the city of Rome, as follows: : {{tl|place|en|city-state|r/Southern Europe|;,|an <<enclave>> within the city of [[Rome]], [[Italy]]|cat=Places in Rome|official=Vatican City State}}. Similar things need to be done for places like [[Crimea]] that are claimed by two different countries with different definitions and administrative structures. ** There are two types of place descriptions, ''new-style'' and ''old-style''. (The use of the terms "new" and "old" indicates chronological precedence in the development of {{tl|place}}, but is not meant to pass any value judgments on the two types, and does not indicate any intent to deprecate old-style descriptions. Both types of descriptions are useful; for example, old-style descriptions are generally more succinct but less flexible.) The above invocation shows both types: an old-style description followed by a new-style description. Old style descriptions use multiple numbered parameters, where the first parameter (after the language code) specifies the placetype or types, and following parameters specify either holonyms (which are always of the form ` ``placetype``/``placename`` `) or raw text (which is identifiable by not having a slash in it). New-style descriptions use a single parameter, where both placetypes and holonyms are surrounded by double angle brackets, and all remaining text is raw (displayed as-is). In both types of descriptions, holonyms include a slash in them to separate the placetype (which is mandatory and often abbreviated) from the placename. ** In the context of a place description, there are two types of placetypes. The ''entry placetypes'' are the placetypes of the place being described, while the ''holonym placetypes'' are the placetypes of the holonyms that the place being described is located within. Currently, a given place can have multiple placetypes specified (e.g.
[[Normandy]] is specified using the ''compound placetype'' `administrative region/former province/and/medieval kingdom`) while a given holonym can have only one placetype associated with it. Holonym placetypes are frequently abbreviated (e.g. `r` for `region`, `s` for `state`, `co` for `county`, etc.), while stylistically it is preferred to spell out the entry placetype (except for some long placetypes with well-known abbreviations, such as `CDP` or `cdp` for `[[census-designated place]]`). ** All holonyms in place descriptions are automatically linked as if surrounded by {{tl|l|en|...}}; i.e. if double brackets do not occur in the holonym, the entire holonym will be linked to the corresponding Wiktionary article. For this reason, the holonym should generally be in the same format as the canonical Wiktionary article describing the location (see below). * A ''known location'' is a location whose properties are specifically defined in the {{tl|place}} modules. Generally each such location has an associated category, and known locations exist in a containment hierarchy, where the immediately containing known location is known as the ''container'' of the location and the chain of successive containing locations is known as the ''container trail''. Generally the location's container corresponds to the first parent of its category. Note that some known locations belong to more than one immediate container; for example, Russia belongs to both Europe and Asia. ===More about placetypes=== # The following general categories of placetypes exist: ## ''Natural features'' such as lakes, mountains, mountain ranges, islands, archipelagoes, moons, stars, asteroids, etc. ## ''Continents'', ''supercontinents'' (groupings of continents where it makes sense, such as `America` and `Eurasia`) and ''continent-level regions'' (grouping of countries in a given continent, such as `Central America` and `Polynesia`). 
## ''Political entities'', which are generally classified as either ''polities'' (top-level entities such as countries), ''subpolities'' or ''political divisions'' (non-sovereign divisions, often specifically ''administrative divisions'', of a polity, where an administrative division has a governmental or statistical function and almost always has unambiguously defined boundaries), or ''settlements'' (e.g. cities; towns; villages; and divisions of a city such as neighborhoods, wards, [[barrio]]s and [[barangay]]s, which may or may not be formal administrative divisions and may or may not have unambiguous boundaries). ## ''Geographic regions'', which refer to recognized areas of the Earth (either with a natural geographic, political or cultural significance, often of a historical nature). Such regions can be of greatly varying size, may exist either within a single country or spanning multiple countries or (more often) parts of multiple countries, and may not have well-defined boundaries. They should be distinguished from ''administrative regions'', which exist within a single country and have well-defined boundaries and a political or administrative function. Geographic regions are categorized using the generic term ''geographic and cultural areas'' to emphasize that (a) they have no administrative significance; (b) they may vary greatly in size; and (c) their cohesion is due either to natural geographic boundaries, such as rivers or mountain ranges, or to sharing some cultural characteristics. ## ''Man-made structures'' below the level of a settlement or neighborhood, such as airports, roads, individual buildings, and the like. (Note that such structures, even if named, often do not meet the [[WT:CFI]] criteria; this is particularly the case for roads.) # Placetypes support aliases, and the mapping to canonical form happens early on in the processing. 
For example, `state` can be abbreviated as `s`; `administrative region` as `adr`; `regional county municipality` as `rcomun`; etc. Some placetype aliases handle alternative spellings rather than abbreviations. For example, `departmental capital` maps to `department capital`, and `home-rule city` maps to `home rule city`. Placetype abbreviations are particularly useful in holonym specs, because every holonym must be accompanied by its placetype, for disambiguation purposes. # A ''placetype qualifier'' is an adjective prepended to the placetype to give additional information about the place being described. For example, a given place may be described as a `small city`; logically this is still a city, but the qualifier `small` gives additional information about the place. Multiple qualifiers can be stacked, e.g. `small affluent beachfront unincorporated community`, where `unincorporated community` is a recognized placetype and `small`, `affluent` and `beachfront` are qualifiers. (As shown here, it may not always be obvious where the qualifiers end and the placetype begins.) For the most part, placetype qualifiers do not affect categorization; a `small city` is still a city and an `affluent beachfront unincorporated community` is still an unincorporated community, and both should still be categorized as such. But some qualifiers do change the categorization. In particular, a `former province` is no longer a province and should not be categorized in e.g. [[:Category:Provinces of Italy]], but instead in a different set of categories, e.g. [[:Category:Historical political subdivisions]]. There are several terms treated as equivalent for this purpose: `abandoned`, `ancient`, `extinct`, `historic(al)`, `medi(a)eval` and `traditional`. Another set of qualifiers that change categorization are `fictional` and `mythological`, which cause any term using the qualifier to be categorized respectively into [[:Category:Fictional locations]] and [[:Category:Mythological locations]]. 
===More about toponyms=== # Toponyms may be: ## ''simple'' (not including any containing location in its name, such as `Tucson`) or ''multipart'' (including one or more containing locations, such as `Tucson, Arizona` or `Tucson, USA` or even `Tucson, Arizona, USA`); ## ''bare'' (not including the word `the` if the location normally requires this article when following a preposition, such as `United States`, `Gambia` or `Community of Madrid`) or ''prefixed'' (including the word `the` as needed, such as `the United States`, `the Gambia` or `the Community of Madrid`); ## ''elliptical'' (just the placename without any disambiguating placetype, such as `Durham`, `New York` or `Mexico`) or ''full'' (containing a disambiguating placetype or similar identifier if one is commonly included, such as the city of `Durham` (in England) vs. its containing county `County Durham`; the US city `New York City` vs. its containing state `New York`; or the three-way distinction between `Mexico` (the country), `Mexico City` (the capital of this country) and `(the) State of Mexico` (one of the states of the country Mexico, mostly surrounding but not including Mexico City)). # The ''canonical Wiktionary article'' is the main article on Wiktionary where a location is described. Canonical articles, per the above terminology, are generally ''simple'' and ''bare'', but may be either ''full'' or ''elliptical''. The fact that a given article is canonical is often identifiable by the fact that translations are housed there and not somewhere else. For example, most counties of the US and Canada include the word `County` in their canonical article name, but most counties elsewhere do not. `Washington, D.C.` is one of the few cases where a non-simple toponym is used as the canonical article; this is based on common usage, especially by residents of the city in question (who commonly refer to it as "D.C." but rarely just as "Washington"). 
===More about known locations=== # The following types of known locations are defined in this module: ## Continents, supercontinents and continent-level regions, into which countries are grouped. Specifically: ### At the top level below `Earth` are the supercontinents `America` and `Eurasia` and the continents `Africa`, `Oceania` and `Antarctica`. ### `America` is further broken down into the continents `North America` (in turn containing the continental regions `Central America` and `Caribbean`, with the United States, Canada and Mexico directly under North America) and `South America`. ### `Eurasia` is further broken down into the continents `Europe` and `Asia`. ### `Oceania` is further broken down into the continental regions `Melanesia`, `Micronesia` and `Polynesia`, with `Australia` directly under `Oceania`. ### Under the above-specified divisions are countries. Some countries are placed in more than one continent or continent-level region, either because they actually span two continents (e.g. Russia, Turkey, Kazakhstan, Egypt) or because they are politically considered to belong to a continent different from the one they are geographically in (Cyprus, Georgia, Armenia, etc.). ## Political entities, including: ### Top-level political entities, which includes: #### Countries, with a fairly liberal definition, notably including all UN-recognized countries plus some others that are commonly considered countries, even if not all other countries recognize them as such or consider them completely independent (notably, Kosovo, Palestine, Taiwan, Western Sahara, Niue and the Cook Islands). #### Pseudo-countries, which include areas calling themselves countries that are de-facto not under the control of the country that they are internationally considered part of (e.g. Abkhazia, South Ossetia, Transnistria); dependent/external/etc. territories of countries (e.g. 
American Samoa [US], Bermuda [UK], Christmas Island [Australia], Easter Island [Chile]); constituent countries, autonomous territories and the like (Aruba, Curaçao and Sint Maarten of the Netherlands; Greenland and the Faroe Islands of Denmark; etc.; but notably not including England, Scotland, Northern Ireland and Wales, which are treated as regular countries); and a grab bag of other entities that have a semi-independent existence, such as Hong Kong, Macau, Guadeloupe, Martinique and the like. Currently, the actual distinction in treatment between "countries" and "country-like entities" is minimal, but in the future we might restrict the sorts of subcategories of country-like entities more than regular countries. #### Former countries, e.g. the Soviet Union, Yugoslavia, West Germany and the Roman Empire. These are much more limited in the sorts of subcategories allowed, because generally locations, especially cities, should be described from the perspective of which political entity they are currently located in (e.g. "an ancient Roman town in modern Syria") and categorized as such. ### Subpolities. Generally we only list top-level administrative divisions of countries (and only fairly major countries are usually included), but sometimes we list second-level administrative divisions, as in the case of the United Kingdom (where the top-level administrative divisions of the four constituent countries are listed) and China (where major prefecture-level cities are listed, and are considered administrative divisions rather than cities). ### Cities. Only major cities get categories, with the definition of "major" varying by country but often including those where the city population itself (sometimes the metro area) is >= 1,000,000 people. # A distinction should be made in the {{tl|place}} modules between ''keys'' and ''placenames''. 
Placenames are as the location appears in a holonym, and are generally in the same format as the canonical Wiktionary article describing the location so that when formatted as a link, the link goes to the right article; i.e. they are simple and bare, and may be full or elliptical according to Wiktionary conventions. The ''canonical key'' of a location is how the location's category is named, and always uniquely identifies the location from among the known locations in this module (but not necessarily among all possible locations). In particular, subpolities usually have multipart keys that include the containing location, such as `Anhui, China` (not just `Anhui`); `Arizona, USA` (not just `Arizona`, and also not `Arizona, United States`); and `Herefordshire, England` (not just `Herefordshire`, and also in this case not `Herefordshire, UK` or `Herefordshire, England, UK` or any other possible variation). Cities are normally simple, but some cities are multipart for disambiguation purposes (e.g. `Newcastle, New South Wales` for the city in Australia vs. `Newcastle upon Tyne` for the identically-named city in England). Canonical keys may have ''key aliases'', other ways of referring to the location that are not necessarily unique (e.g. `Newcastle` is a key alias for both of the above-mentioned cities), and city keys with diacritics generally have diacriticless aliases, such as canonical key `Düsseldorf` vs. key alias `Dusseldorf`, or canonical key `Łódź` vs. key alias `Lodz`. # Known locations are gathered into ''groups'' with similar properties, such as all the states of the United States; all the (ceremonial) counties of England (see below); and all the "sufficiently major" prefecture-level cities in China (where a prefecture-level city is a prefecture surrounding a major city with a unified government and is more like a prefecture, i.e. 
a major administrative division just underneath a province, than like a city, and where "sufficiently major" is defined according to the population of either the total prefecture or the urban area of the city). Note that there are multiple types of counties in England, with overlapping but non-identical names and boundaries; there are, in particular, ''ceremonial counties'', ''local government counties'' and ''historic counties''; ''ceremonial counties'' have only ceremonial administrative functionality but unlike local government counties (a) don't frequently change their boundaries or nature, (b) correspond more closely to historic county boundaries and names, and (c) are what Englanders usually identify themselves with, and so they are used as top-level divisions rather than local government counties. # Some known locations have ''aliases'' defined, which are of two types. ''Display aliases'' map holonyms to their canonical form near the beginning of processing (in particular before the displayed output is formatted). For example, `US`, `U.S.`, `USA`, `U.S.A.` and `United States of America` are all canonicalized to `United States` (if identified as a country), and display as `United States`. Similarly, the foreign forms `Occitanie` (as a region or administrative region) and `Noord-Brabant` (as a province) are mapped to `Occitania` and `North Brabant` for display purposes. There are also ''category aliases'', so that if e.g. `Republic of Macedonia` is encountered, it will display as such but categorize as `North Macedonia`. (This is because, among other reasons, `Republic of Macedonia` is normally preceded by `"the"` while `North Macedonia` is not, so a call {{tl|place|en|a <<city>> in the <<c/Republic of Macedonia>>}} would look wrong if `Republic of Macedonia` were converted to `North Macedonia` during display, as the result would be `a city in the North Macedonia`. There are also frequently political connotations to different category aliases, e.g. `Burma` vs. 
`Myanmar`.) All of these aliases are sensitive to the placetype specified. For example, `Mexico` as a state is categorized under `State of Mexico, Mexico` but `Mexico` the country is categorized as just `Mexico`. ===Categories=== There are two main types of categories: # Categories for known locations, divided into: ## Top-level polity categories (e.g. [[:Category:United States]], [[:Category:Taiwan]], [[:Category:South Ossetia]], [[:Category:Bermuda]], [[:Category:Soviet Union]], [[:Category:West Germany]]). ## Subpolity categories ([[:Category:Arizona, USA]], [[:Category:Hunan]], [[:Category:Kagoshima Prefecture]], [[:Category:Cluj County, Romania]]). For historical reasons, different formats are used for the subpolities of different polities. Increasingly, we are moving towards always including the polity name in the subpolity category, but whether the subpolity type is included and where it is included (cf. [[:Category:Cluj County, Romania]] vs. [[:Category:County Cork, Ireland]]) is still inconsistent and will probably remain that way, based on how the subpolity is normally referred to. ## City categories ([[:Category:Tokyo]], [[:Category:New York City]], [[:Category:Jaipur]]). Normally these do not include the containing subpolity, but may do so in order to disambiguate. # Categories for placetypes, divided into: ## "Immediate" political and non-political division categories ([[:Category:States of the United States]], [[:Category:Municipalities of Tocantins, Brazil]], [[:Category:Ghost towns in Arizona, USA]]). These are name categories, whose purpose is to contain locations of the specified type. "Immediate" here refers to the fact that the location in the category name is the immediately-containing polity. Usually these categories use the preposition "of", but sometimes "in". 
(Specifically, "of" typically implies that the placetype in question has an official or semi-official status, whereas "in" implies there is no such official status, but common usage may override this.) The form of the toponym appearing in these categories is always the same as that of the corresponding toponym category except that the word "the" may appear (e.g. [[:Category:States of the United States]]), whereas it doesn't appear in the toponym category itself ([[:Category:United States]], no "the"). ## "Skip-polity" categories for second-level political and non-political divisions of a country or other top-level polity (e.g. [[:Category:Counties of the United States]], [[:Category:Municipalities of Brazil]] and [[:Category:Subprefectures of Japan]]). These have several purposes: * They group the immediate division categories mentioned previously. * They categorize "straggler" toponyms that (often improperly) fail to mention the subpolity they belong to, but only the top-level polity. * If categories do not exist for the first-level divisions of a country (and sometimes even when they do), they group all toponyms of the specified type for the specified country. For example, Lithuania is divided into first-level counties and second-level municipalities, but since we don't currently have categories for Lithuanian counties, all municipalities go under [[:Category:Municipalities of Lithuania]] rather than under a category for a specific county. In addition, even though we do have categories for Japanese prefectures (a first-level division), all subprefectures (a second-level division) go under [[:Category:Subprefectures of Japan]] because there aren't very many of them (see below). 
## "Generic placetype" categories, both of the immediate and skip-polity type (immediate [[:Category:Cities in California, USA]] and [[:Category:Neighborhoods of the Bronx]]; skip-polity [[:Category:Villages in Ivory Coast]], [[:Category:Geographic and cultural areas of England]], [[:Category:Rivers in Egypt]] and [[:Category:Places in the Philippines]]). As mentioned above, "generic" placetypes occur in every polity (although the set of generic placetypes allowed for cities is a subset of those allowed for top-level polities and subpolities). Usually these categories use the preposition "in", but sometimes "of". As above, skip-polity categories group immediate categories, and in addition there are various reasons a toponym entry is categorized into a skip-polity category. (For example, as a general rule, geographic and cultural areas only categorize at the country level, not the subpolity level, both because there often aren't very many in a given country and because they often span multiple subpolities.) The parent categories of a given category depend on its type. Generally, location categories have placetype categories as their first parent, and vice-versa. Specifically: # Top-level country categories have as their parent e.g. [[:Category:Countries in Europe]], [[:Category:Countries in Central America]] or [[:Category:Countries in Polynesia]], using the most specific continental-level region the country is contained in. # Pseudo-countries are under [[:Category:Country-like entities]] as a neutral designation. There aren't enough of them to subcategorize under continent-level regions. # Former countries are under [[:Category:Former countries and country-like entities]]. # Subpolity categories are usually under a placetype category whose placetype is the canonical (first-listed) placetype of the subpolity and whose toponym is the immediately containing polity, but there are exceptions. 
Specifically, sometimes if a polity has multiple types of subpolities, they are combined (e.g. [[:Category:States and territories of Australia]], [[:Category:Federal subjects of Russia]]). In addition, sometimes a less specific but more identifiable placetype is used instead of the canonical one (e.g. [[:Category:Regions of France]] when the canonical placetype is "administrative region"). The same rules and exceptions generally apply when categorizing subpolities themselves; e.g. both the Australian state of Queensland and territory of Northern Territory go under [[:Category:en:States and territories of Australia]] rather than separately under [[:Category:en:States of Australia]] and [[:Category:en:Territories of Australia]]. In addition, sometimes subpolities may "skip a level" if there aren't very many. For example, there are only 26 subprefectures of Japan (14 under Hokkaido and 12 more scattered under five other prefectures). Rather than have e.g. [[:Category:en:Subprefectures of Kagoshima Prefecture]] containing at most two entries and [[:Category:en:Subprefectures of Miyazaki Prefecture]] containing at most one, they are all grouped under the so-called "skip-subpolity category" [[:Category:en:Subprefectures of Japan]]. # City categories are always under e.g. [[:Category:Cities in the United States]] (e.g. [[:Category:New York City]] is so-placed, even though [[:Category:Cities in New York, USA]] exists). However, they may have a second, more-specific parent (e.g. [[:Category:Cities in New York, USA]] in the case of New York City). The city entries themselves will go under the more specific parent if it exists. # Immediate placetype categories for second-level divisions of a country generally have, respectively, a "toponym parent" that is the toponym mentioned in the category and a "skip-polity parent" that groups all subpolity placetype categories of a specific type and containing polity. 
For example, [[:Category:Counties of Arizona, USA]] has toponym parent [[:Category:en:Arizona, USA]] and skip-polity parent [[:Category:en:Counties of the United States]]. Sometimes the default skip-polity parent is overridden or disabled entirely. For example, in the US, most states are divided into counties but Louisiana is divided into parishes and Alaska into boroughs. It would make no sense to put [[:Category:Parishes of Louisiana, USA]] under [[:Category:Parishes of the United States]] (which would only have one subcategory), so we include them under [[:Category:Counties of the United States]]. An alternative would be to name the skip-polity category to explicitly include parishes and boroughs; this would get awkward here but is done in some cases. Similarly, [[:Category:Regional county municipalities of Quebec]] is placed under [[:Category:Regional municipalities of Canada]] since that name is used in other provinces. Meanwhile, [[:Category:Regional districts of British Columbia]] disables its skip-polity category since no other province or territory of Canada has regional districts or comparable subpolities under a different name (an alternative would be to place them under [[:Category:Counties of Canada]], since they are sort of comparable to counties). # Placetype categories for first-level divisions of a country similarly (e.g. [[:Category:States of the United States]]) have a toponym parent (in this case [[:Category:United States]]), but in place of the skip-polity parent they have two other parents: a "bare placetype" parent (in this case [[:Category:States]]) and the "generic" parent [[:Category:Political divisions of specific countries]]. (There is also a bare [[:Category:Political divisions]] that groups "bare placetype" categories.) Skip-polity placetype categories for second-level divisions of a country (e.g. [[:Category:Counties of the United States]]) work the same. 
Placetype categories for countries work likewise except they are missing the generic parent. ===Place descriptions=== A given place description is defined internally in a table of the following form: ```{ placetypes = {"``placetype``", "``placetype``", ...}, holonyms = { { -- holonym object; see below placetype = "``placetype``" or nil, display_placename = "``placename``", unlinked_placename = "``placename``", langcode = "``langcode``" or nil, no_display = BOOLEAN, needs_article = BOOLEAN, force_the = BOOLEAN, affix_type = "``affix_type``" or nil, pluralize_affix = BOOLEAN, suppress_affix = BOOLEAN, continue_cat_loop = BOOLEAN, }, ... }, order = { ``order_item``, ``order_item``, ... }, -- (only for new-style place descriptions), joiner = "``joiner_string``" or nil, holonyms_by_placetype = { ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ``holonym_placetype`` = {"``placename``", "``placename``", ...}, ... }, }``` Holonym objects have the following fields: * `placetype`: The canonicalized placetype if specified as e.g. `c/Australia`; nil if no slash is present (in which case the placename in `display_placename` refers to raw text). * `display_placename`: The placename or raw text, in the format to be displayed. Placename display aliases have already been resolved. It is raw text if `placetype` is nil. * `unlinked_placename`: Same as `display_placename` but with links and HTML removed. * `langcode`: The language code prefix if specified as e.g. `c/fr:Australie`; otherwise nil. * `no_display`: If true (holonym prefixed with !), don't display the holonym but use it for categorization. * `needs_article`: If true, prepend an article if the placename needs one (e.g. `United States`). * `force_the`: If true, always prepend the article `the`. Example use: holonym `city:pref:the/Gold Coast`, which gets formatted as `(the) city of the [[Gold Coast]]`. * `affix_type`: Type of affix to prepend (values `pref` or `Pref`) or append (values `suf` or `Suf`). 
The actual affix added is the placetype (capitalized if values `Pref` or `Suf` are given), or its plural if `pluralize_affix` is given. Note that some placetypes (e.g. `district` and `department`) have inherent affixes displayed after (or sometimes before) them. * `pluralize_affix`: Pluralize any displayed affix. Used for holonyms like `c:pref/Canada,US`, which displays as `the countries of Canada and the United States`. * `suppress_affix`: Don't display any affix even if the placetype has an inherent affix. Used for the non-last placenames when there are multiple and a suffix is present, and for the non-first placenames when there are multiple and a prefix is present. * `continue_cat_loop`: If true (holonym used :also), continue producing categories starting with this holonym when preceding holonyms generated categories. Note that new-style place descs (those specified as a single argument using <<...>> to denote placetypes, placetype qualifiers and holonyms) have an additional `order` field to properly capture the raw text surrounding the items denoted in double angle brackets. The ``order_item`` items in the `order` field are objects of the following form: ```{ type = "``order_type``", value = "STRING" or INDEX, }``` Here, the ``order_type`` is one of `"raw"`, `"qualifier"`, `"placetype"` or `"holonym"`: * `"raw"` is used for raw text surrounding `<<...>>` specs. * `"qualifier"` is used for `<<...>>` specs without slashes in them that consist only of qualifiers (e.g. the spec `<<former>>` in `<<former>> French <<colony>>`). * `"placetype"` is used for `<<...>>` specs without slashes that do not consist only of qualifiers. * `"holonym"` is used for holonyms, i.e. `<<...>>` specs with a slash in them. For all types but `"holonym"`, the value is a string, specifying the text in question. For `"holonym"`, the value is a numeric index into the `holonyms` field. 
It should be noted that placetypes and placenames occurring inside the holonyms structure are canonicalized, but placetypes inside the placetypes structure are as specified by the user. Stripping off of qualifiers and canonicalization of qualifiers and bare placetypes happens later. The information under `holonyms_by_placetype` is redundant to the information in holonyms but makes categorization easier. The holonym placenames listed here already have category aliases applied. For example, the call {{tl|place|en|city|s/Pennsylvania|c/US}} will result in the return value ```{ placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania" }, { placetype = "country", display_placename = "United States", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, }``` Here, the placetype aliases `s` and `c` have been expanded into `state` and `country` respectively, and the placename display alias `US` has been expanded into `United States`. PLACETYPES is a list because there may be more than one. For example, the call {{tl|place|en|city/and/municipality|p/[[Kwango]] Province|c/Congo}} will result in the return value ``` { placetypes = {"city", "and", "municipality"}, holonyms = { { placetype = "province", display_placename = "[[Kwango]] Province", unlinked_placename = "Kwango Province" }, { placetype = "country", display_placename = "Congo", unlinked_placename = "Congo" }, }, holonyms_by_placetype = { country = {"Congo"}, }, }``` Here, the `unlinked_placename` field has removed links from `display_placename`. The value in the key/value pairs is likewise a list; e.g. 
the call {{tl|place|en|city|s/Kansas|and|s/Missouri}} will return ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, }, } ``` Note that in `get_cats()` (which runs after the display form has been generated), further changes to the holonym structure are made to aid in categorization. For example, after `handle_category_implications()` and `augment_holonyms_with_container()` are called, the above structure will look more like ``` { placetypes = {"city"}, holonyms = { { placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" }, { placetype = "country", unlinked_placename = "United States" }, { display_placename = "and", unlinked_placename = "and" }, { placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" }, { placetype = "country", unlinked_placename = "United States" }, }, holonyms_by_placetype = { state = {"Kansas", "Missouri"}, country = {"United States"} }, } ``` ===Overall place specs=== The overall place spec parsed by `parse_overall_place_spec` has the following fields: * `lang`: The language object (from {{para|1}}). * `args`: The parsed arguments from the {{tl|place}} call. * `directives`: List of form-of directives (starting with `@`) parsed from the numeric args beginning with {{para|2}}. Each directive contains fields `directive` (the directive as specified by the user, e.g. 
`"former name of"`); `terms` (list of term objects for the terms specified by the user); `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}); `spec` (the corresponding directive spec from `all_form_of_directives`); `pretext` (the text to display directly before the directive); `posttext` (the text to display directly after the directive; {nil} except for the last directive). * `descs`: List of one or more place description objects parsed from the numeric args beginning with {{para|2}}, as described above. * `extra_info`: List of extra-info objects for extra info specified using arguments such as {{para|capital}}, {{para|modern}}, etc. Objects are in the order they should be displayed, and each object contains fields `spec` (the spec for the type of extra info, taken from `export.extra_info_args`), `terms` (list of term objects for the terms specified by the user); and `conj` (conjunction specified by the user using inline modifier `<conj:...>`, or {nil}). ===Category determination=== The algorithm to find the categories to which a given place belongs works off of a place description (which specifies the entry placetype(s) and holonym(s); see above). If there are multiple place descriptions, each is processed independently to generate categories. Likewise, if there are multiple entry placetypes in a given place description, each is processed independently with all the holonyms of the description to generate categories. Furthermore, before the category-generation algorithm runs, earlier steps have modified the holonyms of the place description (inserting containing polities whenever possible; see the description above of `handle_category_implications()` and `augment_holonyms_with_container()`). Given a single entry placetype and a place description, the algorithm to generate categories processes holonyms from left to right until it finds one that "matches" in that it produces one or more categories. 
At that point it attempts to generate categories for all other holonyms in the place description of the same placetype. Normally, it then stops processing holonyms, but if a holonym is marked using the `:also` modifier, the category generation process starts over starting with that holonym (or the leftmost such remaining holonym, if there is more than one marked with `:also`). This makes it possible, for example, to specify the description of a river that passes through two different types of political divisions (e.g. Alberta and the Northwest Territories), or categorize a geographic region at both the continent and country level, such as this: <pre> {{place|en|historical region|r/Eastern Europe|located in southeastern|c:also/Poland|*and western|c/Ukraine}} </pre> Here, `r/Eastern Europe` has a category implication that adds `cont/Europe` as a holonym directly after it, which causes the page to be categorized into [[:Category:en:Geographic and cultural areas of Europe]]. The category generation process would normally stop at this point, but the presence of `:also` causes it to restart with `c/Poland` and generate the category [[:Category:en:Geographic and cultural areas of Poland]]. After doing this, it looks for other holonyms of the same placetype as `c/Poland` (i.e. other countries), which causes it to process `c/Ukraine` and generate the category [[:Category:en:Geographic and cultural areas of Ukraine]]. The category generation process works off of the `placetype_data` table, which specifies various properties for placetypes, such as how to display a holonym of that placetype as well as how to categorize certain pages where the {{tl|place}} call contains the specified placetype as an entry placetype. 
For example, the entry for `city-state` in [[Module:place/placetypes]] might look like ``` ["city-state"] = { link = true, category_link = "[[sovereign]] [[microstate]]s consisting of a single [[city]] and [[w:dependent territory|dependent territories]]", has_neighborhoods = true, class = "settlement", ["continent/*"] = {"City-states", "Cities", "Countries", "Countries in +++", "National capitals"}, default = {"City-states", "Cities", "Countries", "National capitals"}, }, ``` Here, the keys specify, respectively: # If `city-state` occurs as an entry placetype, link it to the corresponding Wiktionary entry (that is what `true` means in `link = true`). # Use the specified `category_link` text for categories such as [[:Category:City-states]]. # City-states are "city-like", i.e. they have neighborhoods; this controls the handling of entry placetypes such as `neighborhood`, `district`, `area`, etc. # City-states should be treated as settlements for determining how to handle the placetype `former city-state` and for categorizing the bare category [[:Category:City-states]] and language-specific equivalents such as [[:Category:en:City-states]]. # When the entry placetype `city-state` occurs along with a continent holonym, categorize into the specified categories under `continent/*`. Here, `+++` stands for the holonym in question. # When the entry placetype `city-state` occurs in any other context, categorize into the specified categories under `default`. It's important to realize that the only categorization keys under a given placetype entry that are specified explicitly in [[Module:place/placetypes]] are certain wildcard keys such as `continent/*` above (i.e. containing a slash followed by `*`) and under the key `default`. All the remaining categorization happens through category handlers, based on the information on known locations in [[Module:place/locations]]. 
For example, [[Module:place/locations]] has an "England group" specified similarly to the following: ``` export.england_group = { default_container = {key = "England", placetype = "constituent country"}, default_placetype = "county", default_divs = { "districts", {type = "local government districts", cat_as = "districts"}, { type = "local government districts with borough status", cat_as = {"districts", "boroughs"}, }, {type = "boroughs", cat_as = {"districts", "boroughs"}}, "civil parishes", }, default_british_spelling = true, data = export.england_counties, } ``` The `default_divs` key here specifies the divisions that exist for each of the counties listed under the `data` key (unless the key overrides them). Here, the entry `{type = "boroughs", cat_as = {"districts", "boroughs"}}` directs the category handler `political_division_cat_handler` in [[Module:place/placetypes]] (which is one of two category handlers that run for all entry placetypes, along with `generic_place_cat_handler`) to categorize boroughs specified under any of the counties listed under `data` as both districts and boroughs. Now, the categorization process proceeds as follows, given an entry placetype and place description, which specifies a set of holonyms (the code to do this is in `get_placetype_cats()`): # First, look up the entry placetype and any equivalent placetypes in `placetype_data`, which is defined in [[Module:place/placetypes]]. Note that the entry in `placetype_data` that specifies the placetype information that is used to determine the category or categories may not directly correspond to the entry placetype as specified in the place description. For example, if the entry placetype is `small town`, the placetype whose data is fetched will be `town` since `small` is a recognized qualifier and there is no entry in `placetype_data` for `small town`. 
As another example, if the entry placetype is `administrative capital`, the code will first look up `administrative capital` and then look up `capital city`, which is where the category handler is found, because `administrative capital` specifies `capital city` as its fallback. # Then, iterate over holonyms from left to right, as described above. For each holonym, we proceed as follows: ## First, call `political_division_cat_handler` to check if the entry placetype and holonym match a division in the `locations` data in [[Module:place/locations]], as in the example above. Note that when doing this, holonyms are canonicalized so that e.g. `co/Bedfordshire` gets mapped to `county/Bedfordshire` (because there is an entry in `placetype_aliases` in [[Module:place/placetypes]] that maps `co` to `county`) and `c/USA` gets mapped to `country/United States` (because there is an entry in the location data for the list of countries that maps `country/USA` to `country/United States` for both display and categorization purposes). This category handler, as with all such handlers, is passed the entry placetype and holonym being processed, but is also passed the entire place description, so it can look at other specified holonyms (particularly those that follow). It either returns {nil} or a list of category specs (which are the actual categories minus the preceding language code). ## If `political_division_cat_handler` doesn't generate any categories, check if there is a category handler defined using the `cat_handler` key for the entry placetype. If so, call it to generate the categories (if any). ## If the category handler returns {nil}, or there is no category handler, look for a ''wildcard key'' of the format e.g. `country/*`, which matches any holonym of placetype `country`. If found, the value is a list of category specs, which are processed as above. ## If we get this far without generating any categories, move to the next holonym. 
## If we do generate any categories, process all other holonyms of the same placetype. For example, if the user says {{tl|place|en|city|s/Kansas|and|s/Missouri}}, when we get to the holonym `s/Kansas`, we generate the category [[:Category:en:Cities in Kansas, USA]]. This causes us to look for other holonyms of the same placetype `state`, and process them accordingly, generating a category [[:Category:en:Cities in Missouri, USA]] as well. The same thing happens in an invocation like {{tl|place|pl|river|c/Poland,Ukraine,Belarus}}. # Once we generate categories for a holonym and any other holonyms of the same placetype, we normally stop processing holonyms. But if a holonym has the `:also` modifier, we restart the left-to-right loop at that holonym. For example, in the invocation {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr/Northwest Territories}}, we will generate a category [[:Category:en:Rivers in Alberta, Canada]] as well as [[:Category:en:Rivers in British Columbia, Canada]] (because British Columbia is of the same placetype as Alberta); but no category will be generated for the Northwest Territories, which is of a different placetype. To fix this, write {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr:also/Northwest Territories}}. The use of `:also` will cause holonym processing to resume at `Northwest Territories` after `Alberta` is processed, leading to an additional category [[:Category:en:Rivers in the Northwest Territories, Canada]]. (The presence of `the` in this last category is because `Northwest Territories` is a known location with a spec indicating that it should be preceded by `the`; it has nothing to do with the raw text `and the` in the invocation.) # Finally, if we process all holonyms and don't end up producing any categories, we check the entry placetype's data for a `default` key. If found, it lists category specs, which are processed to generate categories. 
This is used, for example, in the placetype `city-state`, as described above. # It should be noted that the above process runs independently for each combination of entry placetype and place description. Thus, for example, an invocation {{tl|place|en|city/and/county|s/Kansas,Missouri|c/USA}} will generate categories for both cities and counties in both Kansas and Missouri. # Two additional sources of categories are ''bare location'' categories and ''generic place'' categories. These categories are added by appropriate calls in the outer function `get_cats`, which iterates over placetypes and place descriptions, calling `get_placetype_cats` on each combination. ## Bare location categories are categories like [[:Category:Arizona, USA]] that are related-to categories containing terms related to the specified location. The bare location code, for example, adds the term [[Arizona]], and its equivalents in other languages, to [[:Category:Arizona, USA]]. When looking for terms to consider, it checks the pagename, the glosses specified using {{para|t}}, and the terms specified using {{para|modern}}, {{para|short}} and {{para|full}}. 
It looks to see if any of these parameters match any known locations, but only adds them to a bare location category if (a) the specified entry placetype matches, so that for example Russian `[[Джорджия]]` goes into [[:Category:Georgia, USA]] while `[[Грузия]]` goes into [[:Category:Georgia]] (the country), even though both have a gloss `Georgia`; and (b) there are no conflicting holonyms, so that for example the Old English term [[Munucceaster]] if defined similarly to {{tl|place|ang|city|in modern|cc/England|t=Newcastle}} won't get added to [[:Category:Newcastle, New South Wales]] (even though it is also a city) because the latter city is known to be in Australia, which conflicts with the country `United Kingdom` (added internally to the Old English place description through the holonym augmentation process, based on the holonym `cc/England`). ## Generic place categories are categories like [[:Category:Places in Kansas, USA]] and [[:Category:Places in England]] that contain places of arbitrary placetype. These are added through a special category handler that operates like other category handlers but is run for all placetypes, rather than only for the specified one(s). ]==] --[=[ TODO/FIXME: 1. [DONE] Neighborhoods should categorize at the city level. Categories like [[:Category:Places in Los Angeles]] exist but not [[:Category:Neighborhoods in Los Angeles]]; we can refactor the code in generic_cat_handler() to support this use case. 2. Display handlers should be smarter. For example, 'co/Travis' as a holonym should display as 'Travis County' in the United States, but (I think) display handlers don't currently have the full context of holonyms passed in to allow this to happen. 3. Connected to this, we have various display handlers that add the name of the holonym after or (sometimes) before the placename if it's not already there. 
An example is the county_display_handler() in [[Module:place/placetypes]], which adds "County" before Ireland and Northern Ireland counties and after Taiwan and Romania counties. This should be integrated into the polity group for these respective polities through a setting rather than requiring a separate handler that has special casing for various polities. 4. Placetypes for toponyms should also have display handlers rather than just fixed text. This should allow us to dispense with the need for special types for "fpref" = "French prefecture" (which displays as "prefecture" but links to the appropriate Wikipedia article on French prefectures, which are completely different from the more general concept of prefecture). Similarly for "Polish colony" and "Welsh community". ("Israeli settlement" should probably stay as-is because it displays as "Israeli settlement" not just "settlement".) 5. [DONE] Currently, categories for e.g. states and territories of Australia go into [[:Category:States and territories of Australia]] but terms for states and territories of Australia go into (respectively) [[:Category:States of Australia]] and [[:Category:Territories of Australia]]. We should fix this; maybe this is as easy as setting cat_as in the respective divs definitions. 6. Probably cat_as should support raw categories as well as category types; raw categories would be indicated by being prefixed with "Category:". 7. [MOSTLY DONE] Update documentation. 8. [DONE] Rename remaining political division categories to include name of country in them. 9. [DONE] Add Pakistan provinces and territories. 10. [DONE] Add a polity group for continents and continent-level regions instead of special-casing. This should make it possible e.g. to have Jerusalem as a city under "Asia". 11. [DONE] Add better handling of cities that are their own states, like Mexico City. 12. [DONE] Breadcrumb for e.g. [[Category:Aguascalientes, Mexico]] is "Aguascalientes, Mexico" instead of just "Aguascalientes".
13. [DONE] Unify aliasing system; cities have a completely different mechanism (alias_of) vs. polities/subpolities (which use `placename_cat_aliases` and `placename_display_aliases` in [[Module:place/placetypes]]). 14. [DONE] More generally, cities should be unified into the polity grouping system to the extent possible; this would allow for divs of cities (see #17 below). 15. [DONE] We have `no_containing_polity_cat` set for Lebanon, Malta and Saudi Arabia to prevent country-level implications from being added due to generically-named divisions like "North Governorate", "Central Region" and "Eastern Province" but (a) this setting seems to do multiple things and should be split, (b) it should be possible to set this at the division level instead of the country level. 16. Split out the data from the handlers so we can use loadData() on the data because it's becoming very big. 17. [DONE] Cities like Tokyo have special wards; "prefecture-level cities" like Wuhan (which aren't really cities but we treat them as such) have districts, subdistricts, etc. We need to support divs for cities and even named divisions of cities (such as we already have for boroughs of New York City). 18. [DONE] It should be allowed to set 'true' to any qualifier (which links it) and have it work correctly; qualifier lookup in [[Module:place]] needs to remove links first. 19. [DONE] Categories 'Historical polities' and 'Historical political subdivisions' should be renamed 'Former ...' since "historic(al)" is ambiguous (cf. "historic counties" in England which are not former, but still have a legal definition). 20. [PARTLY DONE; SUPPORT IS THERE BUT FORMER PROVINCES NOT YET CATEGORIZED] It should be possible to categorize former subpolities of certain polities; cf. [[:Category:ja:Provinces of Japan]], which contains former provinces. 21.
[DONE] In subpolity_keydesc(), we need to generate the correct indefinite article and have a huge hack to check specifically for "union territory", which is the only placetype that shows up in this function where the default indefinite article generating function fails. To fix this properly, we need to separate out the non-category placetype data from `cat_data` in [[Module:place/placetypes]] and move it to [[Module:place/locations]], because we don't have access to the data in [[Module:place/placetypes]], and that data indicates the correct article for placetypes like "union territory". 22. [DONE] Simplify the specs in `cat_data`, eliminating the distinction between "inner" and "outer" matching. There should not be two levels, just one. For example, in "district", instead of ["country/Portugal"] = { ["itself"] = {"Districts and autonomous regions of +++"}, } we should just have ["country/Portugal"] = {"Districts and autonomous regions of +++"}, And in "dependent territory", instead of ["default"] = { ["itself"] = {true}, ["country"] = {true}, }, we should just have ["itself"] = {true}, ["country/*"] = {true}, It appears the only remaining spec that can't be easily converted in this fashion is for "subdistrict": ["country/Indonesia"] = { ["municipality"] = {true}, }, This seems to be specifically for Jakarta and doesn't seem to work anyway, as the two entries in [[:Category:en:Subdistricts of Jakarta]] and the one entry in [[:Category:id:Subdistricts of Jakarta]] are manually categorized. 23. [DONE] Consolidate the remaining stuff in [[Module:category tree/topic cat/data/Earth]] into [[Module:category tree/topic cat/data/Places]]. 24. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city, but doesn't do the same for larger-level divisions. Likewise for the `city_type_cat_handler`. 
There are some sufficiently generically-named divisions that this issue can occur; for example, [[Koforidua]], the capital city of Eastern Region, Ghana, is incorrectly categorized under [[:Category:en:Cities in Eastern Region, Malta]] and [[:Category:en:Places in Eastern Region, Malta]]. Note that the function `augment_holonyms_with_container` ''DOES'' do such checks, so we should be able to refactor the code out of that function and use it elsewhere. 25. [DONE] The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are in different polities from the specified containing polity/polities of the city; but how smart is it? It will successfully avoid categorizing a neighborhood in e.g. [[Columbus]], [[Georgia]] that doesn't explicitly mention the US (only `s/Georgia`) into [[:Category:en:Places in Columbus]], which is for Columbus, Ohio, but will it do the same for a hypothetical neighborhood of Columbus in say Merseyside, England? This should be investigated. It will probably work for a hypothetical Columbus in [[Canada]] because `augment_holonyms_with_container` would auto-add Canada as an additional holonym once say `p/Ontario` is mentioned, but I think there's a setting preventing this augmentation from happening for the UK. (This relates to FIXME #15. `no_containing_polity_cat` is set on England, Scotland, etc. to prevent the toponyms from being added to [[:Category:en:Places in the United Kingdom]], but this same setting is used to prevent augmentation, which it should not be; there should be different settings.) 26. [DONE] The `generic_cat_handler` (or more specifically `find_holonym_keys_for_categorization`) checks for city holonyms by looking specifically for holonym type `city`. But some cities (particularly those in China) can be specified using different holonym types, e.g. `prefecture-level city`, `subprovincial city`, etc. 
We should allow these when appropriate (which means the cities in China need to have a `placetype` set that indicates their regional-level status as well as just `city`). I'm not sure if cities support specifying a custom `placetype` at the moment; this relates to FIXME #14 above concerning unifying cities and political divisions internally. 27. [DONE] The bare category handler (`get_bare_categories` in [[Module:place/placetypes]]) is not smart enough to avoid overcategorizing cities or other divisions that are of the right placetype but in the wrong containing polity. For example, Asturian [[Llión]] "León (city in Spain)" gets put in [[:Category:ast:León]] even though the latter is supposed to refer to a city in Mexico. We can borrow the check-containing-polity code from `generic_cat_handler`. 28. [DONE] Redo handling of singular and plural to respect overrides specified in placetype_data. Check more carefully for things that may not singularize correctly, e.g. 'passes' -> 'passe'? Definitely 'headquarters' and variants. 29. [DONE] Combine placetype_equivs and other placetype data into `placetype_data`. Figure out if we need the distinction between `placetype_equivs` and `fallback`. 30. `has_neighborhoods` may need to be a function that can look at the containing holonyms to determine whether the entity in question is city-like. 31. [DONE] Bare placenames as they appear in holonyms (e.g. `Riau Islands`) instead of category keys (e.g. `the Riau Islands, Indonesia`) should appear in the polity data tables. As a first pass, the word "the" should not appear but should instead be a property of the polity. 32. [DONE] `capital_city_cat_handler` should use `get_holonyms_to_check()`. 33. [PARTLY DONE] The code to generate and parse the correct preposition ("in" or "of") is very convoluted, and the actual preposition used is specified in various locations with various defaults, sometimes hardcoded. This should be simplified. 
It is made more difficult by the fact that the in/of distinction occurs in several places: (a) when generating the {{place}} text in old-style descriptions where the preposition isn't explicitly given, which uses the `preposition` setting in placetype_data, defaulting to "in"; (b) when generating categories based on explicit category specs in placetype_data (which are gradually being deprecated), which likewise uses the `preposition` setting in placetype_data, defaulting to "in"; (c) when generating categories based on political_division_cat_handler, originating in the `divs` placetypes for specific known locations in [[Module:place/locations]], which uses the `prep` setting embedded in the `divs` specifications, defaulting to "of"; (d) when generating categories based on category handlers specified using the `cat_handler` property of entries in placetype_data, which tend to hardcode "in" or "of" depending on the specific category handler; (e) when generating category descriptions in [[Module:category tree/topic/Places]] for `divs` categories generated in (c), which (correctly) uses the same `prep` setting embedded in the `divs` settings that is used when generating the categories themselves; (f) when generating category descriptions for categories generated in (b) and (d) above, which relies on the `generic_before_non_cities` and `generic_before_cities` settings in placetype_data, which need to match the corresponding prepositions hardcoded in the category generation handlers. Instead of the hardcoding, the category generation handler should respect the `generic_before_*` settings. 34. [[Krakow]] defined as {{place|en|A <<city>> on the [[Vistula]] River, the <<capital>> of the <<voi/Lesser Poland Voivodeship>> in southern <<c/Poland>>}} categorizes under [[:Category:Voivodeship capitals]] when it should probably instead be under [[:Category:Voivodeship capitals of Poland]]. 
Possibly this is because the various voivodeships haven't yet been entered as known locations, but this should happen regardless of that. 35. {{tcl}} bugs: a. [DONE] Lowercase initial letter in new-style {{place}} descriptions in {{tcl}}. Maybe we can have a setting tcl_nolc=1 to prevent this from happening. b. [DONE] tcl= and probably new-style {{place}} descriptions in general should recognize ;; to separate distinct {{place}} descriptions, and similarly ;;and as the equivalent of regular `;and`, etc. c. [DONE] The value supplied in `modern=` should be displayed in {{tcl}} descriptions regardless of the setting that normally disables this, so that e.g. the foreign-language equivalent of [[British Honduras]] doesn't just say it's a former British colony in Central America but specifically identifies it as modern Belize. If the user gives, place_modern= in {{tcl}}, that should override the modern= value and still display. d. [DONE] The page supplied to {{tcl}} should be used for generating bare categories even if t= is supplied and overrides the English term displayed. [DONE] e. [DONE] If text follows {{place}} and begins with a semicolon, the semicolon isn't copied into {{tcl}}. 36. County boroughs used as holonyms currently display 'borough county borough' because there's an affix setting for 'county borough' and a fallback display handler for 'borough'. We need to rethink this; maybe merge the affix setting and display handlers. 37. Implement known-location groups and specs in a more standardly object-oriented way using metatables. 38. Implement caching of known location lookup in the holonym. This may have to be keyed by placetype, but we can have a special field for when the lookup placetype is the same as the user-specified placetype of the holonym. Use this known location in place of looking up known locations and store the appropriate known location there in `augment_holonyms_with_container()` instead of calling `key_to_placename`. 39. 
Bug fixes with 'the': (a) [DONE] [[Kazaň]] defined as {{place|cs|caplc|rep:Pref/Tatarstan|c/Russia|t1=Kazan}} displays as "Republic of the Tatarstan". (b) [[Valday]] defined as {{place|en|town/administrative center|dist:Suf/Valdaysky|obl/Novgorod|c/Russia}} displays as "a town, the administrative center of the Valdaysky District". Changing to `dist:suf/Valdaysky` displays as "... of Valdaysky district". 40. [DONE] Bug fix with 'the': [[Verkhoyansk]] defined as {{place|en|town|rep/Sakha|c/Russia}} displays as "a town in the Sakha". 41. [DONE] [[Category:Cities in Asia]] has [[Category:Cities in Eurasia]] as a parent, which in turn has [[Category:Cities in the Earth]] as a parent. Continents should not have the second parent like this. 42. [DONE] When checking `british_spelling`, it should check all containers as well; otherwise it's too hard to keep this in sync across cities, administrative divisions and countries. 43. [DONE] `skip_polity_parent_type` should be renamed to container_parent_type or similar. 44. There should be a flag to allow e.g. departments of France that are currently categorized as departments of their region to also be categorized as departments of France. 45. [DONE] Aliases are causing iterate_matching_holonym_location() to fail, e.g. if [[براق]] "Prague" is specified as {{place|acw|capital city|c/Czechia|t1=Prague}}, this fails to add a bare category [[Category:acw:Prague]] because the code in iterate_matching_holonym_location() isn't resolving aliases when comparing the known container 'Czech Republic'. Probably we want to build an alias table to speed up these sorts of lookups. 46. [DONE; DUE TO TYPO IN HANDLER] The district cat handler is failing to work right, e.g. in [[Saint-Gaudérique]] defined as {{place|fr|district|city/Perpignan|in|dept/Pyrénées-Orientales|r/Occitania|c/France|t=Saint-Gaudérique}}, only the 'Places in ...' categories are getting triggered. 47.
Suburbs of a given city aren't generally in the city and may not even be in the same country or country division, so they should not categorize as "Places in ..." based on the city and specified country and division. Same goes for "enclave" (within somewhere) and "exclave". 48. When converting display aliases, we should automatically convert full placenames to full placenames and elliptical placenames to elliptical placenames instead of always either doing elliptical or full placenames depending on the value of `display_as_full`. 49. `@obsolete form of` and `@archaic form of` should automatically trigger nocat=1. 50. The handler that adds bare categories should pick up values in <eq:...>. ]=] --[==[ var: List specifying the allowed form-of directives, used for former names, official names, abbreviations, etc. of places. The key is the form-of directive and the value is an object with the following properties: * `text`: The actual text displayed before the terms. If the value is `+`, the key is used as the text. If the value is a function, it is passed a single argument, the overall place spec (see comment at top of file) and should return the text to be displayed. * `type_prefix`: The prefix used to generate the placetype for looking up the appropriate category or categories in the placetype data structure. Can be omitted if there are no categories associated with the directive. * `conjunction`: The conjunction used to join multiple terms, defaulting to `and`. * `cat`: Additional category or categories to add the term to, whenever this particular directive is used. Normally the value is a topic-style category minus the langcode prefix, but if prefixed with `cln:`, it is a langname-style category. For example, the value `"Abbreviations"` would correspond to a category [[:Category:en:Abbreviations]] (assuming the language of the {{tl|place}} call is English), while the value `"cln:abbreviations"` corresponds to a category [[:Category:English abbreviations]]. 
Use a list of such specs for multiple categories.
* `default_foreign`: If specified, the default language of terms given along with this directive is the language in
  {{para|1}}; otherwise it is English.
* `alias_of`: If specified, the directive is an alias of the directive whose key is the given value; alias entries in
  this table carry no other properties.
]==]
export.all_form_of_directives = {
	-- Names (former, official, long-form, nicknames).
	["former name of"] = {text = "+", type_prefix = "FORMER_NAME_OF"},
	["fmr of"] = {alias_of = "former name of"},
	["ancient name of"] = {text = "+", type_prefix = "FORMER_NAME_OF"},
	["official name of"] = {text = "+", type_prefix = "OFFICIAL_NAME_OF"},
	["former official name of"] = {text = "+", type_prefix = "FORMER_OFFICIAL_NAME_OF"},
	["long form of"] = {text = "+", type_prefix = "LONG_FORM_OF"},
	["former long form of"] = {text = "+", type_prefix = "FORMER_LONG_FORM_OF"},
	["nickname for"] = {text = "+", type_prefix = "NICKNAME_FOR"},
	["official nickname for"] = {text = "+", type_prefix = "OFFICIAL_NICKNAME_FOR"},
	["former nickname for"] = {text = "+", type_prefix = "FORMER_NICKNAME_FOR"},
	["derogatory name for"] = {
		text = "[[Appendix:Glossary#derogatory|derogatory]] name for",
		type_prefix = "DEROGATORY_NAME_FOR",
	},
	["synonym of"] = {text = "+"},
	["syn of"] = {alias_of = "synonym of"},

	-- Shortenings (abbreviations, initialisms, acronyms, ellipses, clippings).
	["abbreviation of"] = {
		text = "[[Appendix:Glossary#abbreviation|abbreviation]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:abbreviations",
		default_foreign = true,
	},
	["abbr of"] = {alias_of = "abbreviation of"},
	["abbrev of"] = {alias_of = "abbreviation of"},
	["initialism of"] = {
		text = "[[Appendix:Glossary#initialism|initialism]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:initialisms",
		default_foreign = true,
	},
	["init of"] = {alias_of = "initialism of"},
	["acronym of"] = {
		text = "[[Appendix:Glossary#acronym|acronym]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:acronyms",
		default_foreign = true,
	},
	["syllabic abbreviation of"] = {
		text = "[[Appendix:Glossary#syllabic abbreviation|syllabic abbreviation]] of",
		type_prefix = "ABBREVIATION_OF",
		cat = "cln:syllabic abbreviations",
		default_foreign = true,
	},
	["sylabbr of"] = {alias_of = "syllabic abbreviation of"},
	["sylabbrev of"] = {alias_of = "syllabic abbreviation of"},
	["ellipsis of"] = {
		text = "[[Appendix:Glossary#ellipsis|ellipsis]] of",
		type_prefix = "ELLIPSIS_OF",
		cat = "cln:ellipses",
		default_foreign = true,
	},
	["ellip of"] = {alias_of = "ellipsis of"},
	["clipping of"] = {
		text = "[[Appendix:Glossary#clipping|clipping]] of",
		type_prefix = "CLIPPING_OF",
		cat = "cln:clippings",
		default_foreign = true,
	},
	["clip of"] = {alias_of = "clipping of"},

	-- Alternative forms and spellings.
	["alternative form of"] = {text = "+", default_foreign = true},
	["alt form"] = {alias_of = "alternative form of"},
	["alternative spelling of"] = {text = "+", default_foreign = true},
	["alt spell"] = {alias_of = "alternative spelling of"},
	["alt sp"] = {alias_of = "alternative spelling of"},

	-- Dated, archaic and obsolete forms and spellings.
	["dated form of"] = {
		text = "[[Appendix:Glossary#dated|dated]] form of",
		type_prefix = "DATED_FORM_OF",
		cat = "cln:dated forms",
		default_foreign = true,
	},
	["dated form"] = {alias_of = "dated form of"},
	["dated spelling of"] = {
		text = "[[Appendix:Glossary#dated|dated]] spelling of",
		type_prefix = "DATED_FORM_OF",
		cat = "cln:dated forms",
		default_foreign = true,
	},
	["dated spell"] = {alias_of = "dated spelling of"},
	["dated sp"] = {alias_of = "dated spelling of"},
	["archaic form of"] = {
		text = "[[Appendix:Glossary#archaic|archaic]] form of",
		type_prefix = "ARCHAIC_FORM_OF",
		cat = "cln:archaic forms",
		default_foreign = true,
	},
	["arch form"] = {alias_of = "archaic form of"},
	["archaic spelling of"] = {
		text = "[[Appendix:Glossary#archaic|archaic]] spelling of",
		type_prefix = "ARCHAIC_FORM_OF",
		cat = "cln:archaic forms",
		default_foreign = true,
	},
	["arch spell"] = {alias_of = "archaic spelling of"},
	["arch sp"] = {alias_of = "archaic spelling of"},
	["obsolete form of"] = {
		text = "[[Appendix:Glossary#obsolete|obsolete]] form of",
		type_prefix = "OBSOLETE_FORM_OF",
		cat = "cln:obsolete forms",
		default_foreign = true,
	},
	["obs form"] = {alias_of = "obsolete form of"},
	["obsolete spelling of"] = {
		text = "[[Appendix:Glossary#obsolete|obsolete]] spelling of",
		type_prefix = "OBSOLETE_FORM_OF",
		cat = "cln:obsolete forms",
		default_foreign = true,
	},
	["obs spell"] = {alias_of = "obsolete spelling of"},
	["obs sp"] = {alias_of = "obsolete spelling of"},
}

-- Return the label to display for the `seat` extra-info argument. The label is chosen from the first entry placetype
-- of the first place description in `overall_place_spec`: counties, parishes and boroughs (singular or plural) get a
-- placetype-specific label; anything else gets plain "seat".
local function get_seat_text(overall_place_spec)
	local first_placetype = overall_place_spec.descs[1].placetypes[1]
	if first_placetype == "county" or first_placetype == "counties" then
		return "county seat"
	end
	if first_placetype == "parish" or first_placetype == "parishes" then
		return "parish seat"
	end
	if first_placetype == "borough" or first_placetype == "boroughs" then
		return "borough seat"
	end
	return "seat"
end

--[==[ var:
List specifying the allowed arguments containing extra information that is sometimes added to a definition, such as the
capital, largest city, modern name, official name, etc., along with associated properties; displayed in the order given.
Each element is an object with the following properties:
* `arg`: The argument name.
* `text`: The actual text displayed before the terms. If the value is `+`, the argument name is used as the text. If the
  value is a function, it is passed a single argument, the overall place spec (see the comment at the top of the file)
  and should return the text to be displayed.
* `conjunction`: The conjunction used to join multiple terms, defaulting to `and`.
* `display_even_when_dropped`: Display this piece of extra info even when it would normally be dropped (e.g. in
  {{tl|tcl}} when the language is other than English).
* `match_sentence_style`: If true, the text will be capitalized and preceded by a period when ''sentence style'' is in
  effect (essentially, when the language is English and there is no translation specified using {{para|t}} or similar
  parameter); otherwise, the text will be displayed as-is and preceded by a semicolon. If false, the semicolon style
  will always be used.
* `auto_plural`: If true, pluralize the text when there is more than one term.
* `with_colon`: If true, follow the text with a colon.
(This colon cannot easily be included in the text itself because if pluralized, the pluralized text goes before the colon.) ]==] export.extra_info_args = { {arg = "modern", text = "+", conjunction = "or", display_even_when_dropped = true}, {arg = "now", text = "now,", conjunction = "or", display_even_when_dropped = true}, {arg = "full", text = "in full,", conjunction = "or", display_even_when_dropped = true}, {arg = "short", text = "short form", conjunction = "or"}, {arg = "abbr", text = "abbreviation", conjunction = "or"}, {arg = "former", text = "formerly,"}, {arg = "official", text = "official name", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "capital", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "largest city", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "caplc", text = "capital and largest city", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "seat", text = get_seat_text, match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "shire town", text = "+", match_sentence_style = true, auto_plural = true, with_colon = true}, {arg = "headquarters", text = "+", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "center", text = "administrative center", match_sentence_style = true, auto_plural = false, with_colon = true}, {arg = "centre", text = "administrative centre", match_sentence_style = true, auto_plural = false, with_colon = true}, } export.extra_info_arg_map = {} for _, spec in ipairs(export.extra_info_args) do export.extra_info_arg_map[spec.arg] = spec end ----------- Wikicode utility functions -- Return a wikilink link {{l|language|text}} local function link(text, langcode, id) if not langcode then return text end return m_links.full_link( {term = text, lang = require(languages_module).getByCode(langcode, true, "allow etym"), id = id}, nil, "allow self link" ) end 
---------- Basic utility functions

-- Record `page` under the "place/" tracking prefix. The tracked pages can be listed by visiting
-- [[Wiktionary:Tracking/place/PAGE]] and using "What links here". Always returns true.
local function track(page)
	local do_track = require(debug_track_module)
	do_track("place/" .. page)
	return true
end

-- Uppercase the first letter of every space-separated word in `text`.
local function ucfirst_all(text)
	-- Single word: nothing to split.
	if not text:find(" ") then
		return m_strutils.ucfirst(text)
	end
	local words = split(text, " ", true)
	for idx = 1, #words do
		words[idx] = m_strutils.ucfirst(words[idx])
	end
	return concat(words, " ")
end

-- Lowercase `text` using the wiki's content language.
local function lc(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lc(text)
end

---------- Argument parsing functions and utilities

-- Split `val` on commas that are not followed by whitespace. A value without any comma is returned as a one-element
-- list without loading the parsing module.
local function split_on_comma(val)
	if not val:find(",") then
		return {val}
	end
	return require(parse_interface_module).split_on_comma(val)
end

-- Split `arg` on slashes, leaving alone any slash that sits inside an HTML tag such as </span> or <br />.
local function split_on_slash(arg)
	-- Without a "<" there can be no tag, so a plain split is enough.
	if not arg:find("<") then
		return split(arg, "/", true)
	end
	local parse_utils = require(parse_utilities_module)
	-- Break the text into balanced <...> runs, split only the text outside of them on "/", and glue each resulting
	-- group (a list of pieces) back into a single string.
	local runs = parse_utils.parse_balanced_segment_run(arg, "<", ">")
	local groups = parse_utils.split_alternating_runs(runs, "/")
	for idx = 1, #groups do
		groups[idx] = concat(groups[idx])
	end
	return groups
end

-- Implement "implications", i.e. where the presence of a given holonym causes additional holonym(s) to be added.
-- Implications apply only to categorization. There used to be support for "general implications" that applied to both
-- display and categorization, but there ended up not being any such implications, so we've removed the support.
-- It is a bad idea in any case to have such implications; the user might purposely leave out a higher-level polity to
-- avoid redundancy in several successive definitions, and we wouldn't want to override that. Note that in practice the
-- mechanism implemented by this function is used specifically for non-administrative geographic regions such as
-- Eastern Europe and the West Bank; there is a similar mechanism for administrative regions handled by
-- `augment_holonyms_with_containing_polity` in [[Module:place/placetypes]].
--
-- `place_descriptions` is a list of place descriptions (see top of file, collectively describing the data passed to
-- {{place}}). `implication_data` is the data used to implement the implications, i.e. a table indexed by holonym
-- placetype, each value of which is a table indexed by holonym placename, each value of which is a list of
-- "PLACETYPE/PLACENAME" holonyms to be added to the end of the list of holonyms.
--
-- The function returns nothing; each description's `holonyms` list is replaced in place by a new list in which every
-- original holonym is immediately followed by the holonyms it implies.
local function handle_category_implications(place_descriptions, implication_data)
	for i, desc in ipairs(place_descriptions) do
		if desc.holonyms then
			local new_holonyms = {}
			for _, holonym in ipairs(desc.holonyms) do
				insert(new_holonyms, holonym)
				-- Look up the implication list under the holonym's placetype or any equivalent placetype.
				local imp_data = m_placetypes.get_equiv_placetype_prop(holonym.placetype, function(pt)
					local implication = implication_data[pt] and implication_data[pt][holonym.unlinked_placename]
					if implication then
						return implication
					end
				end)
				if imp_data then
					for _, holonym_to_add in ipairs(imp_data) do
						-- Each implied holonym must be exactly "PLACETYPE/PLACENAME".
						local split_holonym = split_on_slash(holonym_to_add)
						if #split_holonym ~= 2 then
							internal_error("Invalid holonym in implications: %s", holonym_to_add)
						end
						local holonym_placetype, holonym_placename = unpack(split_holonym, 1, 2)
						local new_holonym = {
							-- By the time we run, the display has already been generated so we don't need to set
							-- display_placename.
							placetype = holonym_placetype,
							unlinked_placename = holonym_placename
						}
						insert(new_holonyms, new_holonym)
						m_placetypes.key_holonym_into_place_desc(desc, new_holonym)
					end
				end
			end
			desc.holonyms = new_holonyms
		end
	end
end

-- Split a holonym (e.g. "continent/Europe" or "country/en:Italy" or "in southern" or "r:suf/O'Higgins" or
-- "c/Austria,Germany,Czech Republic") into its components. Return a list of holonym objects (see top of file). Note
-- that if there isn't a slash in the holonym (e.g. "in southern"), the `placetype` field of the holonym will be nil.
-- Placetype aliases (e.g. "r" for "region") and placename aliases (e.g. "US" or "USA" for "United States") will be
-- expanded.
--
-- Two optional one-character prefixes are recognized, in this order: a leading "!" sets `no_display` on every returned
-- holonym, and a following "*" sets `suppress_comma`. After the placetype, colon-separated modifiers may follow:
-- `also` (sets `continue_cat_loop`), `the` (sets `force_the` on the first holonym) and at most one affix type out of
-- `pref`, `Pref`, `suf`, `Suf`, `noaff`. Repeating a modifier or using an unknown one throws an error.
local function split_holonym(raw)
	local no_display, combined_holonym = raw:match("^(!)(.*)$")
	no_display = not not no_display
	combined_holonym = combined_holonym or raw
	local suppress_comma, combined_holonym_without_comma = combined_holonym:match("^(%*)(.*)$")
	suppress_comma = not not suppress_comma
	combined_holonym = combined_holonym_without_comma or combined_holonym
	local holonym_parts = split_on_slash(combined_holonym)
	if #holonym_parts == 1 then
		-- No slash: raw text with no placetype.
		-- `unlinked_placename` should not be used.
		return {{display_placename = combined_holonym, no_display = no_display, suppress_comma = suppress_comma}}
	end

	-- Rejoin further slashes in case of slash in holonym placename, e.g. Admaston/Bromley.
	local placetype = holonym_parts[1]
	local placename = concat(holonym_parts, "/", 2)

	-- Check for modifiers after the holonym placetype.
	local split_holonym_placetype = split(placetype, ":", true)
	placetype = split_holonym_placetype[1]
	local affix_type
	local saw_also
	local saw_the
	for i = 2, #split_holonym_placetype do
		local modifier = split_holonym_placetype[i]
		if modifier == "also" then
			if saw_also then
				error(("Modifier ':also' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_also = true
		elseif modifier == "the" then
			if saw_the then
				error(("Modifier ':the' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_the = true
		elseif modifier == "pref" or modifier == "Pref" or modifier == "suf" or modifier == "Suf" or
			modifier == "noaff" then
			-- Only one affix-type modifier is allowed, even if the second one differs from the first.
			if affix_type then
				error(("Affix-type modifier ':%s' occurs twice in holonym '%s'"):format(modifier, combined_holonym))
			end
			affix_type = modifier
		else
			error(("Unrecognized holonym placetype modifier '%s', should be one of " ..
				"'pref', 'Pref', 'suf', 'Suf', 'noaff', 'also' or 'the'"):format(modifier))
		end
	end

	placetype = m_placetypes.resolve_placetype_aliases(placetype)
	local holonyms = split_on_comma(placename)
	local pluralize_affix = #holonyms > 1
	-- Which holonym of a comma-separated group carries the affix: the first for prefixes, none (index 0 never matches)
	-- for `noaff`, and the last otherwise. Every other holonym gets `suppress_affix` set.
	local affix_holonym_index = (affix_type == "pref" or affix_type == "Pref") and 1 or affix_type == "noaff" and 0 or
		#holonyms
	for i, placename in ipairs(holonyms) do
		-- Check for langcode before the holonym placename, but don't get tripped up by Wikipedia links, which begin
		-- "[[w:...]]" or "[[wikipedia:]]".
		local langcode, placename_without_langcode = rmatch(placename, "^([^%[%]]-):(.*)$")
		if langcode then
			placename = placename_without_langcode
		end
		placename = m_placetypes.resolve_placename_display_aliases(placetype, placename)
		-- Replace the raw string in the list with the corresponding holonym object.
		holonyms[i] = {
			placetype = placetype,
			display_placename = placename,
			unlinked_placename = m_placetypes.remove_links_and_html(placename),
			langcode = langcode,
			affix_type = i == affix_holonym_index and affix_type or nil,
			pluralize_affix = i == affix_holonym_index and pluralize_affix,
			suppress_affix = i ~= affix_holonym_index,
			no_display = no_display,
			suppress_comma = suppress_comma,
			continue_cat_loop = saw_also,
			force_the = i == 1 and saw_the,
		}
	end
	return holonyms
end

-- Build the inline-modifier spec used when parsing terms: the `link`, `q`, `l` and `ref` modifier groups, the `eq`
-- modifier, and an overall `conj` modifier restricted to "and", "or" and "and/or".
-- NOTE(review): wrapped in `memoize`, presumably so that the spec is only constructed on first use -- confirm against
-- the definition of `memoize`.
local get_param_mods = memoize(function()
	local m_param_utils = require(parameter_utilities_module)
	return m_param_utils.construct_param_mods {
		{group = {"link", "q", "l", "ref"}},
		{param = "eq"},
		-- FIXME: Finish [[Module:format utilities]].
		--{param = "conj", set = require(format_utilities_module).allowed_conjs_for_join_segments, overall = true},
		{param = "conj", set = {["and"] = true, ["or"] = true, ["and/or"] = true}, overall = true},
	}
end)

-- Parse `term`, a comma-separated list of terms with optional inline modifiers, using the modifier spec from
-- `get_param_mods()`. `paramname` is handed to the parser as the name of the parameter being parsed. `default_lang` is
-- stored in the `lang` field of any term object for which the parser did not itself set a language (e.g. from a
-- language prefix). Returns whatever `parse_inline_modifiers()` returns; callers in this file read its `terms` and
-- `conj` fields.
local function parse_term_with_inline_modifiers(term, paramname, default_lang)
	-- FIXME: Finish changes to [[Module:parameter utilities]] and [[Module:parse utilities]] that support continuations
	-- and new-format generate_obj().
	--local function generate_obj(data)
	--	local m_param_utils = require(parameter_utilities_module)
	--	data.parse_lang_prefix = true
	--	data.special_continuations = m_param_utils.default_special_continuations
	--	data.default_lang = default_lang
	--	return m_param_utils.generate_obj_maybe_parsing_lang_prefix(data)
	--end
	local function generate_obj(raw_term, parse_err)
		local obj = require(parameter_utilities_module).generate_obj_maybe_parsing_lang_prefix {
			term = raw_term,
			parse_err = parse_err,
			parse_lang_prefix = true,
		}
		-- Fall back to the caller's language only when no language was set on the object.
		obj.lang = obj.lang or default_lang
		return obj
	end

	return require(parse_interface_module).parse_inline_modifiers(term, {
		paramname = paramname,
		param_mods = get_param_mods(),
		generate_obj = generate_obj,
		-- FIXME: See above.
		--generate_obj_new_format = true,
		splitchar = ",",
		outer_container = {},
	})
end

-- Parse a form-of directive of the form "@DIRECTIVE:TERMS" (e.g. "@abbr of:..."), where DIRECTIVE consists of
-- lowercase letters, spaces and hyphens and must be a key of `export.all_form_of_directives`. `lang` is the language
-- used for terms without an explicit language when the directive's spec sets `default_foreign`; otherwise `enlang` is
-- used. `form_of_overridden_args`, if given, is a table keyed by canonical directive name whose values have fields
-- `new_value` (replacement for TERMS) and `new_directive` (the canonical directive to use instead); when an override
-- applies, `lang` is always used as the default language.
--
-- Returns a table with fields `directive` (the canonical directive name after alias resolution and overriding),
-- `terms` and `conj` (from parsing the terms) and `spec` (the directive's spec). Throws an error on a misformatted or
-- unrecognized directive, and on inconsistent alias or override data.
local function parse_form_of_directive(arg, lang, form_of_overridden_args)
	local form_of_directive, raw_terms = arg:match("^@([a-z -]+):(.*)$")
	if not form_of_directive then
		error("Misformatted @-directive: " .. dump(arg))
	end
	if not export.all_form_of_directives[form_of_directive] then
		-- List every known directive, sorted, in the error message.
		local known_directives = {}
		for k, _ in pairs(export.all_form_of_directives) do
			insert(known_directives, '"' .. k .. '"')
		end
		table.sort(known_directives)
		error(("Unrecognized form-of directive %s in @-directive %s; recognized directives are %s"):format(
			dump(form_of_directive), dump(arg), concat(known_directives, ", ")))
	end
	local spec = export.all_form_of_directives[form_of_directive]
	local canonical_directive = form_of_directive
	if spec.alias_of then
		-- Aliases are resolved exactly one level deep; an alias of an alias is a data error.
		canonical_directive = spec.alias_of
		spec = export.all_form_of_directives[canonical_directive]
		if not spec then
			internal_error("Form-of directive alias %s points to %s, which is not a directive",
				"@" .. form_of_directive, canonical_directive)
		elseif spec.alias_of then
			internal_error("Form-of directive alias %s points to %s, which is also an alias",
				"@" .. form_of_directive, canonical_directive)
		end
	end
	local default_foreign = spec.default_foreign
	-- Name under which the parsed terms are reported; uses the directive as the user wrote it.
	local directive_param = "@" .. form_of_directive
	if form_of_overridden_args and form_of_overridden_args[canonical_directive] then
		raw_terms = form_of_overridden_args[canonical_directive].new_value
		local new_directive = form_of_overridden_args[canonical_directive].new_directive
		local new_spec = export.all_form_of_directives[new_directive]
		if not new_spec then
			error(("Internal error: [[Module:transclude]] passed in unrecognized replacement directive '@%s'"):
				format(new_directive))
		end
		if new_spec.alias_of then
			error(("Internal error: [[Module:transclude]] passed in replacement directive alias '@%s', " ..
				"should be canonical"):format(new_directive))
		end
		if new_directive ~= canonical_directive then
			directive_param = directive_param .. (" (replaced with @%s)"):format(new_directive)
			canonical_directive = new_directive
			spec = new_spec
		end
		-- Overridden terms always default to `lang`, regardless of the spec's own `default_foreign` setting.
		default_foreign = true
	end
	local terms = parse_term_with_inline_modifiers(raw_terms, directive_param, default_foreign and lang or enlang)
	return {
		directive = canonical_directive,
		terms = terms.terms,
		conj = terms.conj,
		spec = spec,
	}
end

-- Parse an argument containing extra information that is sometimes added to a definition, such as the capital, largest
-- city, modern name, official name, etc. `args` is the value from the parsed argument structure and can be either nil,
-- a string or a list (depending on whether it was declared as a single parameter or a list). `spec` is the extra info
-- spec corresponding to the type of extra info. Each value in `args` can be a comma-separated list of terms with inline
-- modifiers attached. [FIXME: we should switch to always using the comma-separated format and disallow list parameters
-- such as |capital=, |capital2=, etc.]
-- The return value is a structure containing fields `terms` (a list of term objects, each of which is in the format
-- expected by full_link() in [[Module:links]]), `conj` (an explicit conjunction to join multiple terms, or nil if no
-- explicit conjunction was given) and `spec` (the passed-in spec). Returns nil if `args` is nil or an empty list.
-- `default_lang` is the language assumed for terms that don't specify one. An error is thrown if two values specify
-- different explicit conjunctions.
local function parse_extra_info_arg(args, spec, default_lang)
	if not args then
		return nil
	end
	if type(args) ~= "table" then
		args = {args}
	end
	if not args[1] then
		return nil
	end
	local terms = nil
	local conj
	for i, arg in ipairs(args) do
		-- The parameter name reported in parse errors is e.g. "capital" for the first value, "capital2" for the second.
		local this_terms = parse_term_with_inline_modifiers(arg, spec.arg .. (i == 1 and "" or i), default_lang)
		local thisconj = this_terms.conj
		if not conj then
			conj = thisconj
		elseif thisconj and conj ~= thisconj then
			-- The message has three placeholders; the third is the argument name.
			error(("Two different conjunctions '%s' and '%s' specified for |%s=; you only need to specify the " ..
				"conjunction once"):format(conj, thisconj, spec.arg))
		end
		if not terms then
			terms = this_terms.terms
		else
			m_table.extend(terms, this_terms.terms)
		end
	end
	return {
		spec = spec,
		terms = terms,
		conj = conj,
	}
end

--[==[
Parse a "new-style" place description, with placetypes and holonyms surrounded by `<<...>>` amid otherwise raw text.
Return value is a place description object as documented at the top of the file. Exported for use by
[[Module:demonyms]].

`lang` is the language passed on to the form-of directive parser. `form_of_directives`, if given, is a list to which any
form-of directive (`<<@...>>`) found in the text is appended; if it is nil, form-of directives are not allowed, and if
it already contains directives on entry, further ones in the text are an error. A form-of directive must precede all
placetypes and holonyms; the raw text in front of it becomes its `pretext`. `form_of_overridden_args` is passed through
to the form-of directive parser.
]==]
function export.parse_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	local placetypes = {}
	-- Splitting on a pattern with a capture yields alternating raw text (odd indices) and `<<...>>` contents (even
	-- indices).
	local segments = split(text, "<<(.-)>>")
	local retval = {holonyms = {}, order = {}}
	local form_of_directives_already_present = form_of_directives and not not form_of_directives[1]
	for i, segment in ipairs(segments) do
		if i % 2 == 1 then
			insert(retval.order, {type = "raw", value = segment})
		elseif segment:find("@") then
			if not form_of_directives then
				error(("Form-of directive '%s' not allowed in this context"):format(segment))
			elseif form_of_directives_already_present then
				error(("Saw form-of directive '%s' in new-style place desc followed by direct (separate-parameter) form-of directives; not allowed"):format(
					segment))
			elseif placetypes[1] or retval.holonyms[1] then
				error(("Form-of directive '%s' must come first, before placetypes and holonyms"):format(segment))
			else
				local form_of_directive = parse_form_of_directive(segment, lang, form_of_overridden_args)
				if not retval.order[1] or retval.order[1].type ~= "raw" or retval.order[2] then
					internal_error("`retval.order` should have a single raw element: %s", retval.order)
				end
				-- Move the raw text seen so far out of the place description and into the directive.
				form_of_directive.pretext = retval.order[1].value
				retval.order[1] = nil
				insert(form_of_directives, form_of_directive)
			end
		elseif segment:find("/") then
			local holonyms = split_holonym(segment)
			for j, holonym in ipairs(holonyms) do
				if j > 1 then
					if not holonym.no_display then
						if j == #holonyms then
							insert(retval.order, {type = "raw", value = " and "})
						else
							insert(retval.order, {type = "raw", value = ", "})
						end
					end
					-- All but the first in a multi-holonym need an article. For the first one, the article is
					-- specified in the raw text if needed. (Currently, needs_article is only used when displaying the
					-- holonym, so it wouldn't matter when no_display is set, but we set it anyway in case we need it
					-- for something else.)
					holonym.needs_article = true
				end
				insert(retval.holonyms, holonym)
				if not holonym.no_display then
					insert(retval.order, {type = "holonym", value = #retval.holonyms})
				end
				m_placetypes.key_holonym_into_place_desc(retval, holonym)
			end
		else
			-- Placetype or qualifier, optionally in the form TREAT_AS:DISPLAY.
			local treat_as, display = segment:match("^(..-):(.+)$")
			if treat_as then
				segment = treat_as
			else
				display = segment
			end
			-- see if the placetype segment is just qualifiers
			local only_qualifiers = true
			local split_segments = split(segment, " ", true)
			for _, split_segment in ipairs(split_segments) do
				if m_placetypes.placetype_qualifiers[split_segment] == nil then
					only_qualifiers = false
					break
				end
			end
			insert(placetypes, {placetype = segment, only_qualifiers = only_qualifiers})
			if only_qualifiers then
				insert(retval.order, {type = "qualifier", value = display})
			else
				insert(retval.order, {type = "placetype", value = display})
			end
		end
	end
	if not form_of_directives_already_present and form_of_directives and form_of_directives[1] then
		form_of_directives[#form_of_directives].posttext = ""
	end

	-- Merge a qualifier-only segment into the segment that follows it, separated by a space.
	local final_placetypes = {}
	for i, placetype in ipairs(placetypes) do
		if i > 1 and placetypes[i - 1].only_qualifiers then
			final_placetypes[#final_placetypes] = final_placetypes[#final_placetypes] .. " " .. placetypes[i].placetype
		else
			insert(final_placetypes, placetypes[i].placetype)
		end
	end
	retval.placetypes = final_placetypes
	return retval
end

--[==[
Parse one or more "new-style" place descriptions, with placetypes and holonyms surrounded by `<<...>>` amid otherwise
raw text. Multiple descriptions are separated by two semicolons in a row. Return value is a list of place description
objects as documented at the top of the file.

Each description other than the last has its `separator` field set to the separator that follows it, minus the first
semicolon. `form_of_directives` is only passed to the parse of the first description.
]==]
local function parse_conjoined_new_style_place_desc(text, lang, form_of_directives, form_of_overridden_args)
	local separate_specs = split(text, ";(;[^ ]*)")
	local descs = {}
	for i = 1, #separate_specs do
		if i % 2 == 1 then
			insert(descs, export.parse_new_style_place_desc(separate_specs[i], lang, form_of_directives,
				form_of_overridden_args))
			form_of_directives = nil
		else
			descs[#descs].separator = separate_specs[i]
		end
	end
	return descs
end

--[=[
Process numeric and "extra info" arguments into an overall place spec, as described at the top of the file. `data` is
an object with the following fields:
* `args`: The parsed arguments of {{tl|place}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `extra_info_overridden_set`, `form_of_overridden_args`: Same as the corresponding fields in the `data` object passed
  to `export.format`.

The return value is a table with fields `lang` (the value of `args[1]`), `args`, `directives` (the parsed form-of
directives), `descs` (the parsed place descriptions) and `extra_info` (the parsed extra-info arguments, in the order of
`export.extra_info_args`).
]=]
local function parse_overall_place_spec(data)
	local args, from_tcl, extra_info_overridden_set, form_of_overridden_args =
		data.args, data.from_tcl, data.extra_info_overridden_set, data.form_of_overridden_args
	local descs = {}
	local this_desc
	-- Index of separate (semicolon-separated) place descriptions within `descs`.
	local desc_index = 1
	-- Index of separate holonyms within a place description. 0 means we've seen no holonyms and have yet to process
	-- the placetypes that precede the holonyms. 1 means we've seen no holonyms but have already processed the
	-- placetypes.
	local holonym_index = 0
	local in_place_desc = false
	local form_of_directives = {}

	-- Set the `joiner` field of `desc` (the text placed between it and the following description) from `separator`:
	-- ";" gives "; " and requests an article on the following description, ";;" gives a single space, and anything
	-- else uses the text after the first character, surrounded by spaces if it begins with a letter and otherwise
	-- followed by a space.
	local function set_desc_joiner(desc, separator)
		if separator == ";" then
			desc.joiner = "; "
			desc.include_following_article = true
		elseif separator == ";;" then
			desc.joiner = " "
		else
			local joiner = separator:sub(2)
			if rfind(joiner, "^%a") then
				desc.joiner = " " .. joiner .. " "
			else
				desc.joiner = joiner .. " "
			end
		end
	end

	for _, arg in ipairs(args[2]) do
		if arg:find("^@") then
			if not (desc_index == 1 and holonym_index == 0) then
				error("@-directives cannot follow place descriptions")
			end
			local form_of_directive = parse_form_of_directive(arg, args[1], form_of_overridden_args)
			-- Successive directives are separated by a comma.
			if form_of_directives[1] then
				form_of_directive.pretext = ", "
			else
				form_of_directive.pretext = ""
			end
			insert(form_of_directives, form_of_directive)
		elseif arg == ";" or arg:find("^;[^ ]") then
			if not this_desc then
				error("Saw semicolon joiner without preceding place description")
			end
			set_desc_joiner(this_desc, arg)
			desc_index = desc_index + 1
			holonym_index = 0
			in_place_desc = false
		else
			if arg:find("<<") then
				if in_place_desc then
					error("New-style place description must come first or following a separator (semicolon or similar), not directly following another description")
				end
				in_place_desc = true
				local this_descs = parse_conjoined_new_style_place_desc(arg, args[1], form_of_directives,
					form_of_overridden_args)
				for j, desc in ipairs(this_descs) do
					this_desc = desc
					if holonym_index > 0 then
						desc_index = desc_index + 1
						holonym_index = 0
					end
					if j < #this_descs then
						set_desc_joiner(this_desc, this_desc.separator)
					end
					descs[desc_index] = this_desc
					holonym_index = #this_desc.holonyms + 1
				end
			else
				-- Old-style arguments can directly follow a new-style argument; they become additional holonyms
				-- tacked onto the end of the holonym list, and are displayed old-style except that there is no
				-- prefix before the first one following the new-style argument.
				in_place_desc = true
				if holonym_index == 0 then
					local entry_placetypes = split_on_slash(arg)
					this_desc = {placetypes = entry_placetypes, holonyms = {}}
					descs[desc_index] = this_desc
					holonym_index = holonym_index + 1
				else
					local holonyms = split_holonym(arg)
					for j, holonym in ipairs(holonyms) do
						if j > 1 then
							-- All but the first in a multi-holonym need an article. Not for the first one because e.g.
							-- {{place|en|city|s/Arizona|c/United States}} should not display as "a city in Arizona, the
							-- United States". The overall first holonym in the place description gets an article if
							-- needed regardless of our setting here.
							holonym.needs_article = true
							-- Insert "and" before the last holonym.
							if j == #holonyms then
								this_desc.holonyms[holonym_index] = {
									-- Use the no_display value from the first holonym; it should be the same for all
									-- holonyms. `unlinked_placename` should not be used.
									display_placename = "and",
									no_display = holonyms[1].no_display
								}
								holonym_index = holonym_index + 1
							end
						end
						this_desc.holonyms[holonym_index] = holonym
						m_placetypes.key_holonym_into_place_desc(this_desc, this_desc.holonyms[holonym_index])
						holonym_index = holonym_index + 1
					end
				end
			end
		end
	end

	-- Directives given as separate parameters end with ": " when something follows them (an explicit definition
	-- other than "-", or at least one place description when no definition is given) and with nothing otherwise.
	if form_of_directives[1] and not form_of_directives[#form_of_directives].posttext then
		form_of_directives[#form_of_directives].posttext =
			(args.def and args.def ~= "-" or not args.def and descs[1]) and ": " or ""
	end

	-- Tracking code. This does nothing but add tracking for seen placetypes and qualifiers. The place will be linked to
	-- [[Wiktionary:Tracking/place/entry-placetype/PLACETYPE]] for all entry placetypes seen; in addition, if PLACETYPE
	-- has qualifiers (e.g. 'small city'), there will be links for the bare placetype minus qualifiers and separately
	-- for the qualifiers themselves:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/BARE_PLACETYPE]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/QUALIFIER]]
	-- Note that if there are multiple qualifiers, there will be links for each possible split. For example, for
	-- 'small maritime city'), there will be the following links:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/small maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/maritime city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/city]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/small]]
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/maritime]]
	-- Finally, there are also links for holonym placetypes, e.g. if the holonym 'c/Italy' occurs, there will be the
	-- following link:
	-- [[Special:WhatLinksHere/Wiktionary:Tracking/place/holonym-placetype/country]]
	for _, desc in ipairs(descs) do
		for _, entry_placetype in ipairs(desc.placetypes) do
			local splits = m_placetypes.split_qualifiers_from_placetype(entry_placetype, "no canon qualifiers")
			for _, split_parts in ipairs(splits) do
				-- Each split is {previous qualifier, this qualifier, bare placetype}; the first element is not needed.
				local _, this_qualifier, bare_placetype = unpack(split_parts, 1, 3)
				track("entry-placetype/" .. bare_placetype)
				if this_qualifier then
					track("entry-qualifier/" .. this_qualifier)
				end
			end
		end
		for _, holonym in ipairs(desc.holonyms) do
			if holonym.placetype then
				track("holonym-placetype/" .. holonym.placetype)
			end
		end
	end

	local extra_info = {}
	for _, extra_info_spec in ipairs(export.extra_info_args) do
		local extra_info_terms = parse_extra_info_arg(args[extra_info_spec.arg], extra_info_spec,
			-- If called from {{tcl}} and extra info argument was set by {{tcl}}, interpret the argument
			-- according to the language in 1=; otherwise interpret as English. To override this, prefix
			-- with the appropriate language.
			from_tcl and extra_info_overridden_set and extra_info_overridden_set[extra_info_spec.arg] and args[1] or
			enlang)
		if extra_info_terms then
			insert(extra_info, extra_info_terms)
		end
	end

	return {
		lang = args[1],
		args = args,
		directives = form_of_directives,
		descs = descs,
		extra_info = extra_info,
	}
end

-------- Definition-generating functions

-- Return a string with the wikilinks to the English translations of the word.
--
-- `transl` is a list of translation arguments, each of which may hold several comma-separated translations. `ids` is a
-- table of sense-ID arguments indexed in parallel with `transl`; an entry may be missing, but if present it must hold
-- exactly as many comma-separated IDs as the corresponding entry of `transl` holds translations, or an error is
-- thrown. The linked translations are joined with ", ".
local function get_translations(transl, ids)
	local ret = {}
	for i, t in ipairs(transl) do
		local arg_transls = split_on_comma(t)
		local arg_ids = ids[i]
		if arg_ids then
			arg_ids = split_on_comma(arg_ids)
			if #arg_transls ~= #arg_ids then
				error(("Saw %s translation%s in t%s=%s but %s ID%s in tid%s=%s"):format(
					#arg_transls, #arg_transls > 1 and "s" or "", i == 1 and "" or i, t,
					#arg_ids, #arg_ids > 1 and "'s" or "", i == 1 and "" or i, ids[i]))
			end
		end
		for j, arg_transl in ipairs(arg_transls) do
			insert(ret, link(arg_transl, "en", arg_ids and arg_ids[j] or nil))
		end
	end
	return concat(ret, ", ")
end

-- Return the article (currently always `"the"`) to be prepended to the given placename, or nil. `decorated_placename`
-- is the placename as specified by the user along with any affix added to it. `placename` is the raw unlinked
-- placename, defaulting to the unlinked version of `decorated_placename` if not given. `placetypes` is a placetype or
-- list of placetypes for the placename. `suppress_holonym_use_the_check` suppresses checking the placetypes for
-- `holonym_use_the`.
local function get_placename_article(decorated_placename, placetypes, placename, suppress_holonym_use_the_check)
	-- Decide which article, if any, goes in front of a placename. The checks run in a fixed order and the first one
	-- that yields an article wins:
	--   1. an explicit entry in `m_placetypes.placename_article` for the placetype (or an equivalent) and placename;
	--   2. a matching known location whose spec, or failing that whose group, asks for "the";
	--   3. the placetype's own `holonym_use_the` setting (skipped if `suppress_holonym_use_the_check` is set);
	--   4. the patterns in `m_placetypes.placename_the_re`, first the universal "*" list, then the per-placetype ones.
	-- `placetypes` may be a single placetype string or a list of them. Returns nil if no article is needed.
	local plain_decorated = m_placetypes.remove_links_and_html(decorated_placename)
	-- The name already carries its article.
	if plain_decorated:find("^the ") then
		return nil
	end
	if not placename then
		placename = plain_decorated
	end
	if type(placetypes) == "string" then
		placetypes = {placetypes}
	end

	-- True if the unlinked decorated placename matches any pattern in the list `patterns`.
	local function matches_any(patterns)
		for _, pattern in ipairs(patterns) do
			if plain_decorated:find(pattern) then
				return true
			end
		end
		return false
	end

	-- (1) Explicitly listed article for this placetype/placename combination.
	for _, pt_name in ipairs(placetypes) do
		local explicit_article = m_placetypes.get_equiv_placetype_prop(pt_name, function(pt)
			local articles_for_pt = m_placetypes.placename_article[pt]
			local found = articles_for_pt and articles_for_pt[placename]
			if found then
				return found
			end
		end)
		if explicit_article then
			return explicit_article
		end
	end

	-- (2) Known locations. The equivalent placetypes are collected first so that e.g.
	-- {{place|en|@official name of:Bahamas|island country|r/Caribbean}} puts 'the' before Bahamas ("Bahamas" is only
	-- listed as a country, but "island country" falls back to "country").
	local equiv_placetype_names = {}
	for _, pt_name in ipairs(placetypes) do
		for _, equiv in ipairs(m_placetypes.get_placetype_equivs(pt_name)) do
			insert(equiv_placetype_names, equiv.placetype)
		end
	end
	-- find_matching_holonym_location() would be the natural choice here, but it doesn't currently work without alias
	-- resolution, so instead every matching location is inspected. In practice a placename never matches two
	-- locations of which only one wants the article.
	local location_query = {
		placetypes = equiv_placetype_names,
		placename = placename,
		alias_resolution = "none",
	}
	for group, _, spec in m_placetypes.iterate_matching_location(location_query) do
		-- With alias resolution off, the spec isn't initialized from the group, so the group default is consulted
		-- by hand -- but only when the spec says nothing at all; an explicit `the = false` must win over the default.
		local wants_the = spec.the
		if wants_the == nil then
			wants_the = group.default_the
		end
		if wants_the then
			return "the"
		end
	end

	-- (3) Placetypes such as 'sea' ask for an article before the placename. When the user wrote e.g.
	-- "sea:pref/Cortez" this would wrongly produce "the sea of the Cortez", which is why callers can switch the
	-- check off.
	if not suppress_holonym_use_the_check then
		for _, pt_name in ipairs(placetypes) do
			local use_the = m_placetypes.get_equiv_placetype_prop(pt_name, function(pt)
				local pt_data = placetype_data[pt]
				return pt_data and pt_data.holonym_use_the
			end)
			if use_the then
				return "the"
			end
		end
	end

	-- (4) Pattern-based checks: universal patterns first, then those specific to the placetype or its equivalents.
	if matches_any(m_placetypes.placename_the_re["*"]) then
		return "the"
	end
	for _, pt_name in ipairs(placetypes) do
		local matched = m_placetypes.get_equiv_placetype_prop(pt_name, function(pt)
			local patterns = m_placetypes.placename_the_re[pt]
			if patterns and matches_any(patterns) then
				return true
			end
			return nil
		end)
		if matched then
			return "the"
		end
	end

	return nil
end

-- Return the article (if any) needed in front of `decorated_placename`, the user-specified placename with any affix
-- added. The holonym object the placename came from is the one at `holonym_index` in the holonyms of `place_desc`.
-- A holonym without a placetype never gets an article. If the holonym has an affix type, the `holonym_use_the` check
-- of the placetype is suppressed.
local function get_holonym_article(decorated_placename, place_desc, holonym_index)
	local holonym = place_desc.holonyms[holonym_index]
	local placetype = holonym.placetype
	if placetype then
		local has_affix_type = not not holonym.affix_type
		return get_placename_article(decorated_placename, placetype, holonym.unlinked_placename, has_affix_type)
	end
	return nil
end

-- Convert a holonym into display format. This adds wikilinks to holonyms and passes them through any display handlers,
-- which may (e.g.)
-- add the placetype to the holonym. If `needs_article` is true, prepend the article `"the"` if the
-- holonym requires it (e.g. if the holonym is `United States`). `needs_article` is set to true when we are processing
-- the first specified holonym in an old-style place description (i.e. the holonym directly following the entry
-- placetype, with no raw-text holonym in between).
--
-- Examples:
-- ({placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, true) returns
-- the template-expanded equivalent of "the {{l|en|United States}}".
-- ({placetype = "region", display_placename = "O'Higgins", unlinked_placename = "O'Higgins", affix_type = "suf"}, false)
-- returns the template-expanded equivalent of "{{l|en|O'Higgins}} region".
-- ({display_placename = "in the southern"}, false) returns "in the southern" (without wikilinking because .placetype
-- and .langcode are both nil).
--
-- Parameters:
-- * `place_desc`: the place description whose `holonyms` list contains the holonym;
-- * `holonym_index`: index of the holonym within `place_desc.holonyms`;
-- * `needs_article`: whether the caller wants an article considered (the holonym's own `needs_article` and
--   `force_the` fields can also turn this on).
-- Returns the display string, or "" if the holonym has `no_display` set.
local function format_holonym(place_desc, holonym_index, needs_article)
	local holonym = place_desc.holonyms[holonym_index]
	if holonym.no_display then
		return ""
	end
	-- Remember the caller's value; it decides whether "the" goes before a *prefixed* affix further down.
	local orig_needs_article = needs_article
	needs_article = needs_article or holonym.needs_article or holonym.force_the
	local output = holonym.display_placename
	local placetype = holonym.placetype
	local affix_type_pt_data, affix_type, affix_is_prefix, affix, prefix, suffix, no_affix_strings
	local pt_equiv_for_affix_type, already_seen_affix, need_affix

	-- Implement display handlers.
	local display_handler = m_placetypes.get_equiv_placetype_prop(placetype, function(pt)
		return placetype_data[pt] and placetype_data[pt].display_handler
	end)
	if display_handler then
		output = display_handler(placetype, output)
	end
	if not holonym.suppress_affix then
		-- Implement adding an affix (prefix or suffix) based on the holonym's placetype. The affix will be
		-- added either if the placetype's placetype_data spec says so (by setting 'affix_type'), or if the
		-- user explicitly called for this (e.g. by using 'r:suf/O'Higgins'). Before adding the affix,
		-- however, we check to see if the affix is already present (e.g. the placetype is "district"
		-- and the placename is "Mission District"). The placetype can override the affix to add (by setting
		-- `prefix`, `suffix` or `affix`) and/or override the strings used for checking if the affix is already
		-- present (by setting 'no_affix_strings', which defaults to the affix explicitly given through `prefix`,
		-- `suffix` or `affix` if any are given). `prefix` and `suffix` take precedence over `affix` if both are
		-- set, but only when the appropriate type of affix is requested.

		-- Search through equivalent placetypes for a setting of `affix_type`, `affix`, `prefix` or `suffix`. If we
		-- find any, use them. If `affix_type` is given, it is overridden by the user's explicitly specified affix
		-- type. If either an `affix_type` is found or the user explicitly specified an affix type, the affix is
		-- displayed according to the following:
		-- 1. If `prefix`, `suffix` or `affix` is given by the placetype or equivalent placetypes, use it (e.g.
		--    placetype `administrative region` requests suffix "region" but doesn't set affix type; if the user
		--    explicitly specifies `administrative region` as the placetype for a holonym and specifies a suffixal
		--    affix type, use "region"). In this search, we stop looking if we find an explicit `affix_type`
		--    setting; if this is found without an associated affix setting, the assumption is the associated
		--    placetype was intended as the affix, not some explicit affix setting associated with a fallback
		--    placetype.
		-- 2. Otherwise, if the user explicitly requested an affix type, use the actual placetype (principle of
		--    least surprise).
		-- 3. Finally, fall back to the placetype associated with an explicit `affix_type` setting (which will
		--    always exist if we get this far).
		affix_type_pt_data, pt_equiv_for_affix_type = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and cdpt.affix_type and cdpt or nil
			end
		)
		-- Declared local here: this value used to be assigned to an undeclared (global) name, together with a
		-- second, never-read result. Only the first return value is needed.
		local affix_pt_data = m_placetypes.get_equiv_placetype_prop(placetype,
			function(pt)
				local cdpt = placetype_data[pt]
				return cdpt and (cdpt.affix_type or cdpt.affix or cdpt.prefix or cdpt.suffix) and cdpt or nil
			end
		)
		if affix_type_pt_data then
			affix_type = affix_type_pt_data.affix_type
			need_affix = true
		end
		if affix_pt_data then
			prefix = affix_pt_data.prefix or affix_pt_data.affix
			suffix = affix_pt_data.suffix or affix_pt_data.affix
			need_affix = true
		end
		no_affix_strings = affix_pt_data and affix_pt_data.no_affix_strings or
			affix_type_pt_data and affix_type_pt_data.no_affix_strings
		if holonym.affix_type and placetype then
			-- A user-specified affix type overrides the placetype's; the placetype itself is the default affix.
			affix_type = holonym.affix_type
			prefix = prefix or placetype
			suffix = suffix or placetype
			need_affix = true
		end
		if need_affix then
			-- At this point the affix_type has been determined and can't change any more, so we can figure out
			-- whether we need the calculated prefix or suffix.
			affix_is_prefix = affix_type == "pref" or affix_type == "Pref"
			if affix_is_prefix then
				affix = prefix
			else
				affix = suffix
			end
			if not affix then
				if not pt_equiv_for_affix_type then
					internal_error("Something wrong, `pt_equiv_for_affix_type` not set processing holonym: %s", holonym)
				end
				affix = pt_equiv_for_affix_type.placetype
				if not affix then
					internal_error("Something wrong, no affix could be located in `pt_equiv_for_affix_type` for " ..
						"holonym %s: %s", holonym, pt_equiv_for_affix_type)
				end
			end
			no_affix_strings = no_affix_strings or lc(affix)
			if holonym.pluralize_affix then
				affix = m_placetypes.pluralize_placetype(affix)
			end
			already_seen_affix = m_placetypes.check_already_seen_string(output, no_affix_strings)
		end
	end
	-- Link as English when there is a placetype but no explicit language code; raw text (neither set) gets nil.
	output = link(output, holonym.langcode or placetype and "en" or nil)
	if need_affix and not affix_is_prefix and not already_seen_affix then
		output = output .. " " .. (affix_type == "Suf" and ucfirst_all(affix) or affix)
	end
	if needs_article then
		local article = holonym.force_the and "the" or get_holonym_article(output, place_desc, holonym_index)
		if article then
			output = article .. " " .. output
		end
	end
	if affix_is_prefix and not already_seen_affix then
		output = (affix_type == "Pref" and ucfirst_all(affix) or affix) .. " of " .. output
		if orig_needs_article then
			-- Put the article before the added affix if we're the first holonym in the place description. This is
			-- distinct from the article added above for the holonym itself; cf. "c:pref/United States,Canada" ->
			-- "the countries of the United States and Canada". We need to use the value of `needs_article` passed
			-- in from the function, which indicates whether we're processing the first holonym.
			output = "the " .. output
		end
	end
	return output
end

-- Format a holonym for display, taking into account the entry's placetype (specifically, the last placetype if there
-- are more than one, excluding conjunctions and parenthetical items); the holonym's index among the holonyms in the
-- template (which specifies what the previous holonym is and whether it is the first holonym); and the full place
-- description (which helps resolve ambiguities in holonyms when looking up known locations). This may involve putting a
-- preposition ("in" or "of") before the formatted holonym, particularly if it is the first one, and may involve
-- prepending a comma.
-- If `holonym_no_prefix` is specified, nothing except a space is put before the holonym; used
-- when formatting mixed new/old-style descriptions.
local function format_holonym_in_context(entry_placetype, place_desc, holonym_index, holonym_no_prefix)
	local is_first = holonym_index == 1

	-- Work out the text (space, comma, preposition) that separates this holonym from what precedes it.
	local function leading_delimiter()
		if holonym_no_prefix then
			return " "
		end
		local holonym = place_desc.holonyms[holonym_index]
		if holonym.no_display then
			return ""
		end
		if is_first then
			if holonym.placetype then
				return " " .. m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " "
			end
			-- No placetype means the holonym is just raw text, e.g. 'in southern'; raw text that starts with a
			-- comma supplies its own separator.
			return holonym.display_placename:find("^,") and "" or " "
		end
		local delimiter = ""
		local previous = place_desc.holonyms[holonym_index - 1]
		if previous.placetype and not holonym.suppress_comma then
			local text = holonym.display_placename
			local is_connective = text == "and" or text == "in" or text == "and the" or text == "in the"
			if not is_connective then
				delimiter = ","
			end
		end
		if holonym.placetype or not holonym.display_placename:find("^,") then
			delimiter = delimiter .. " "
		end
		return delimiter
	end

	local delimiter = leading_delimiter()
	return delimiter .. format_holonym(place_desc, holonym_index, not holonym_no_prefix and is_first)
end

-- Return the linked description of a placetype. Qualifiers are split off and shown in front of the description. The
-- splits returned by `split_qualifiers_from_placetype` are tried in order, and the first bare placetype that has a
-- display form wins; if none has one, the last bare placetype is used as-is.
local function get_placetype_description(placetype)
	local qualifier_prefix = ""
	local remaining = placetype
	for _, split in ipairs(m_placetypes.split_qualifiers_from_placetype(placetype)) do
		local earlier_qualifiers, qualifier, bare_placetype = split[1], split[2], split[3]
		if not qualifier then
			qualifier_prefix = ""
		elseif earlier_qualifiers then
			qualifier_prefix = earlier_qualifiers .. " " .. qualifier .. " "
		else
			qualifier_prefix = qualifier .. " "
		end
		local display_form = m_placetypes.get_placetype_display_form(bare_placetype)
		if display_form then
			return qualifier_prefix .. display_form
		end
		remaining = bare_placetype
	end
	return qualifier_prefix .. remaining
end

-- Return the linked description of a qualifier (which may be multiple words).
local function get_qualifier_description(qualifier)
	-- Reuse the placetype splitter by appending a dummy placetype ("foo") and reading the qualifier parts of the
	-- last split it returns.
	local splits = m_placetypes.split_qualifiers_from_placetype(qualifier .. " foo")
	local last_split = splits[#splits]
	local earlier_qualifiers, this_qualifier = last_split[1], last_split[2]
	if earlier_qualifiers then
		return earlier_qualifiers .. " " .. this_qualifier
	end
	return this_qualifier
end

-- Format a set of form-of directive terms. `overall_place_spec` is the overall parsed place spec; `directive_terms`
-- holds the terms (`terms`), the directive name (`directive`), its spec (`spec`) and an optional conjunction
-- (`conj`); `ucfirst` requests an upper-cased first letter on the directive text; `from_tcl` suppresses tracking.
-- Returns the result of `format_form_of` from the form-of module.
local function format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl)
	-- The entry placetypes are used to decide whether a term needs an article. With a single description its
	-- placetype list is used directly; with several, the lists are merged.
	local descs = overall_place_spec.descs
	local entry_placetypes
	if descs[2] then
		entry_placetypes = {}
		for _, desc in ipairs(descs) do
			m_table.extend(entry_placetypes, desc.placetypes)
		end
	else
		entry_placetypes = descs[1].placetypes
	end

	local linked_terms = {}
	for _, termobj in ipairs(directive_terms.terms) do
		-- Only compute an article for plain terms: no display override and no embedded wikilink.
		local article
		if not termobj.alt and termobj.term and not termobj.term:find("%[%[") then
			article = get_placename_article(termobj.term, entry_placetypes)
		end
		local display = "<span class='form-of-definition-link'>" ..
			m_links.full_link(termobj, "term", nil, "show qualifiers") .. "</span>"
		if termobj.eq then
			display = display .. " (= " .. m_links.full_link {term = termobj.eq, lang = enlang} .. ")"
		end
		if article then
			display = article .. " " .. display
		end
		linked_terms[#linked_terms + 1] = display
	end

	-- The directive text may be a literal, a function of the place spec, or "+" meaning "use the directive name".
	local spec = directive_terms.spec
	local label = spec.text
	if type(label) == "function" then
		label = label(overall_place_spec)
	end
	if label == "+" then
		label = directive_terms.directive
	end
	if ucfirst then
		label = m_strutils.ucfirst(label)
	end

	if not from_tcl then
		local base_key = "form-of/" .. directive_terms.directive
		track(base_key)
		local lang = overall_place_spec.lang
		local code = lang:getCode()
		local full_code = lang:getFullCode()
		track(base_key .. "/" .. code)
		if full_code ~= code then
			track(base_key .. "/" .. full_code)
		end
		if full_code ~= "en" then
			track(base_key .. "/non-english")
		end
	end

	local conjunction = directive_terms.conj or spec.conjunction or "and"
	local formatted = require(form_of_module).format_form_of {
		text = label,
		lemmas = m_table.serialCommaJoin(linked_terms, {conj = conjunction}),
		lemma_classes = false,
		-- text_classes = "place-text",
	}
	return formatted
end

-- Format a set of extra-info terms, i.e. extra information that is sometimes added to a definition, such as the
-- capital, largest city, modern name, official name, etc. `overall_place_spec` is the overall parsed {{tl|place}}
-- spec (see comment at top of file); `extra_info_terms` is the terms spec for this type of extra-info (as returned by
-- `parse_extra_info_arg`); and `sentence_style` indicates whether we're generating a sentence-style definition (as
-- suitable for an English-language term without a translation specified using t=).
local function format_extra_info(overall_place_spec, extra_info_terms, sentence_style)
	local linked_terms = {}
	for _, termobj in ipairs(extra_info_terms.terms) do
		insert(linked_terms, m_links.full_link(termobj, nil, nil, "show qualifiers"))
	end

	-- The label may be a literal, a function of the place spec, or "+" meaning "use the argument name".
	local spec = extra_info_terms.spec
	local label = spec.text
	if type(label) == "function" then
		label = label(overall_place_spec)
	end
	if label == "+" then
		label = spec.arg
	end
	if spec.auto_plural and linked_terms[2] then
		-- More than one term: pluralize the label.
		label = pluralize(label)
	end
	if spec.with_colon then
		label = label .. ":"
	end

	-- Sentence style starts a new capitalized sentence; otherwise the info is attached with a semicolon.
	local lead_in
	if sentence_style and spec.match_sentence_style then
		lead_in = ". " .. m_strutils.ucfirst(label)
	else
		lead_in = "; " .. label
	end

	-- FIXME: Use joinSegments when available.
	-- return lead_in .. " " ..
	--	m_table.joinSegments(linked_terms, {conj = extra_info_terms.conj or spec.conjunction or "and"})
	local conjunction = extra_info_terms.conj or spec.conjunction or "and"
	return lead_in .. " " .. m_table.serialCommaJoin(linked_terms, {conj = conjunction})
end

-- Format an old-style place description (with separate arguments for the placetype and each holonym) for display and
-- return the resulting string.
local function format_old_style_place_desc_for_display(args, place_desc, desc_index, with_article, ucfirst)
	-- Parameters, as used below:
	-- * `args`: template arguments; only `args.a` (an explicit article for the first description) is read here;
	-- * `place_desc`: the place description; its `placetypes` list and optional `holonyms` list are read;
	-- * `desc_index`: 1 for the first description of the invocation, higher for later ones;
	-- * `with_article`: whether an article should be prepended (may be turned off below for later descriptions);
	-- * `ucfirst`: whether the result should start with an upper-case letter.

	-- The placetype used to determine whether "in" or "of" follows is the last placetype if there are
	-- multiple slash-separated placetypes, but ignoring "and", "or" and parenthesized notes
	-- such as "(one of 254)".
	local entry_placetype = nil
	local placetypes = place_desc.placetypes
	local function is_and_or(item)
		return item == "and" or item == "or"
	end
	-- Output is accumulated as a list of string pieces and concatenated at the end.
	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end
	-- Insert a separating space, but not at the very start of the output.
	local function ins_space()
		if #parts > 0 then
			ins(" ")
		end
	end

	-- Find the position of the last "and"/"or" among the placetypes, if any.
	local and_or_pos
	for i, placetype in ipairs(placetypes) do
		if is_and_or(placetype) then
			and_or_pos = i
			-- no break here; we want the last in case of more than one
		end
	end

	-- Index of the first placetype not already consumed by the conjunction handling below.
	local remaining_placetype_index
	if and_or_pos then
		track("multiple-placetypes-with-and")
		if and_or_pos == #placetypes then
			error("Conjunctions 'and' and 'or' cannot occur last in a set of slash-separated placetypes: " ..
				concat(placetypes, "/"))
		end
		-- Everything up to and including the placetype right after the last conjunction is joined as one
		-- comma-separated list, using that last conjunction as the final joiner.
		local items = {}
		for i = 1, and_or_pos + 1 do
			local pt = placetypes[i]
			if is_and_or(pt) then
				-- skip
			elseif i > 1 and pt:find("^%(") then
				-- append placetypes beginning with a paren to previous item
				items[#items] = items[#items] .. " " .. pt
			else
				entry_placetype = pt
				insert(items, get_placetype_description(pt))
			end
		end
		ins(m_table.serialCommaJoin(items, {conj = placetypes[and_or_pos]}))
		remaining_placetype_index = and_or_pos + 2
	else
		remaining_placetype_index = 1
	end

	for i = remaining_placetype_index, #placetypes do
		local pt = placetypes[i]
		-- Check for and, or and placetypes beginning with a paren (so that things like
		-- "{{place|en|county/(one of 254)|s/Texas}}" work).
		if m_placetypes.placetype_is_ignorable(pt) then
			ins_space()
			ins(pt)
		else
			entry_placetype = pt
			-- Join multiple placetypes with comma unless placetypes are already
			-- joined with "and". We allow "the" to precede the second placetype
			-- if they're not joined with "and" (so we get "city and county seat of ..."
			-- but "city, the county seat of ...").
			if i > 1 then
				ins(", ")
				local article = m_placetypes.get_placetype_article(pt)
				if article ~= "the" and i > remaining_placetype_index then
					-- Track cases where we are comma-separating multiple placetypes without the second one starting
					-- with "the", as they may be mistakes. The occurrence of "the" is usually intentional, e.g.
					-- {{place|zh|municipality/state capital|s/Rio de Janeiro|c/Brazil|t1=Rio de Janeiro}}
					-- for the city of [[Rio de Janeiro]], which displays as "a municipality, the state capital of ...".
					track("multiple-placetypes-without-and-or-the")
				end
				if article then
					ins(article)
					ins(" ")
				end
			end
			ins(get_placetype_description(pt))
		end
	end

	-- Append every holonym, each preceded by whatever preposition/comma/space its context requires. The last
	-- non-ignorable placetype seen above is passed as the entry placetype.
	if place_desc.holonyms then
		for holonym_index, _ in ipairs(place_desc.holonyms) do
			ins(format_holonym_in_context(entry_placetype, place_desc, holonym_index))
		end
	end

	local gloss = concat(parts)

	if with_article then
		local article
		if desc_index == 1 then
			-- First description: a user-supplied article (if any) takes precedence.
			article = args.a
		else
			-- Later descriptions only get an article when at least one holonym with a placetype follows.
			if not place_desc.holonyms then
				-- there isn't a following holonym; the place type given might be raw text as well, so don't add
				-- an article.
				with_article = false
			else
				local saw_placetype_holonym = false
				for _, holonym in ipairs(place_desc.holonyms) do
					if holonym.placetype then
						saw_placetype_holonym = true
						break
					end
				end
				if not saw_placetype_holonym then
					-- following holonym(s)s is/are just raw text; the place type given might be raw text as well,
					-- so don't add an article.
					with_article = false
				end
			end
			if with_article then
				track("second-or-higher-description-with-added-article")
			else
				track("second-or-higher-description-suppressed-article")
			end
		end
		if with_article then
			-- Fall back to the article of the first placetype; `ucfirst` is passed on to the article lookup. The
			-- gloss itself is only upper-cased when no article ends up in front of it.
			article = article or m_placetypes.get_placetype_article(place_desc.placetypes[1], ucfirst)
			if article then
				gloss = article .. " " .. gloss
			elseif ucfirst then
				gloss = m_strutils.ucfirst(gloss)
			end
		end
	end

	return gloss
end

--[==[
Get the full gloss (English description) of a new-style place description. New-style place descriptions are
specified with a single string containing raw text interspersed with placetypes and holonyms surrounded by `<<...>>`.
Exported for use by [[Module:demonyms]].

`args` is consulted only for `args.a` (an explicit article, used when `with_article` is set); `place_desc` supplies
the ordered segments in `place_desc.order` and the holonyms in `place_desc.holonyms`.
]==]
function export.format_new_style_place_desc_for_display(args, place_desc, with_article)
	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end

	if with_article and args.a then
		ins(args.a .. " ")
	end

	-- Highest holonym index referenced by a "holonym" segment; holonyms beyond it are appended afterwards.
	local max_holonym = 0
	for _, order in ipairs(place_desc.order) do
		local segment_type, segment = order.type, order.value
		if segment_type == "raw" then
			ins(segment)
		elseif segment_type == "placetype" then
			ins(get_placetype_description(segment))
		elseif segment_type == "qualifier" then
			ins(get_qualifier_description(segment))
		elseif segment_type == "holonym" then
			-- For holonym segments, `segment` is an index into `place_desc.holonyms`.
			ins(format_holonym(place_desc, segment, false))
			if segment > max_holonym then
				max_holonym = segment
			end
		else
			internal_error("Unrecognized segment type %s", segment_type)
		end
	end

	-- Holonyms not referenced in the new-style text are appended in old-style fashion. Only the first of them is
	-- formatted with `holonym_no_prefix` set (just a space before it); the rest get the usual context handling.
	if place_desc.holonyms and max_holonym < #place_desc.holonyms then
		local holonym_no_prefix = true
		for holonym_index = max_holonym + 1, #place_desc.holonyms do
			ins(format_holonym_in_context(nil, place_desc, holonym_index, holonym_no_prefix))
			holonym_no_prefix = false
		end
	end

	return concat(parts)
end

-- Return a string with the gloss (the description of the place itself, as opposed to translations). If `ucfirst` is
-- given, the gloss's first letter is made upper case. If `sentence_style` is given, the "extra info" (modern name,
-- capital, largest city, etc.) is displayed as separated sentences; otherwise, it is displayed separated from the main
-- definition by semicolons.
local function get_display_form(data)
	-- `data` is a table with the fields `overall_place_spec`, `ucfirst`, `sentence_style`, `drop_extra_info`,
	-- `extra_info_overridden_set` and `from_tcl`. Returns the concatenated gloss string.
	local overall_place_spec, ucfirst, sentence_style, drop_extra_info, extra_info_overridden_set, from_tcl =
		data.overall_place_spec, data.ucfirst, data.sentence_style, data.drop_extra_info,
		data.extra_info_overridden_set, data.from_tcl
	local args = overall_place_spec.args

	local parts = {}
	local function ins(txt)
		insert(parts, txt)
	end

	-- Form-of directives come first. Their pretext is always emitted; the formatted directive itself is emitted
	-- only when there is no def= override or the override is "-".
	if overall_place_spec.directives and overall_place_spec.directives[1] then
		for i, directive_terms in ipairs(overall_place_spec.directives) do
			ins(directive_terms.pretext)
			if directive_terms.pretext ~= "" then
				-- Something already precedes the directive, so it no longer starts the gloss.
				ucfirst = false
			end
			if not args.def or args.def == "-" then
				ins(format_form_of_directive(overall_place_spec, directive_terms, ucfirst, from_tcl))
				ucfirst = false
				if i == #overall_place_spec.directives and directive_terms.posttext then
					ins(directive_terms.posttext)
				end
			end
		end
	end

	-- def=- means "directives only".
	if args.def == "-" then
		return concat(parts)
	end

	if args.def then
		-- An explicit definition replaces the generated descriptions; if it contains `<<`, it is parsed and
		-- formatted as a new-style place description (without an article).
		if args.def:find("<<") then
			local def_desc = export.parse_new_style_place_desc(args.def, args[1])
			ins(export.format_new_style_place_desc_for_display({}, def_desc, false))
		else
			ins(args.def)
		end
	else
		local include_article = true
		for n, desc in ipairs(overall_place_spec.descs) do
			if desc.order then
				ins(export.format_new_style_place_desc_for_display(args, desc, n == 1))
			else
				ins(format_old_style_place_desc_for_display(args, desc, n, include_article, ucfirst))
			end
			if desc.joiner then
				ins(desc.joiner)
			end
			-- Each description decides whether the one after it gets an article; only the first description can be
			-- capitalized.
			include_article = desc.include_following_article
			ucfirst = false
		end
	end

	-- Additional free text: attached directly if it starts with ";" or ":", with a space if it starts with "_"
	-- (the underscore is dropped), and with ", " otherwise.
	-- (A stray assignment to an undeclared, never-read global `posttext` used to sit here; it has been removed.)
	local addl = args.addl
	if addl then
		if addl:find("^[;:]") then
			ins(addl)
		elseif addl:find("^_") then
			ins(" " .. addl:sub(2))
		else
			ins(", " .. addl)
		end
	end

	for _, extra_info_terms in ipairs(overall_place_spec.extra_info) do
		-- Include a given extra info term either when
		-- (1) drop_extra_info not set (it's set by {{tcl}}), or
		-- (2) the extra info term is marked as "display even when dropped" (e.g. modern= or full=, to help understand
		--     the term's sense), or
		-- (3) the term was overridden by a `place_*=` setting in {{tcl}}.
		if not drop_extra_info or extra_info_terms.spec.display_even_when_dropped or
			extra_info_overridden_set and extra_info_overridden_set[extra_info_terms.spec.arg] then
			ins(format_extra_info(overall_place_spec, extra_info_terms, sentence_style))
		end
	end

	return concat(parts)
end

-- Return the definition line. `data` is a table with the fields `overall_place_spec`, `from_tcl`, `drop_extra_info`,
-- `extra_info_overridden_set` and `translation_follows`. When translations (`args.t`) are present, the gloss is
-- generated without capitalization or sentence style and combined with the translations: "gloss: translations" if
-- `translation_follows` is set, otherwise "translations (gloss)". Without translations the gloss is returned on its
-- own, in sentence style (and capitalized unless `args.nocap` is set) when the language code is "en".
local function get_def(data)
	local overall_place_spec, from_tcl, drop_extra_info, extra_info_overridden_set, translation_follows =
		data.overall_place_spec, data.from_tcl, data.drop_extra_info, data.extra_info_overridden_set,
		data.translation_follows
	local args = overall_place_spec.args
	local sentence_style = overall_place_spec.lang:getCode() == "en"
	local ucfirst = sentence_style and not args.nocap
	if #args.t > 0 then
		local gloss = get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = false,
			sentence_style = false,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
		if from_tcl and not args.tcl_nolc then
			gloss = m_strutils.lcfirst(gloss)
		end
		-- An empty gloss contributes neither the ": " separator nor the parentheses.
		if translation_follows then
			return (gloss == "" and "" or gloss .. ": ") .. get_translations(args.t, args.tid)
		else
			return get_translations(args.t, args.tid) .. (gloss == "" and "" or " (" .. gloss .. ")")
		end
	else
		return get_display_form {
			overall_place_spec = overall_place_spec,
			ucfirst = ucfirst,
			sentence_style = sentence_style,
			drop_extra_info = drop_extra_info,
			extra_info_overridden_set = extra_info_overridden_set,
			from_tcl = from_tcl,
		}
	end
end


---------- Functions for the category wikicode

-- The code in this section finds the categories to which a given place belongs. See comment at top of file.

--[=[
Find the appropriate category specs for a given place description and placetype.
For example, for the template invocation {{tl|place|en|city/and/county|s/Pennsylvania|c/US}}, which results in the place description ``` { placetypes = {"city", "and", "county"}, holonyms = { {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, {placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, } ``` the call ``` find_placetype_cat_specs { entry_placetype = "city", place_desc = { placetypes = {"city", "and", "county"}, holonyms = { {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, {placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, }, holonyms_by_placetype = { state = {"Pennsylvania"}, country = {"United States"}, }, }, } ``` might produce the return value ``` { entry_placetype = "city", cat_specs = {"Cities in Pennsylvania, USA"}, triggering_holonym = {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"}, triggering_holonym_index = 1, } ``` See the comment at the top of the section for a description of category specs and the overall algorithm. 
On entry, `data` is an object with the following fields: * `entry_placetype`: the entry placetype (or equivalent) used to look up the category data in placetype_data, which must have already been resolved to a placetype with an entry in `placetype_data`; * `place_desc`: the full place description as documented at the top of the file (used only for its holonyms); * `first_holonym_index`: the index of the first holonym to consider when iterating through the holonyms (used to implement the `:also` holonym placetype modifier); * `overriding_holonym`: an optional overriding holonym to use, in place of iterating through the holonyms (used to implement categorizing other holonyms of the same type as the triggering holonym, so that e.g. {{tl|place|en|river|s/Kansas,Nebraska}}, or equivalently {{tl|place|en|river|s/Kansas|and|s/Nebraska}}, works); * `from_demonym`: we are called from {{tl|demonym-noun}} or {{tl|demonym-adj}} instead of {{tl|place}}, and should generate categories appropriate to those templates. * `form_of_directive`: A form-of directive prefix such as `FORMER_NAME_OF`. If specified, use that type prefix to generate categories appropriate to the form-of directive (in addition to the regular categories generated for the {{tl|place}} invocation, which happens in a separate call). 
The return value is {nil} if no category specs could be located, otherwise an object with the following fields:
* `entry_placetype`: the placetype that should be used to construct categories when `true` is one of the returned
  category specs (normally the same as the `entry_placetype` passed in, but will be different when a "fallback" key
  exists and is used);
* `cat_specs`: list of category specs as described above;
* `triggering_holonym`: the triggering holonym (see the comment at the top of the section), or nil if there was no
  triggering holonym;
* `triggering_holonym_index`: the index of the triggering holonym in the list of holonyms in `place_desc`, or nil if
  an overriding holonym was passed in or there was no triggering holonym.
]=]
local function find_placetype_cat_specs(data)
	local entry_placetype, place_desc, first_holonym_index, overriding_holonym, from_demonym =
		data.entry_placetype, data.place_desc, data.first_holonym_index, data.overriding_holonym, data.from_demonym
	local form_of_directive = data.form_of_directive

	-- Try to find category specs for the entry placetype against a single holonym. `index` is the holonym's index
	-- (nil for an overriding holonym); `no_fallback` is passed on to the entry-placetype equivalence lookups and
	-- also disables the wildcard check at the end. Returns the category specs plus the equivalent entry placetype
	-- they were found under, or nil. Three sources are tried in order:
	--   1. `political_division_cat_handler`, for each entry-placetype/holonym-placetype equivalent pair;
	--   2. the `cat_handler` of the first equivalent entry placetype that has one;
	--   3. (only without `no_fallback`) a wildcard key `<holonym placetype>/*` in the entry placetype's data.
	local function fetch_cat_specs(holonym_to_match, index, no_fallback)
		local holonym_placetype = holonym_to_match.placetype
		if not holonym_placetype then
			-- raw text in place of holonym
			return nil
		end
		local holonym_placename = holonym_to_match.unlinked_placename
		if not holonym_placename then
			internal_error("Missing unlinked_placename in holonym (index %s): %s", index, holonym_to_match)
		end

		-- Source 1: political division handler.
		local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
					return m_placetypes.political_division_cat_handler {
						entry_placetype = equiv_entry_pt,
						holonym_placetype = equiv_holonym_pt,
						holonym_placename = holonym_placename,
						holonym_index = index,
						place_desc = place_desc,
						from_demonym = from_demonym,
					}
				end)
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_specs and cat_specs[1] then
			return cat_specs, equiv_entry_placetype_and_qualifier.placetype
		end

		-- Source 2: a placetype-specific category handler. Note that this re-declares
		-- `equiv_entry_placetype_and_qualifier`, which from here on refers to the placetype owning the handler.
		local cat_handler, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
				if entry_placetype_data and entry_placetype_data.cat_handler then
					return entry_placetype_data.cat_handler
				end
			end, {no_fallback = no_fallback, form_of_directive = form_of_directive}
		)
		if cat_handler then
			local cat_specs = m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
				return cat_handler {
					entry_placetype = equiv_entry_placetype_and_qualifier.placetype,
					holonym_placetype = equiv_holonym_pt,
					holonym_placename = holonym_placename,
					holonym_index = index,
					place_desc = place_desc,
					from_demonym = from_demonym,
				}
			end)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end

		-- Source 3: wildcard spec such as `country/*`, only when fallbacks are allowed.
		if not no_fallback then
			local cat_specs, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype,
				function(equiv_entry_pt)
					local entry_placetype_data = m_placetypes.placetype_data[equiv_entry_pt]
					if entry_placetype_data then
						return m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(equiv_holonym_pt)
							return entry_placetype_data[equiv_holonym_pt .. "/*"]
						end)
					end
				end, {form_of_directive = form_of_directive}
			)
			if cat_specs and cat_specs[1] then
				return cat_specs, equiv_entry_placetype_and_qualifier.placetype
			end
		end

		return nil
	end

	if overriding_holonym then
		-- FIXME, change the algorithm to eliminate overriding_holonym
		local cat_specs, fetched_entry_placetype = fetch_cat_specs(overriding_holonym, nil)
		if cat_specs and cat_specs[1] then
			return {
				entry_placetype = fetched_entry_placetype,
				cat_specs = cat_specs,
				triggering_holonym = overriding_holonym,
				-- no triggering_holonym_index
			}
		end
	else
		-- We loop twice over holonyms, the first time setting `no_fallback` so that we process only category specs for
		-- the specifically given entry placetype (possibly with preceding qualifiers). The reason for this is to
		-- correctly handle cases like [[Poblacion IX]]:
		--   {{place|en|barangay|mun/Roxas|p/Capiz|c/Philippines}}.
		-- "barangay" falls back to "neighborhood", and without the `no_fallback` loop, the neighborhood cat handler run
		-- on the mun/Roxas holonym will take precedence over the barangay-specific setting for p/Capiz because we
		-- check, for each holonym in turn, first for a matching spec through political_division_cat_handler, then a cat
		-- handler, then a wildcard spec like country/*. During the first no-fallback loop, we disable checking for
		-- wildcard specs because it seems a fallback matching exactly or through a cat handler on an earlier holonym
		-- would be better than a wildcard match for the exact entry placetype at a later holonym. (FIXME: But I don't
		-- know for sure; maybe we should check wildcard holonyms on the exact entry placetype first, or contrariwise
		-- maybe we should check only exact-match holonyms through political_division_cat_handler on the exact entry
		-- placetype first, not even checking other cat handlers.)
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i, "no_fallback")
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
		-- Second pass: same scan, but with fallbacks (and wildcard specs) allowed.
		for i, holonym in ipairs(place_desc.holonyms) do
			if first_holonym_index and i < first_holonym_index then
				-- continue
			else
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i)
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
	end

	return nil
end

-- Turn a list of category specs (see comment at section top) into the corresponding categories (minus the language
-- code prefix). The function is given the following arguments:
-- (1) `place_desc`: the place description, passed on when looking up the triggering holonym's known location;
-- (2) `cat_data`: an object of the form returned by find_placetype_cat_specs(), from which these fields are read:
--     * `cat_specs`: the category specs;
--     * `entry_placetype`: the entry placetype used to build a category when a spec is `true`;
--     * `triggering_holonym`: the triggering holonym (a holonym object; see comment at top of file), or nil if no
--       triggering holonym;
--     * `triggering_holonym_index`: the index of the triggering holonym, if any.
-- The return value is the list of category names, constructed as described in the top-of-section comment. A spec
-- containing `+++` has that marker replaced by the prefixed key of the known location matching the triggering
-- holonym; if no location matches, the category is skipped and the failure is logged and tracked.
local function cat_specs_to_categories(place_desc, cat_data)
	local all_cats = {}
	local cat_specs, entry_placetype, triggering_holonym, triggering_holonym_index =
		cat_data.cat_specs, cat_data.entry_placetype, cat_data.triggering_holonym, cat_data.triggering_holonym_index
	if triggering_holonym then
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				-- `true` stands for "<Plural entry placetype> <preposition> +++".
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst") .. " " ..
					m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " +++"
			else
				cat = cat_spec
			end
			if cat:find("%+%+%+") then
				local group, key, spec, container_trail = m_placetypes.find_matching_holonym_location {
					holonym_placetype = triggering_holonym.placetype,
					holonym_placename = triggering_holonym.unlinked_placename,
					holonym_index = triggering_holonym_index,
					place_desc = place_desc,
				}
				if group then
					-- The replacement is escaped so that any `%` in the key is taken literally by gsub.
					cat = cat:gsub("%+%+%+", m_strutils.replacement_escape(m_placetypes.get_prefixed_key(key, spec)))
					insert(all_cats, cat)
				else
					mw.log(("Unable to insert category for cat spec '%s' because holonym '%s/%s' did not match a " ..
						"known location"):format(cat, triggering_holonym.placetype, triggering_holonym.unlinked_placename))
					track("cant-match-holonym-for-category-spec")
				end
			else
				insert(all_cats, cat)
			end
		end
	else
		-- No triggering holonym: `true` becomes just the pluralized entry placetype, and `+++` cannot be resolved.
		for _, cat_spec in ipairs(cat_specs) do
			local cat
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst")
			else
				cat = cat_spec
				if cat:find("%+%+%+") then
					internal_error("Category %s contains +++ but there is no holonym to substitute", cat)
				end
			end
			insert(all_cats, cat)
		end
	end
	return all_cats
end

-- Return the categories (without initial lang code) that should be added to the entry, given the place description
-- (which specifies the entry placetype(s) and holonym(s); see top of file) and a particular entry placetype (e.g.
-- "city"). Note that only the holonyms from the place description are looked at, not the entry placetypes in the place
-- description.
local function get_placetype_cats(place_desc, entry_placetype, from_demonym, form_of_directive)
	local cats = {}
	local first_holonym_index = 1
	while first_holonym_index <= #place_desc.holonyms do
		-- Find the category specs (see top-of-file comment) corresponding to the holonym(s) in the place description.
local cat_data = find_placetype_cat_specs { entry_placetype = entry_placetype, place_desc = place_desc, first_holonym_index = first_holonym_index, from_demonym = from_demonym, form_of_directive = form_of_directive, } -- Check if no category spec could be found. if not cat_data then break end local triggering_holonym = cat_data.triggering_holonym if not triggering_holonym then internal_error("find_placetype_cat_specs should have returned a triggering holonym: %s", cat_data) end -- Generate categories for the category specs found. extend(cats, cat_specs_to_categories(place_desc, cat_data)) -- Also generate categories for other holonyms of the same placetype, so that e.g. -- {{place|en|city|s/Kansas|and|s/Missouri|c/USA}} generates both [[:Category:en:Cities in Kansas, USA]] and -- [[:Category:en:Cities in Missouri, USA]]. first_holonym_index = cat_data.triggering_holonym_index -- Loop over non-fallback equivalent placetypes to the triggering holonym's placetype, in case it is -- non-canonical (e.g. `cities/San Francisco`). This matches the loop over equivalent places in -- key_holonym_into_place_desc(). 
local equiv_triggering_placetypes = m_placetypes.get_placetype_equivs(triggering_holonym.placetype, {no_fallback = true}) for _, equiv in ipairs(equiv_triggering_placetypes) do local other_holonyms_of_same_type = place_desc.holonyms_by_placetype[equiv.placetype] if other_holonyms_of_same_type then for _, other_placename_of_same_type in ipairs(other_holonyms_of_same_type) do if other_placename_of_same_type ~= triggering_holonym.unlinked_placename then local overriding_holonym = { placetype = triggering_holonym.placetype, unlinked_placename = other_placename_of_same_type, } local other_cat_data = find_placetype_cat_specs { entry_placetype = entry_placetype, place_desc = place_desc, overriding_holonym = overriding_holonym, from_demonym = from_demonym, form_of_directive = form_of_directive, } if other_cat_data then extend(cats, cat_specs_to_categories(place_desc, other_cat_data)) end end end end end -- If there are any later-specified holonyms that had the modifier :also, try to produce categories for them -- as well. first_holonym_index = first_holonym_index + 1 while first_holonym_index <= #place_desc.holonyms do if place_desc.holonyms[first_holonym_index].continue_cat_loop then break end first_holonym_index = first_holonym_index + 1 end end if cats[1] then return cats end local entry_pt_default, equiv_entry_placetype_and_qualifier = m_placetypes.get_equiv_placetype_prop(entry_placetype, function(pt) return m_placetypes.placetype_data[pt] and m_placetypes.placetype_data[pt].default end, {form_of_directive = form_of_directive}) if entry_pt_default then return cat_specs_to_categories(place_desc, { cat_specs = entry_pt_default, entry_placetype = equiv_entry_placetype_and_qualifier.placetype, -- no triggering holonym }) end return {} end --[==[ Iterate through each type of place and return a list of the categories that need to be added to the entry. 
The returned categories need to be formatted using `format_cats`, as they can be either topic-style categories (by
default) or langname-style categories (if prefixed with `cln:`). The function is passed the parsed arguments `args`
(handed to `m_placetypes.get_bare_categories` to add "bare categories", and the source of any explicit `cat` values),
the overall place spec `overall_place_spec`, which contains the parsed info on the {{tl|place}} call (its `descs` and
optional `directives` are read here), and `from_demonym`, which is true if we're being called from
{{tl|demonym-noun}} or {{tl|demonym-adj}}. When `from_demonym` is set, the bare categories and the ordinary
per-placetype categories are skipped; categories coming from directives, the generic place categories for the holonyms
(special placetype `*`) and any explicit `cat` values are still added.
]==]
function export.get_cats(args, overall_place_spec, from_demonym)
	local result = {}
	local descs = overall_place_spec.descs

	handle_category_implications(descs, m_placetypes.cat_implications)
	m_placetypes.augment_holonyms_with_container(descs)

	local directives = overall_place_spec.directives
	if directives then -- not necessarily present when called from [[Module:demonym]]
		for _, directive_terms in ipairs(directives) do
			local spec = directive_terms.spec

			-- Categories listed directly on the directive: either one string or a list of strings.
			local directive_cats = spec.cat
			if type(directive_cats) == "string" then
				insert(result, directive_cats)
			elseif directive_cats then
				for _, directive_cat in ipairs(directive_cats) do
					insert(result, directive_cat)
				end
			end

			-- Per-placetype categories computed with the directive's type prefix.
			if spec.type_prefix then
				for _, place_desc in ipairs(descs) do
					for _, placetype in ipairs(place_desc.placetypes) do
						if not m_placetypes.placetype_is_ignorable(placetype) then
							extend(result, get_placetype_cats(place_desc, placetype, from_demonym, spec.type_prefix))
						end
					end
				end
			end
		end
	end

	if not from_demonym then
		local bare_cats = m_placetypes.get_bare_categories(args, overall_place_spec)
		extend(result, bare_cats)
	end

	for _, place_desc in ipairs(descs) do
		if not from_demonym then
			for _, placetype in ipairs(place_desc.placetypes) do
				if not m_placetypes.placetype_is_ignorable(placetype) then
					extend(result, get_placetype_cats(place_desc, placetype))
				end
			end
		end
		-- Generic place categories for the holonyms listed (e.g. a category like
		-- [[Category:Places in Merseyside, England]]) are requested through the special placetype "*".
		extend(result, get_placetype_cats(place_desc, "*", from_demonym))
	end

	local explicit_cats = args.cat
	if explicit_cats then -- not necessarily present when called from [[Module:demonym]]
		for _, explicit_cat in ipairs(explicit_cats) do
			-- A single cat= value may hold several comma-separated categories.
			local pieces = split_on_comma(explicit_cat)
			extend(result, pieces)
		end
	end

	return result
end


-- Format a list of categories for output. `lang` is a language object (its full code and full name are used),
-- `cats` is the list of category names produced by `export.get_cats`, and `sort_key` is passed on unchanged.
-- A category of the form `cln:NAME` becomes "<full language name> NAME" (a lang-name category like
-- [[:Category:English abbreviations]]); any other category becomes "<full language code>:<category>" (a topic
-- category like [[:Category:en:Abbreviations of states of the United States]]). The resulting list is handed to
-- `format_categories` in the utilities module, whose result is returned.
local function format_cats(lang, cats, sort_key)
	local langcode = lang:getFullCode()
	local qualified = {}
	for _, cat in ipairs(cats) do
		local langname_cat = cat:match("^cln:(.*)$")
		local qualified_cat
		if langname_cat then
			qualified_cat = lang:getFullName() .. " " .. langname_cat
		else
			qualified_cat = langcode .. ":" .. cat
		end
		insert(qualified, qualified_cat)
	end
	return require(utilities_module).format_categories(
		qualified, lang, sort_key, nil, force_cat or m_placetypes.get_force_cat()
	)
end


----------- Main entry point

--[==[
Implementation of {{tl|place}}. Meant to be callable from another module (specifically, [[Module:transclude]]).
The single argument `data` is an object with the following fields:
* `template_args`: Raw arguments specified by {{tl|place}}, possibly modified by {{tl|tcl}}.
* `from_tcl`: True if we're being invoked from {{tl|tcl}}.
* `drop_extra_info`: True if we should drop most of the "extra info" specified using extra info arguments (capital,
  largest city, etc.). Usually true when invoked from {{tl|tcl}}. Note that some extra info is still displayed even when
  `drop_extra_info` is set in order to establish the context (e.g.
{{para|full}} and {{para|modern}}), and any extra info overridden at the {{tl|tcl}} level is displayed regardless.
* `extra_info_overridden_set`: Set of booleans specifying, for each extra info arg, whether it was overridden at the
  {{tl|tcl}} level. This means, for example, that the values are interpreted according to the language in {{para|1}}
  instead of always defaulting to English, as is the case when {{tl|place}} is called directly.
* `form_of_overridden_args`: Set of objects of the form `{new_directive = ``directive``, new_value = ``value``}` for
  overriding a given form-of directive (the key) with new directive ``directive`` and new unparsed value ``value``.
  Both the key and the replacing directive should be canonical. ``value`` will be parsed in the same way as a regular
  form-of directive except that all specified terms are interpreted in the language specified in {{para|1}}, never in
  English. This is present so that {{tl|tcl}} can be used on abbreviations like [[GDR]] and [[FYROM]], whose
  equivalents in a foreign language have language-specific expansions but where the rest of the call should stay the
  same.
* `translation_follows`: If true, any translation specified using t= should follow the definition, after a colon,
  rather than preceding, with the definition in parens.

The function stores the processed arguments in `data.args` and the parsed place spec in `data.overall_place_spec`, and
returns the definition text followed by the formatted categories (no categories when {{para|nocat}} is set).
]==]
function export.format(data)
	local template_args = data.template_args

	-- These two spec tables are deliberately shared between all the parameters that use them.
	local list_param = {list = true}
	local boolean_param = {type = "boolean"}

	local params = {
		[1] = {required = true, type = "language", default = "und"},
		[2] = {required = true, list = true},
		t = list_param,
		tid = {list = true, allow_holes = true},
		cat = list_param,
		nocat = boolean_param,
		nocap = boolean_param,
		sort = true,
		pagename = true, -- for testing or documentation purposes
		a = true,
		addl = true,
		def = true,

		-- params that are only used when transcluding using {{tcl}}/{{transclude}}, to transmit information to {{tcl}}.
		tcl = true,
		tcl_t = list_param,
		tcl_tid = list_param,
		tcl_nolb = true,
		tcl_nolc = boolean_param,
		tcl_noextratext = boolean_param,
	}

	-- Every "extra info" argument is accepted as a list parameter.
	for _, extra_arg_spec in ipairs(export.extra_info_args) do
		params[extra_arg_spec.arg] = list_param
	end

	-- FIXME, once we've flushed out any uses, delete the following clause. That will cause def= to be ignored.
	if template_args.def == "" then
		error("Cannot currently pass def= as an empty parameter; use def=- if you want to suppress the definition display")
	end

	local args = require("Module:parameters").process(template_args, params)

	local article = args.a
	if article then
		track("a")
		-- Only "a"/"an"/"the" (with optional initial capital) are accepted as values of a=.
		if not (article:find("^[Aa]n?$") or article:find("^[Tt]he$")) then
			error("a= can only be used to specify a definite or indefinite article (and preferably use |nocap=1 " ..
				"instead to get the initial letter lowercase); see especially the documentation on the " ..
				"[[Template:place#Mixed format|mixed format]], which can be used to add arbitrary text before the " ..
				"placetype")
		end
		track("a/article")
	end

	data.args = args
	local overall_place_spec = parse_overall_place_spec(data)
	data.overall_place_spec = overall_place_spec

	-- The definition is generated first, then (unless nocat= is set) the categories.
	local definition = get_def(data)
	local categories = ""
	if not args.nocat then
		local cats = export.get_cats(args, overall_place_spec)
		categories = format_cats(args[1], cats, args.sort)
	end
	return definition .. categories
end


--[==[
Actual entry point of {{tl|place}}. Reads the arguments of the parent frame and passes them to `export.format` as
`template_args`.
]==]
function export.show(frame)
	local parent_args = frame:getParent().args
	return export.format({template_args = parent_args})
end


return export
14006m3e32f16t6msgvk7fsloksykq2