-- Module:Foreign names
--
-- From the Super Mario Wiki, the Mario encyclopedia
-- Jump to navigation | Jump to search
--
-- Documentation for this module may be created at Module:Foreign names/doc

-- Module:Foreign names
-- Lua replacement for Template:Foreign names

local p = {}

-- Language definitions: { param prefix, display name, BCP 47 tag, dir, latin, variant }
-- The list is walked in order by p.main, so the order of entries here is the
-- order in which language rows appear in the generated table.
-- Positional fields of each entry:
--   [1] param prefix: stem of the template parameters for this language
--                     ("Jpn" -> Jpn, JpnR, JpnM, JpnN, JpnC, Jpn2, Jpn2R, ...)
--   [2] display name: wikitext shown in the Language column; HTML tags are
--                     stripped before it is used as a category sort key
--   [3] BCP 47 tag:   emitted as the lang="" attribute around the name
--   [4] dir:          "ltr" or "rtl"; emitted as the dir="" attribute of the
--                     Name cell (p.main falls back to "ltr" when omitted)
--   [5] latin:        true if the language always uses Latin script and never needs
--                     romanisation; suppresses the missing-romanisation category
--   [6] variant:      nil (standard) | "chinese" | "chineseT"
--                     Chinese variants additionally read the RM (Mandarin) and
--                     RC (Cantonese) romanisation parameters.
-- NOTE(review): "Bsh" (Bashkir) and "Ser" (Serbian) carry the latin flag, yet both
-- are commonly written in Cyrillic; confirm that skipping the romanisation
-- check for them is intentional.
-- NOTE(review): regional variants (FreA/FreE, PorA/PorE, SpaA/SpaE) share the
-- base language tag; confirm whether region subtags are wanted.
local languages = {
	{ "Jpn",    "Japanese",                                   "ja",      "ltr"                      },
	{ "Alb",    "Albanian",                                   "sq",      "ltr", true                 },
	{ "Ara",    "Arabic",                                     "ar",      "rtl"                      },
	{ "Arm",    "Armenian",                                   "hy",      "ltr"                      },
	{ "Bsh",    "Bashkir",                                    "ba",      "ltr", true                 },
	{ "Bsq",    "Basque",                                     "eu",      "ltr", true                 },
	{ "Bos",    "Bosnian",                                    "bs",      "ltr", true                 },
	{ "Bul",    "Bulgarian",                                  "bg",      "ltr"                      },
	{ "Cat",    "Catalan",                                    "ca",      "ltr", true                 },
	{ "Chi",    "Chinese",                                    "zh",      "ltr", nil,  "chinese"      },
	{ "ChiS",   "Chinese <small>(Simplified)</small>",        "zh-Hans", "ltr", nil,  "chinese"      },
	{ "ChiT",   "Chinese <small>(Traditional)</small>",       "zh-Hant", "ltr", nil,  "chineseT"    },
	{ "Cro",    "Croatian",                                   "hr",      "ltr", true                 },
	{ "Cze",    "Czech",                                      "cs",      "ltr", true                 },
	{ "Dan",    "Danish",                                     "da",      "ltr", true                 },
	{ "Dut",    "Dutch",                                      "nl",      "ltr", true                 },
	{ "Est",    "Estonian",                                   "et",      "ltr", true                 },
	{ "Fil",    "Filipino",                                   "fil",     "ltr", true                 },
	{ "Fin",    "Finnish",                                    "fi",      "ltr", true                 },
	{ "Fre",    "French",                                     "fr",      "ltr", true                 },
	{ "FreA",   "French <small>(Canadian)</small>",           "fr",      "ltr", true                 },
	{ "FreE",   "French <small>(European)</small>",           "fr",      "ltr", true                 },
	{ "Gal",    "Galician",                                   "gl",      "ltr", true                 },
	{ "Geo",    "Georgian",                                   "ka",      "ltr"                      },
	{ "Ger",    "German",                                     "de",      "ltr", true                 },
	{ "Gre",    "Greek",                                      "el",      "ltr"                      },
	{ "Heb",    "Hebrew",                                     "he",      "rtl"                      },
	{ "Hin",    "Hindi",                                      "hi",      "ltr"                      },
	{ "Hun",    "Hungarian",                                  "hu",      "ltr", true                 },
	{ "Ice",    "Icelandic",                                  "is",      "ltr", true                 },
	{ "Ind",    "Indonesian",                                 "id",      "ltr", true                 },
	{ "Ita",    "Italian",                                    "it",      "ltr", true                 },
	{ "Kor",    "Korean",                                     "ko",      "ltr"                      },
	{ "Lao",    "Lao",                                        "lo",      "ltr"                      },
	{ "Lat",    "Latvian",                                    "lv",      "ltr", true                 },
	{ "Lit",    "Lithuanian",                                 "lt",      "ltr", true                 },
	{ "Mac",    "Macedonian",                                 "mk",      "ltr"                      },
	{ "Mal",    "Malay",                                      "ms",      "ltr", true                 },
	{ "Nor",    "Norwegian",                                  "no",      "ltr", true                 },
	{ "Per",    "Persian",                                    "fa",      "rtl"                      },
	{ "Pol",    "Polish",                                     "pl",      "ltr", true                 },
	{ "Por",    "Portuguese",                                 "pt",      "ltr", true                 },
	{ "PorA",   "Portuguese <small>(Brazilian)</small>",      "pt",      "ltr", true                 },
	{ "PorE",   "Portuguese <small>(European)</small>",       "pt",      "ltr", true                 },
	{ "Rom",    "Romanian",                                   "ro",      "ltr", true                 },
	{ "Rus",    "Russian",                                    "ru",      "ltr"                      },
	{ "Ser",    "Serbian",                                    "sr",      "ltr", true                 },
	{ "SerCro", "Serbo-Croatian",                             "sh",      "ltr", true                 },
	{ "Snd",    "Sindhi",                                     "sd",      "rtl"                      },
	{ "Snh",    "Sinhala",                                    "si",      "ltr"                      },
	{ "Svk",    "Slovak",                                     "sk",      "ltr", true                 },
	{ "Svn",    "Slovenian",                                  "sl",      "ltr", true                 },
	{ "Spa",    "Spanish",                                    "es",      "ltr", true                 },
	{ "SpaA",   "Spanish <small>(Latin American)</small>",    "es",      "ltr", true                 },
	{ "SpaE",   "Spanish <small>(European)</small>",          "es",      "ltr", true                 },
	{ "Swe",    "Swedish",                                    "sv",      "ltr", true                 },
	{ "Tha",    "Thai",                                       "th",      "ltr"                      },
	{ "Tur",    "Turkish",                                    "tr",      "ltr", true                 },
	{ "Ukr",    "Ukrainian",                                  "uk",      "ltr"                      },
	{ "Vie",    "Vietnamese",                                 "vi",      "ltr", true                 },
}

-- Remove every "<...>" tag from s and return only the remaining text.
-- Used to turn display names such as "Chinese <small>(Simplified)</small>"
-- into plain text for category sort keys.
local function stripTags(s)
	-- gsub also returns a match count; binding to one local drops it.
	local plain = s:gsub("<[^>]+>", "")
	return plain
end

-- Build the parameter name of the idx-th entry of a language.
-- The first entry is unnumbered ("Jpn"); later ones append their index
-- ("Jpn2", "Jpn3", ...).
local function paramKey(prefix, idx)
	if idx ~= 1 then
		return prefix .. idx
	end
	return prefix
end

-- Return the number of consecutive, non-empty name entries for a prefix:
-- prefix, prefix2, prefix3, ... Counting stops at the first missing or empty
-- entry, so later entries after a gap are ignored.
local function countEntries(args, prefix)
	local first = args[prefix]
	if not first or first == "" then
		return 0
	end

	local count = 1
	while true do
		local following = args[prefix .. (count + 1)]
		if not following or following == "" then
			return count
		end
		count = count + 1
	end
end

-- Look up a romanisation parameter for entry idx.
-- suffix = "R" | "RM" | "RC"
-- A value of "^" means "same as the previous entry": the lookup steps back one
-- entry at a time until a value other than "^" is found. If even the first
-- entry holds "^", the result is the empty string. Missing values are "".
local function resolveR(args, prefix, idx, suffix)
	local cursor = idx
	local found
	repeat
		found = args[paramKey(prefix, cursor) .. suffix] or ""
		cursor = cursor - 1
	until found ~= "^" or cursor < 1

	if found == "^" then
		return ""
	end
	return found
end

-- Number of rows the Meaning cell that starts at entry idx should cover.
-- Following entries are folded into the cell while their M value is "^" or
-- equal to the raw M value of entry idx; the first differing entry ends it.
local function meaningRowspan(args, prefix, idx, total)
	local function meaningAt(i)
		return args[paramKey(prefix, i) .. "M"] or ""
	end

	local anchor = meaningAt(idx)
	local last = idx
	for j = idx + 1, total do
		local current = meaningAt(j)
		if current ~= "^" and current ~= anchor then
			break
		end
		last = j
	end
	return last - idx + 1
end

-- Number of rows the Note cell that starts at entry idx should cover.
-- Scans forward from idx + 1 and stops at the first entry whose N value is
-- neither "^" nor equal to the raw N value of entry idx.
local function noteRowspan(args, prefix, idx, total)
	local first = args[paramKey(prefix, idx) .. "N"] or ""
	local stop = idx + 1
	while stop <= total do
		local note = args[paramKey(prefix, stop) .. "N"] or ""
		if not (note == "^" or note == first) then
			break
		end
		stop = stop + 1
	end
	-- stop is the first index outside the merged run.
	return stop - idx
end

-- Resolve the value of a parameter (suffix "M" or "N") for entry idx.
-- "^" defers to the previous entry, recursively; a "^" on the first entry, or
-- a missing value, resolves to the empty string.
local function resolveCaretParam(args, prefix, idx, suffix)
	local raw = args[paramKey(prefix, idx) .. suffix] or ""
	if raw ~= "^" then
		return raw
	end
	if idx > 1 then
		return resolveCaretParam(args, prefix, idx - 1, suffix)
	end
	return ""
end

-- Report whether s still contains a non-ASCII byte after "harmless" characters
-- have been neutralised, i.e. whether it looks like non-Latin script that
-- should come with a romanisation. Callers only use this for languages without
-- the latin flag, so accented Latin letters are not a concern here.
-- The following UTF-8 encoded ranges are replaced by "x" before the check:
--   U+FF01–U+FF5E  Fullwidth ASCII variants (e.g. Ａ, ３)
--   U+2000–U+2BFF  Symbols and punctuation (e.g. ☆, →, …)
--   U+3000–U+303F  CJK punctuation (e.g. 。、「」)
local function needsRomanisation(s)
	local ignorable = {
		"\239\188[\129-\191]",          -- U+FF01–U+FF3F
		"\239\189[\128-\158]",          -- U+FF40–U+FF5E
		"\226[\128-\175][\128-\191]",   -- U+2000–U+2BFF
		"\227\128[\128-\191]",          -- U+3000–U+303F
	}

	local remainder = s
	for _, pattern in ipairs(ignorable) do
		-- Single assignment target: gsub's match count is discarded.
		remainder = remainder:gsub(pattern, "x")
	end

	-- Any byte >= 128 left over belongs to a character outside the ranges above.
	return remainder:find("[\128-\255]") ~= nil
end

-- Module-level switch: when true, nw() leaves text untouched so long names
-- may wrap. It starts out false and is assigned by p.main from the "wrap"
-- parameter.
local wrapMode = false

-- Keep text on a single line by wrapping it in a "nowrap" span, unless wrap
-- mode is active.
local function nw(text)
	if not wrapMode then
		return '<span class="nowrap">' .. text .. '</span>'
	end
	return text
end

-- Enclose text in a span whose lang attribute is the given language tag.
-- Neither argument is escaped.
local function langSpan(tag, text)
	local opening = '<span lang="' .. tag .. '">'
	return opening .. text .. '</span>'
end

-- ---------------------------------------------------------------------------
-- Row builder
-- ---------------------------------------------------------------------------

-- True when args[key] is present and not the empty string (raw value; "^"
-- counts as present).
local function hasArg(args, key)
	return (args[key] or "") ~= ""
end

-- Format one romanisation line: a line break, the italicised text kept on one
-- line, and an optional small parenthesised label such as "Mandarin".
local function romanisationLine(text, label)
	local line = "<br />" .. nw("''" .. text .. "''")
	if label then
		line = line .. " <small>(" .. label .. ")</small>"
	end
	return line
end

-- Prefix cell content with a rowspan attribute when it covers several rows.
local function withRowspan(span, content)
	if span > 1 then
		return 'rowspan="' .. span .. '" | ' .. content
	end
	return content
end

-- Build the content of the Name cell for entry idx: the lang-tagged name,
-- followed by its romanisation line(s), followed by the tracking category when
-- a romanisation is expected but absent.
local function buildNameCell(args, prefix, idx, langTag, variant,
	isContent, isLatin, displayClean)

	local vKey  = paramKey(prefix, idx)   -- e.g. "Jpn", "Jpn2"
	local value = args[vKey] or ""

	-- Traditional Chinese entries that carry only a Cantonese romanisation
	-- (raw RC present, raw RM and R absent) are tagged as Hong Kong usage.
	local ltag = langTag
	if variant == "chineseT"
		and hasArg(args, vKey .. "RC")
		and not hasArg(args, vKey .. "RM")
		and not hasArg(args, vKey .. "R") then
		ltag = "zh-Hant-HK"
	end

	local parts = { nw(langSpan(ltag, value)) }
	local romanised   -- does this entry count as having a romanisation?

	if variant == "chinese" or variant == "chineseT" then
		-- Chinese: separate Mandarin (RM) and Cantonese (RC) romanisations,
		-- plus the generic R; all three honour "^" inheritance.
		local rm = resolveR(args, prefix, idx, "RM")
		if rm ~= "" then
			table.insert(parts, romanisationLine(rm, "Mandarin"))
		end
		local rc = resolveR(args, prefix, idx, "RC")
		if rc ~= "" then
			table.insert(parts, romanisationLine(rc, "Cantonese"))
		end
		local r = resolveR(args, prefix, idx, "R")
		if r ~= "" then
			table.insert(parts, romanisationLine(r))
		end
		romanised = rm ~= "" or rc ~= "" or r ~= ""
	else
		local r = resolveR(args, prefix, idx, "R")
		if r ~= "" then
			table.insert(parts, romanisationLine(r))
		end
		-- RM/RC are never displayed for non-Chinese languages, but a raw
		-- value in either one still suppresses the tracking category.
		romanised = r ~= ""
			or hasArg(args, vKey .. "RM")
			or hasArg(args, vKey .. "RC")
	end

	if not romanised and isContent and not isLatin and needsRomanisation(value) then
		table.insert(parts, "[[Category:Articles with missing foreign name romanizations|" .. displayClean .. "]]")
	end

	return table.concat(parts)
end

-- Build the content of a Meaning cell from its resolved M value:
--   ""  -> "?" placeholder (plus a tracking category on content pages)
--   "-" -> explicit "no meaning" marker
--   any other text -> the text itself
local function buildMeaningCell(resolvedM, isContent, displayClean)
	if resolvedM == "" then
		local cat = ""
		if isContent then
			cat = "[[Category:Articles with unknown foreign name meanings|" .. displayClean .. "]]"
		end
		return '<span class="foreign-meaning">?</span>' .. cat
	end
	if resolvedM == "-" then
		return '<span class="no-foreign-meaning">-</span>'
	end
	return '<span class="foreign-meaning">' .. resolvedM .. '</span>'
end

-- Build the content of a Note cell from its resolved N value.
local function buildNoteCell(resolvedN)
	if resolvedN ~= "" then
		return '<span class="foreign-note"><small>' .. resolvedN .. '</small></span>'
	end
	return '<span class="no-foreign-note"></span>'
end

-- Build the content of the Ref cell. When there is no citation and citation
-- checking is on (nocitMode false), unsourcedCount.n is incremented; on
-- content pages the new count is used as the tracking category's sort key.
local function buildRefCell(citation, isContent, nocitMode, unsourcedCount)
	if citation ~= "" then
		return '<span class="foreign-ref">' .. nw(citation) .. '</span>'
	end
	if nocitMode then
		return '<span class="no-foreign-ref"></span>'
	end

	unsourcedCount.n = unsourcedCount.n + 1
	if not isContent then
		return '<span class="no-foreign-ref"></span>'
	end
	local cat = '[[Category:Articles with unsourced foreign names|' .. unsourcedCount.n .. ']]'
	return '<sup><abbr title="Citation needed">[?]</abbr></sup>' .. cat
end

-- Append the wikitable rows for one language to `out`.
--
-- Parameters:
--   out            list of output lines; two lines ("|-" and the cell line)
--                  are appended per entry
--   args           template arguments, indexed by parameter name
--   prefix         parameter stem of the language, e.g. "Jpn"
--   display        wikitext for the Language column
--   langTag        value for the lang="" attribute of the name
--   dir            value for the dir="" attribute of the Name cell
--   variant        nil | "chinese" | "chineseT"
--   isContent      true on pages where tracking categories are emitted
--   nocitMode      true to leave missing citations unmarked
--   unsourcedCount table with field n, shared across languages and mutated
--   isLatin        true to skip the missing-romanisation check
--
-- Nothing is appended when the language has no entries. Row layout:
--   |-
--   | cell1 || cell2 || ...
-- where a cell with attributes is written as "attrs | content". The Language
-- cell is emitted on the first entry only and spans all entries; Meaning and
-- Note cells are emitted only on the entry where a new merged run begins.
local function buildLangRows(out, args, prefix, display, langTag, dir,
	variant, isContent, nocitMode, unsourcedCount, isLatin)

	local total = countEntries(args, prefix)
	if total == 0 then return end

	local displayClean = stripTags(display)

	-- Rows still covered by the current Meaning / Note rowspan cell.
	local mSpanLeft = 0
	local nSpanLeft = 0

	for idx = 1, total do
		local cells = {}

		-- Language label cell: only on first entry, spans all rows
		if idx == 1 then
			table.insert(cells, withRowspan(total, nw(display)))
		end

		-- Name cell (always has a dir= attribute)
		local nameCell = buildNameCell(args, prefix, idx, langTag, variant,
			isContent, isLatin, displayClean)
		table.insert(cells, 'dir="' .. dir .. '" | ' .. nameCell)

		-- Meaning cell (only when the previous merged run is used up)
		if mSpanLeft <= 0 then
			mSpanLeft = meaningRowspan(args, prefix, idx, total)
			local resolvedM = resolveCaretParam(args, prefix, idx, "M")
			table.insert(cells, withRowspan(mSpanLeft,
				buildMeaningCell(resolvedM, isContent, displayClean)))
		end
		mSpanLeft = mSpanLeft - 1

		-- Note cell (same merging scheme as Meaning)
		if nSpanLeft <= 0 then
			nSpanLeft = noteRowspan(args, prefix, idx, total)
			local resolvedN = resolveCaretParam(args, prefix, idx, "N")
			table.insert(cells, withRowspan(nSpanLeft, buildNoteCell(resolvedN)))
		end
		nSpanLeft = nSpanLeft - 1

		-- Ref cell (one per entry, never merged)
		local citation = args[paramKey(prefix, idx) .. "C"] or ""
		table.insert(cells, buildRefCell(citation, isContent, nocitMode, unsourcedCount))

		-- Emit row: "|-" then "| cell1 || cell2 || ..."
		table.insert(out, "|-")
		table.insert(out, "| " .. table.concat(cells, " || "))
	end
end

-- ---------------------------------------------------------------------------
-- Main entry point
-- ---------------------------------------------------------------------------

-- Entry point for {{#invoke:Foreign names|main}}.
-- Returns the wikitext of the foreign-names table: a header row followed by
-- the rows of every language (in the order of the `languages` list) for which
-- parameters were supplied.
--
-- Parameters are read from the parent frame (the template call) and, for any
-- parameter the parent does not define, from the #invoke call itself.
-- Recognised control parameters:
--   citations  non-empty: do not mark missing citations
--   wrap       non-empty: allow names to wrap (disables nowrap spans)
function p.main(frame)
	-- Per the Scribunto reference, getParent() on a frame created by #invoke
	-- returns the frame of the page containing the #invoke, so it is non-nil
	-- even for direct invocation. Choosing parent.args whenever a parent
	-- exists would therefore ignore parameters written on the #invoke itself;
	-- look keys up in the parent first and fall back to the invocation.
	local parent     = frame:getParent()
	local parentArgs = parent and parent.args or {}
	local invokeArgs = frame.args or {}
	local args = setmetatable({}, {
		__index = function(_, key)
			local value = parentArgs[key]
			if value == nil then
				value = invokeArgs[key]
			end
			return value
		end,
	})

	-- Tracking categories are only emitted in these namespaces.
	local title     = mw.title.getCurrentTitle()
	local contentNamespaces = { [0] = true, [102] = true, [106] = true }
	local isContent = contentNamespaces[title.namespace] == true

	-- Missing citations go unmarked when "citations" is set or the page is
	-- outside the content namespaces.
	local nocitMode = (args["citations"] or "") ~= "" or not isContent
	wrapMode = (args["wrap"] or "") ~= ""

	-- Shared counter of unsourced names, incremented across all languages.
	local unsourcedCount = { n = 0 }
	local out = {}
	table.insert(out, '{| id="foreign-names" class="wikitable"')
	table.insert(out, '! Language')
	table.insert(out, '! Name')
	table.insert(out, '! Meaning')
	table.insert(out, '! Note(s)')
	table.insert(out, '! <abbr title="Reference(s)">Ref.</abbr>')

	for _, langDef in ipairs(languages) do
		local prefix  = langDef[1]
		local display = langDef[2]
		local tag     = langDef[3]
		local dir     = langDef[4] or "ltr"
		local isLatin = langDef[5] == true
		local variant = langDef[6]

		buildLangRows(out, args, prefix, display, tag, dir,
			variant, isContent, nocitMode, unsourcedCount, isLatin)
	end

	table.insert(out, '|}')

	return table.concat(out, "\n")
end

return p