Модул:auto cat

Извор: Викиречник
Иди на навигацију Иди на претрагу

This module implements {{auto cat}}.


local export = {}


-- Splits a category title of the form "<language name> <label>" into its
-- label and language object.
--
-- Candidate language names are built from the leading words of the title,
-- trying the longest prefix first (all words but the last) and dropping one
-- word at a time until a canonical language name is recognised.
--
-- Returns: label, lang. When no prefix names a language, label is the whole
-- title text and lang is nil.
local function splitLabelLang(titleObject)
	local lookup = require("Модул:languages").getByCanonicalName
	local text = titleObject.text
	local pieces = mw.text.split(text, " ")

	local count = #pieces - 1
	while count >= 1 do
		local candidate = table.concat(pieces, " ", 1, count)
		local found = lookup(candidate)
		if found then
			-- Skip the language name plus the single separating space.
			return text:sub(#candidate + 2), found
		end
		count = count - 1
	end

	return text, nil
end


-- Looks up an object by canonical name.
--
-- With a truthy `family`, only the families module is consulted. Otherwise
-- the languages module is tried first and the etymology languages module is
-- used as a fallback. Returns whatever the lookup yields (nil if nothing
-- matched).
local function getObj(name, family)
	if family then
		return require("Модул:families").getByCanonicalName(name)
	end

	local language = require("Модул:languages").getByCanonicalName(name)
	if language then
		return language
	end
	return (require("Модул:etymology languages").getByCanonicalName(name))
end


-- List of handler functions that try to match the page name.
-- A handler should return a table of template title plus arguments
-- that is passed to frame:expandTemplate.
-- If a handler does not recognise the page name, it should return nil.
-- Note that the order of functions matters!

-- Ordered registry of handler functions; earlier entries are tried first.
local handlers = {}

-- Appends `func` to the end of the handler registry.
local function add_handler(func)
	handlers[#handlers + 1] = func
end


-- request cat: fires on any title that begins with "Захтеви".
add_handler(function(titleObject)
	if titleObject.text:find("^Захтеви") then
		return {title = "request cat"}
	end

	return nil
end)


-- Name-type labels recognised by the "topic name cat" and "name cat"
-- handlers. Each entry is spliced verbatim into a Lua pattern, so entries
-- must not contain pattern magic characters. The handlers try the entries in
-- list order.
-- NOTE(review): the list mixes Serbian and English labels; presumably the
-- English entries have not been translated yet -- confirm against the
-- category names actually in use.
local personal_name_types = {
	"презимена", "дата имена",
	"мушка дата имена", "женска дата имена", "унисекс дата имена",
	"diminutives of male given names", "diminutives of female given names",
	"diminutives of unisex given names",
	"augmentatives of male given names", "augmentatives of female given names",
	"augmentatives of unisex given names"
}

-- topic name cat; must go before general topic cat.
-- Matches "<code>:<source> <name type>" where <source> resolves through
-- getObj and <name type> is one of personal_name_types.
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")

	if code then
		for i = 1, #personal_name_types do
			local nametype = personal_name_types[i]
			local sourcename = label:match("^(.+) " .. nametype .. "$")
			local source = sourcename and getObj(sourcename)

			if source then
				return {title = "topic name cat", args = {code, source:getCode(), nametype}}
			end
		end
	end

	return nil
end)


-- Topical categories of the form "<code>:<label>", where <code> consists of
-- lowercase ASCII letters and hyphens and <label> is non-empty.
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")
	if not code then
		return nil
	end

	return {title = "topic cat", args = {code, label}}
end)
-- Keep a direct reference to the handler registered just above; export.show
-- uses it when only the topic-cat handlers should be tried.
local topic_cat_with_lang = handlers[#handlers]


-- script cat
-- Should go before things like "derived cat" that also look for categories
-- ending in "languages".
--
-- Recognised title shapes (SCRIPT is looked up as a canonical script name,
-- with a lowercased retry):
--   "SCRIPT script"                  -> {{script cat|code}}
--   "SCRIPT script LABEL"            -> {{script cat|code|LABEL}}
--   "Morse code" / "Flag semaphore"  -> {{script cat|code}}
--   "Morse code LABEL" etc.          -> {{script cat|code|LABEL}}
-- Returns nil when the title has none of these shapes or the script name is
-- unknown.
--
-- The "no label" case is tracked by leaving `label` nil. Previously it was
-- tracked with a sentinel string that the bare "SCRIPT script" match never
-- produced, so those categories were wrongly given the label "script".
add_handler(function(titleObject)
	local script_labels = {
		"додатци",
		"карактери",
		"карактери по језику",
		"језици",
		"модули",
		"шаблони",
	}
	local scripts_without_script_in_category = {
		-- We hardcode this rather than checking for all scripts without the
		-- word "script", else we'd trigger on "Arabic languages", because
		-- Arabic is both a script and family (as well as a language).
		["Morse code"] = true,
		["Flag semaphore"] = true,
	}
	local text = titleObject.text

	-- Bare script category: either "SCRIPT script" or one of the hardcoded
	-- scripts whose category name lacks the word "script".
	local script = text:match("^(.+) script$")
	local label
	if not script and scripts_without_script_in_category[text] then
		script = text
	end

	if not script then
		for _, lab in ipairs(script_labels) do
			script, label = text:match("^(.+) script (" .. lab .. ")$")
			if script then
				break
			end
			-- Check for e.g. 'Morse code characters' or 'Flag semaphore templates'.
			script, label = text:match("^(.+) (" .. lab .. ")$")
			if script then
				if scripts_without_script_in_category[script] then
					break
				end
				-- Not one of the hardcoded scripts: discard this match.
				script, label = nil, nil
			end
		end
	end
	if not script then
		return nil
	end

	local scriptObj = require("Модул:scripts").getByCanonicalName(script) or
		-- [[Category:Undetermined script languages]] vs. name of script = "undetermined"
		require("Модул:scripts").getByCanonicalName(mw.ustring.lower(script))
	if not scriptObj then
		return nil
	end

	if label then
		return { title = "script cat", args = { scriptObj:getCode(), label } }
	end
	return { title = "script cat", args = { scriptObj:getCode() } }
end)


--[[	langcatboiler
		Shouldn't be used because there are additional parameters, such as
		countries where the language is or was spoken,
		that should always be supplied.

add_handler(function(titleObject)
	if titleObject.text:find(" по језику$") or not titleObject.text:find("[јЈ]език$") then
		return nil
	end
	
	local langName = titleObject.text:match("^(.+) језик$")
	
	-- Use the entire category name if it doesn't end in "language", to handle
	-- cases where "language" is part of the name, e.g. ASL.
	local lang = require("Модул:languages").getByCanonicalName(langName) or require("Модул:languages").getByCanonicalName(titleObject.text)
	
	if not lang then
		local lang2 = require("Модул:languages").getByName(langName)
		if lang2 then
			error('"' .. langName .. '" није важеће канонско име. Kористите "' .. lang2:getCanonicalName() .. '" уместо њега.')
		end
		
		return nil
	end
	
	return { title = "langcatboiler", args = { lang:getCode() } }
end)
 
--]]

-- Letter names: titles ending in "имена слова", optionally prefixed with
-- "<language code>:". An unknown language code raises an error.
add_handler(function(titleObject)
	local text = titleObject.text
	if not text:find("имена слова$") then
		return nil
	end

	local langCode = text:match("^([^:]+):")
	if not langCode then
		-- No language prefix: the whole title is the label.
		return {title = "topic cat", args = {nil, text}}
	end

	local lang = require("Модул:languages").getByCode(langCode) or error('Кôд језика "' .. langCode .. '" није важећи.')
	local cat = text:match(":(.+)$")

	return {title = "topic cat", args = {lang:getCode() or nil, cat}}
end)


-- letter cat
add_handler(function(titleObject)
	-- Fire only when the title starts with an uppercase letter immediately
	-- followed by its own lowercase form, and that pair is either the whole
	-- title or is followed by a colon (e.g. [[Category:Gg: ⠛]]). Anything
	-- else (e.g. Turkish [[Category:İi]] and [[Category:Iı]]) has to call
	-- {{letter cat}} directly. An earlier, looser version of this handler
	-- also fired on names such as [[Category:zh:]] and [[Category:RFQ]].
	local upper, lower = mw.ustring.match(titleObject.text, "^(%u)(%l)%f[:%z]")
	if upper and mw.ustring.upper(lower) == upper then
		return {title = "letter cat"}
	end

	return nil
end)


-- Japanese kanji reading cat: titles beginning with "Јапански канџи".
add_handler(function(titleObject)
	if titleObject.text:find("^Јапански канџи") then
		return {title = "ja-readings-cat"}
	end

	return nil
end)


-- Okinawan kanji reading cat: titles beginning with "Okinawan kanji".
add_handler(function(titleObject)
	if titleObject.text:find("^Okinawan kanji") then
		return {title = "ryu-readings-cat"}
	end

	return nil
end)


-- FIXME! Move this to a general word-to-number converter.
-- Maps lowercase English number words ("one" .. "twenty") to their numeric
-- values. Used by the character-count handler to turn the count word in a
-- category title into a number; words not listed here yield nil.
local word_to_number = {
	one = 1,
	two = 2,
	three = 3,
	four = 4,
	five = 5,
	six = 6,
	seven = 7,
	eight = 8,
	nine = 9,
	ten = 10,
	eleven = 11,
	twelve = 12,
	thirteen = 13,
	fourteen = 14,
	fifteen = 15,
	sixteen = 16,
	seventeen = 17,
	eighteen = 18,
	nineteen = 19,
	twenty = 20
}


-- Japanese/Okinawan character count cat.
-- Matches "<language> terms written with <count word> Han script character(s)"
-- for the two supported language names, with <count word> looked up in
-- word_to_number.
add_handler(function(titleObject)
	local langname, count = titleObject.text:match("^(.+) terms written with (.+) Han script characters?$")

	local code_by_name = {
		["Јапански"] = "ja",
		["Okinawan"] = "ryu",
	}
	local langcode = langname and code_by_name[langname]
	local num = langcode and word_to_number[count]
	if not num then
		return nil
	end

	return {title = langcode .. "-cat-written with n kanji", args = {num}}
end)


-- Japanese/Okinawan read-as cat.
-- Matches "<language> terms spelled with kanji read as <reading>" for the two
-- supported language names.
add_handler(function(titleObject)
	local langname, reading = titleObject.text:match("^(.+) terms spelled with kanji read as (.+)$")

	local code_by_name = {
		["Јапански"] = "ja",
		["Okinawan"] = "ryu",
	}
	local langcode = langname and code_by_name[langname]
	if not langcode then
		return nil
	end

	return {title = langcode .. "-readascat", args = {reading}}
end)


-- Unicode block cat: titles ending in "block".
add_handler(function(titleObject)
	if titleObject.text:find("block$") then
		return {title = "Unicode block cat"}
	end

	return nil
end)


-- Proto-Indo-European xxx-shape roots.
-- Fires only when the language prefix resolves to code "ine-pro" and the
-- label has the form "<shape>-shape roots".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "ine-pro" then
		local shape = label:match("^(.+)-shape roots$")
		if shape then
			return {title = "ine-root shape cat", args = {shape}}
		end
	end

	return nil
end)


-- Dutch prefixed verbs.
-- Fires only for language code "nl" with a label of the form
-- "prefixed verbs with <prefix>-" (the trailing hyphen is part of the
-- captured prefix).
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "nl" then
		local pref = label:match("^prefixed verbs with (.+%-)$")
		if pref then
			return {title = "nl-pref verb cat", args = {pref}}
		end
	end

	return nil
end)


-- Dutch separable verbs.
-- Fires only for language code "nl" with a label of the form
-- "separable verbs with <particle>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "nl" then
		local sep = label:match("^separable verbs with (.+)$")
		if sep then
			return {title = "nl-sep verb cat", args = {sep}}
		end
	end

	return nil
end)


-- PIE root cat: fires when the title contains
-- "Појмови изведени од PIE корена" (first letter in either case).
--
-- The search goes through mw.ustring because the bracket class holds
-- two-byte Cyrillic letters; the byte-oriented string library treats such a
-- class as a set of single bytes and only matched here by coincidence.
add_handler(function(titleObject)
	if not mw.ustring.find(titleObject.text, "[Пп]ојмови изведени од PIE корена") then
		return nil
	end

	return {title = "PIE root cat"}
end)


-- PIE word cat.
-- Matches labels of the form "Појмови изведени од PIE речи *<word>" (first
-- letter in either case), with or without a language prefix.
--
-- mw.ustring is required: with the byte-oriented string library the anchored
-- class "^[Пп]" consumes only the first byte of the two-byte letter, so the
-- pattern could never match. A label with nothing after the asterisk is now
-- rejected instead of producing a nil word argument.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local word = mw.ustring.match(label, "^[Пп]ојмови изведени од PIE речи %*(.+)$")
	if not word then
		return nil
	end

	return {title = "PIE word cat", args = {lang and lang:getCode() or nil, word}}
end)


-- ar-root cat: language code "ar" with a label starting
-- "изрази који припадају корену " followed by at least one character.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "ar" and label:find("^изрази који припадају корену .+") then
		return {title = "ar-root cat"}
	end

	return nil
end)


-- HE root cat: language code "he" with a label of the form
-- "термини који припадају корену <root>". The root is split on the Hebrew
-- maqaf and its pieces become the template's positional arguments.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang or lang:getCode() ~= "he" then
		return nil
	end

	local root = label:match("^термини који припадају корену (.+)$")
	if not root then
		return nil
	end

	return {title = "HE root cat", args = mw.text.split(root, "־", true)}
end)


-- he-patterncat: language code "he" with a label starting
-- "термини у образцу " followed by at least one character.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang and lang:getCode() == "he" and label:find("^термини у образцу .+") then
		return {title = "he-patterncat"}
	end

	return nil
end)


-- root cat.
-- Matches labels of the form "Изрази изведени из <source> корена <root>"
-- (first letter in either case). <source> must resolve through getObj;
-- otherwise the handler declines and returns nil.
--
-- mw.ustring is required: with the byte-oriented string library the anchored
-- class "^[Ии]" consumes only the first byte of the two-byte letter, so the
-- pattern could never match.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename, root = mw.ustring.match(label, "^[Ии]зрази изведени из (.+) корена (.+)$")
	if not sourcename then
		return nil
	end

	local source = getObj(sourcename)
	if not source then
		return nil
	end

	return {title = "root cat", args = {lang and lang:getCode() or nil, source:getCode(), root}}
end)


-- tone cat: requires a language prefix and a label of the form
-- "<pos> with tone <tone>", where <tone> contains no spaces.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		local pos, tone = label:match("^(.+) with tone ([^ ]+)$")
		if pos then
			return {title = "tone cat", args = {lang:getCode(), pos, tone}}
		end
	end

	return nil
end)


-- classifier cat: requires a language prefix and a label of the form
-- "именице класификоване по <classifier>".
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		local pos, classifier = label:match("^(именице) класификоване по (.+)$")
		if pos then
			return {title = "classifier cat", args = {lang:getCode(), pos, classifier}}
		end
	end

	return nil
end)


-- derived cat.
-- Matches labels of the form "Изрази изведени из <source>" (first letter in
-- either case). When <source> ends in " језици" (either case of the first
-- letter) the remainder is looked up as a family; otherwise <source> is
-- looked up as a language or etymology language. Returns nil when the label
-- does not match or the source is unknown.
--
-- mw.ustring is required for both patterns: the byte-oriented string library
-- treats "[Ии]" / "[Јј]" as sets of single bytes, so the anchored prefix and
-- the family suffix could never match. Capturing the family name directly
-- also strips the suffix in whichever case it was written.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = mw.ustring.match(label, "^[Ии]зрази изведени из (.+)$")
	if not sourcename then
		return nil
	end

	local source
	local familyName = mw.ustring.match(sourcename, "^(.+) [Јј]езици$")
	if familyName then
		source = getObj(familyName, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end

	return {title = "derived cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)


-- inherited cat.
-- Matches labels of the form "Изрази наслеђени од <source>" (first letter in
-- either case), where <source> resolves through getObj as a language or
-- etymology language. Returns nil otherwise.
--
-- mw.ustring is required: with the byte-oriented string library the anchored
-- class "^[Ии]" consumes only the first byte of the two-byte letter, so the
-- pattern could never match.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = mw.ustring.match(label, "^[Ии]зрази наслеђени од (.+)$")
	if not sourcename then
		return nil
	end

	local source = getObj(sourcename)
	if not source then
		return nil
	end

	return {title = "inherited cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)


-- borrowed cat.
-- Matches labels of the form "Изрази позајмљени од <source>" (first letter
-- in either case). When <source> ends in " језици" (either case of the first
-- letter) the remainder is looked up as a family; otherwise <source> is
-- looked up as a language or etymology language. Returns nil when the label
-- does not match or the source is unknown.
--
-- mw.ustring is required for both patterns: the byte-oriented string library
-- treats "[Ии]" / "[Јј]" as sets of single bytes, so the anchored prefix and
-- the family suffix could never match.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = mw.ustring.match(label, "^[Ии]зрази позајмљени од (.+)$")
	if not sourcename then
		return nil
	end

	local source
	local familyName = mw.ustring.match(sourcename, "^(.+) [Јј]езици$")
	if familyName then
		source = getObj(familyName, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end

	return {title = "borrowed cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)


-- unadapted borrowing cat.
-- Matches labels of the form "Unadapted borrowings from <source>" (first
-- letter in either case). When <source> ends in " језици" (either case of
-- the first letter) the remainder is looked up as a family; otherwise
-- <source> is looked up as a language or etymology language. Returns nil
-- when the label does not match or the source is unknown.
--
-- The family suffix is matched through mw.ustring: the byte-oriented string
-- library treats "[Јј]" as a set of single bytes, so the suffix test could
-- never succeed and the family lookup was unreachable.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Uu]nadapted borrowings from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	local familyName = mw.ustring.match(sourcename, "^(.+) [Јј]езици$")
	if familyName then
		source = getObj(familyName, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end

	return {title = "unadapted borrowing cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)


-- calque cat.
-- Matches labels of the form "Terms calqued from <source>" or
-- "Terms partially calqued from <source>" (first letter in either case).
-- When <source> ends in " језици" (either case of the first letter) the
-- remainder is looked up as a family; otherwise <source> is looked up as a
-- language or etymology language. The `partial` argument is true for the
-- "partially" form. Returns nil when the label does not match or the source
-- is unknown.
--
-- The family suffix is matched through mw.ustring: the byte-oriented string
-- library treats "[Јј]" as a set of single bytes, so the suffix test could
-- never succeed and the family lookup was unreachable.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local partially, sourcename = label:match("^[Tt]erms (%l*) ?calqued from (.+)$")
	-- `partially` is nil when the label did not match at all, and some other
	-- word when an unexpected qualifier precedes "calqued"; reject both.
	if not (partially == "" or partially == "partially") then
		return nil
	end

	local source
	local familyName = mw.ustring.match(sourcename, "^(.+) [Јј]езици$")
	if familyName then
		source = getObj(familyName, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end

	return {
		title = "calque cat",
		args = {
			lang and lang:getCode() or nil,
			source:getCode(),
			partial = partially ~= "",
		}
	}
end)

-- semantic loan cat.
-- Matches labels of the form "Semantic loans from <source>" (first letter in
-- either case). When <source> ends in " језици" (either case of the first
-- letter) the remainder is looked up as a family; otherwise <source> is
-- looked up as a language or etymology language. Returns nil when the label
-- does not match or the source is unknown.
--
-- The family suffix is matched through mw.ustring: the byte-oriented string
-- library treats "[Јј]" as a set of single bytes, so the suffix test could
-- never succeed and the family lookup was unreachable.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	local sourcename = label:match("^[Ss]emantic loans from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	local familyName = mw.ustring.match(sourcename, "^(.+) [Јј]езици$")
	if familyName then
		source = getObj(familyName, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end

	return {title = "semantic loan cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)

-- translit cat (no source language): labels beginning with
-- "термини су пренесени са других језика". The language code is passed when
-- the title has a language prefix.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if label:find("^термини су пренесени са других језика") then
		local langCode = lang and lang:getCode() or nil
		return {title = "translit cat", args = {langCode}}
	end

	return nil
end)


-- translit cat (with source language): labels of the form
-- "Transliterations of <source> terms" (first letter in either case), where
-- <source> resolves through getObj. The language code is passed when the
-- title has a language prefix, otherwise the first argument is nil.
--
-- A label that starts with "Transliterations of" but lacks the " terms" part
-- is now declined up front rather than handing a nil name to the lookup. The
-- former `local lang = ""` statement shadowed the outer variable inside its
-- own block and had no effect, so it has been dropped.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not label:find("^[Tt]ransliterations of") then
		return nil
	end

	local sourcename = label:match("[Tt]ransliterations of (.+) terms")
	if not sourcename then
		return nil
	end

	local source = getObj(sourcename)
	if not source then
		return nil
	end

	return {title = "translit cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)


-- circumfix cat, infix cat, interfix cat, prefix cat, suffix cat.
-- Matches labels of the form "<pos> <affixtype>ed with <term>" or
-- "<pos> <affixtype>ed with <term> (<id>)". A <pos> of "words" is passed as
-- nil. Requires a language prefix.
--
-- Without the language guard a title with no recognisable language raised
-- "attempt to index a nil value" on lang:getCode(), which stopped every
-- later handler from being tried. Declining with nil lets them run.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang then
		return nil
	end

	for _, affixtype in ipairs({"circumfix", "infix", "interfix", "prefix", "suffix"}) do
		if label:find("^.+ " .. affixtype .. "ed with .") then
			local pos, after = label:match("^(.+) " .. affixtype .. "ed with (.+)$")

			if pos == "words" then
				pos = nil
			end

			local term, id

			-- A trailing parenthesised part is a sense id, not part of the term.
			if after:find(". %([^()]+%)$") then
				term, id = after:match("^(.+) %(([^()]+)%)$")
			else
				term = after
			end

			return {title = affixtype .. " cat", args = {lang:getCode(), term, pos = pos, id = id}}
		end
	end

	return nil
end)


-- name cat: requires a language prefix and a label of the form
-- "<name type> from <source>", with <name type> taken from
-- personal_name_types. If <source> resolves through getObj its code is
-- passed; otherwise the raw source text is passed.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if lang then
		for i = 1, #personal_name_types do
			local nametype = personal_name_types[i]
			local sourcename = label:match("^" .. nametype .. " from (.+)$")

			if sourcename then
				local sourceObj = getObj(sourcename)
				local source = sourceObj and sourceObj:getCode() or sourcename

				return {title = "name cat", args = {lang:getCode(), source, nametype}}
			end
		end
	end

	return nil
end)


-- charactercat: requires a language prefix and a label of the form
-- "термини који се пишу са <term>".
--
-- Without the language guard a matching label with no recognisable language
-- raised "attempt to index a nil value" on lang:getCode(), which stopped
-- every later handler from being tried. Declining with nil lets them run.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	-- Don't fire on [[Категорија:Japanese terms spelled with 発 read as はつ]]
	-- and similar.
	if not label:find("^термини који се пишу са .+") or label:find("^термини који се пишу са .+ читане као .+") then
		return nil
	end

	if not lang then
		return nil
	end

	local term = label:match("^термини који се пишу са (.+)$")
	return {title = "charactercat", args = {lang:getCode(), term}}
end)


-- phrasebook cat.
-- The title "Књига фраза по језику" is the umbrella category and takes no
-- arguments. Otherwise a language prefix is required and the label must be
-- "phrasebook" or "phrasebook/<sublabel>".
--
-- Without the language guard a matching label with no recognisable language
-- raised "attempt to index a nil value" on lang:getCode(), which stopped
-- every later handler from being tried. Declining with nil lets them run.
add_handler(function(titleObject)
	if titleObject.text == "Књига фраза по језику" then
		return {title = "phrasebook cat", args = {}}
	end

	local label, lang = splitLabelLang(titleObject)
	if not lang then
		return nil
	end

	if label == "phrasebook" then
		return {title = "phrasebook cat", args = {lang:getCode()}}
	end

	local sublabel = label:match("^phrasebook/(.+)$")
	if sublabel then
		return {title = "phrasebook cat", args = {lang:getCode(), sublabel}}
	end

	return nil
end)


-- no entry cat: requires a language prefix and a label ending in
-- "entries that don't exist".
--
-- Without the language guard a matching label with no recognisable language
-- raised "attempt to index a nil value" on lang:getCode(), which stopped
-- every later handler from being tried. Declining with nil lets them run.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang or not label:find("entries that don't exist$") then
		return nil
	end

	return { title = "no entry cat", args = { lang:getCode() } }
end)

-- Azerbaijani compound verbs: titles of the form
-- "Azerbaijani compound verbs with <verb>".
add_handler(function(titleObject)
	local with_what = titleObject.text:match("^Azerbaijani compound verbs with (.+)$")
	if with_what then
		return {title = "az-compound cat", args = { with_what }}
	end

	return nil
end)


--[[	family cat
		
		Must go after the "derived", "borrowed", and "transliterated" category handlers,
		which sometimes have "languages" at the end, but before poscatboiler,
		or families that have the same names as languages will get intercepted.

		Matches titles of the form "<family> језици". The family name is looked
		up as written and, failing that, lowercased. Returns nil when the title
		has a different shape or the family is unknown.

		A title that ends in "језици" without a preceding "<family> " part
		(e.g. the bare word) used to pass a nil name on to the lookup and to
		mw.ustring.lower; it is now declined up front.
]]
add_handler(function(titleObject)
	local familyName = titleObject.text:match("^(.+) језици$")
	if not familyName then
		return nil
	end

	local family = require("Модул:families").getByCanonicalName(familyName) or
		require("Модул:families").getByCanonicalName(mw.ustring.lower(familyName))

	if not family then
		return nil
	end

	return { title = "family cat", args = { family:getCode() } }
end)


-- poscatboiler.
-- With a language prefix: any label is accepted. A label of the form
-- "<base> in <script> script" (where <base> is not "terms" and <script> is a
-- known script name) is passed as <base> together with sc=<script code>.
-- Without a language prefix: only labels of the form "<label> по језику" are
-- accepted, with the first letter of <label> lowercased.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang then
		if label:find(". по језику$") then
			local umbrellaLabel = mw.getContentLanguage():lcfirst(label:match("^(.+) по језику$"))
			return {title = "poscatboiler", args = {nil, umbrellaLabel}}
		end
		return nil
	end

	local baseLabel, script = label:match("(.+) in (.-) script$")
	if script and baseLabel ~= "terms" then
		local scriptObj = require("Модул:scripts").getByCanonicalName(script)
		if scriptObj then
			return {title = "poscatboiler", args = {lang:getCode(), baseLabel, sc = scriptObj:getCode() }}
		end
	end

	return {title = "poscatboiler", args = {lang:getCode(), label}}
end)


-- redundant translit cat: labels beginning with
-- "Термини са редундантним транслитерацијама" and containing
-- "/<language code>"; everything after the first slash is passed as the
-- code. Returns nil when the prefix or the slash part is missing.
--
-- `langCode` is declared local here; it was previously assigned without
-- `local` and so leaked into the global environment.
add_handler(function(titleObject)
	local label = splitLabelLang(titleObject)

	if not label:find("^Термини са редундантним транслитерацијама") then
		return nil
	end

	local langCode = label:match("/(.+)")
	if not langCode then
		return nil
	end

	return {title = "redundant translit cat", args = {langCode}}
end)


-- manual translit diff cat: labels beginning with
-- "Изрази са ручним транслитерацијама различитим од аутоматизованих" and
-- containing "/<language code>"; everything after the first slash is passed
-- as the code.
add_handler(function(titleObject)
	local label = splitLabelLang(titleObject)

	if label:find("^Изрази са ручним транслитерацијама различитим од аутоматизованих") then
		local langCode = label:match("/(.+)")
		if langCode then
			return {title = "manual translit diff cat", args = {langCode}}
		end
	end

	return nil
end)


-- topic cat (catch-all): accepts every title and passes it whole as the
-- label, with no language code.
add_handler(function(titleObject)
	local fallback = {title = "topic cat"}
	fallback.args = {nil, titleObject.text}
	return fallback
end)
-- Keep a direct reference to the handler registered just above; export.show
-- uses it when only the topic-cat handlers should be tried.
local topic_cat_without_lang = handlers[#handlers]


-- Entry point for {{auto cat}}.
--
-- Reads the `nopos` boolean from the parent frame's arguments, then runs the
-- current page title through the handlers and expands the template chosen by
-- the first handler whose expansion does not look like an error.
--
-- Returns the expanded wikitext. In the "Шаблон" namespace a fixed notice is
-- returned instead. Raises an error outside the "Категорија" namespace, and
-- when no handler recognises the title.
function export.show(frame)
	local args = require("Модул:parameters").process(frame:getParent().args, {
		nopos = { type = "boolean" },
	})
	local titleObject = mw.title.getCurrentTitle()
	local namespace = titleObject.nsText

	if namespace == "Шаблон" then
		return "(This template should be used on pages in the Category: namespace.)"
	end
	if namespace ~= "Категорија" then
		error("This template/module can only be used on pages in the Category: namespace.")
	end

	-- With nopos set, only the two topic-cat handlers are candidates.
	local candidates = handlers
	if args.nopos then
		candidates = { topic_cat_with_lang, topic_cat_without_lang }
	end

	-- Try each handler in order. A handler that does not recognise the title
	-- returns nil and the next one is tried. A handler that does recognise it
	-- names a template and its arguments, but the template itself may still
	-- produce an error, in which case we also move on. Example:
	-- "CAT:Mato Grosso, Brazil" -- "Mato" is a language name, so the
	-- {{poscatboiler}} handler fires and looks for the label "Grosso, Brazil",
	-- which fails. That failure used to block further handler consideration;
	-- now the loop continues and {{topic cat}} eventually handles the
	-- category correctly.
	--
	-- FIXME: Will the topic_cat handlers correctly handle "letter names" categories?
	local first_error_cattext
	for i = 1, #candidates do
		local templateObject = candidates[i](titleObject)

		if templateObject then
			require("Модул:debug").track("auto cat/" .. templateObject.title)
			local cattext = frame:expandTemplate(templateObject)
			-- FIXME! Errors are detected by looking for specific text found in
			-- most or all error messages generated by category tree templates
			-- (the second piece of text below should be in every message
			-- generated when a module doesn't recognise a category name). If
			-- that text ever changes in the source modules (e.g.
			-- [[Module:category tree]]), it must be changed here as well.
			local looks_like_error = cattext:find("Категорија:Categories with invalid label")
				or cattext:find("The automatically%-generated contents of this category has errors")
			if not looks_like_error then
				return cattext
			end
			-- Remember only the first failing expansion as a last resort.
			first_error_cattext = first_error_cattext or cattext
		end
	end

	if first_error_cattext then
		return first_error_cattext
	end
	error("{{auto cat}} couldn't recognize format of category name")
end

-- Test helper: runs the handlers against an injected title string instead of
-- the current page. `title` is either the string itself or a frame-like
-- table, in which case the first argument of its parent frame is used.
-- Returns the template name chosen by the first handler that fires, or
-- nothing when no handler fires.
function export.test(title)
	if type(title) == "table" then
		title = title:getParent().args[1]
	end

	local titleObject = { text = title }

	for i = 1, #handlers do
		local t = handlers[i](titleObject)
		if t then
			return t.title
		end
	end
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: