« Module:Linguistique » : différence entre les versions

Dernière version du 22 février 2026 à 00:16

La documentation pour ce module peut être créée à Module:Linguistique/doc
-- Only works in French. If needed, [[wikidata:Module:Linguistic]] can be used as a model for adding other languages.

-- Module table: every public function is attached to it and it is returned at the end of the file.
local p = {}
-- Language code used to build the language object below.
local lang = 'fr'
-- MediaWiki language object for `lang`; p.ucfirst and p.lcfirst call its ucfirst/lcfirst methods.
local langobj = mw.language.new(lang)

-- Lowercase letters (plain and accented) treated as vowels.
-- p.vowelfirst looks up the lowercased first character of a string in this list.
local vowels = 'aeiouyąăẵằẳặȃắâẫấầẩậãäǟāáàȁǎảẚåǻḁạǡæǣǽĕȇêễếềểệḙẽḛëēḕéḗèȅěẻẹęȩḝǝĭȋîĩḭïḯīíìȉǐỉịįıŏȏôỗốồổộõṏṍöōṑóṓòȍǒỏọǫǭơỡớờởợøǿŭȗûṷũṻṹṵüǖǘǜǚṳūúùȕǔủůụųưữứừửựŷỹÿȳýỳỷẙỵ'

-- i18n: French connective strings used when joining list items.
local wordor = ' ou '    -- conjunction placed before the last item of an "or" list
local wordand = ' et '   -- conjunction placed before the last item of an "and" list
local comma = ', '       -- separator between list items
local fullstop = '. '    -- NOTE(review): not referenced anywhere in the visible part of this file
local wordsep = ' '      -- separator between two words

-- Plain-text (non-pattern) search: returns true when `pattern` occurs
-- somewhere in `str`, and returns nothing when either argument is missing
-- or when there is no occurrence.
local function isin(str, pattern)
	if not (str and pattern) then
		return
	end
	if mw.ustring.find(str, pattern, 1, true) then
		return true
	end
end


-- Normalises a gender code to 'feminine', 'neutral' or 'masculine'.
-- Accepted aliases: 'f' / 'fem' / 'feminine' and 'n' / 'neutral'.
-- Anything else (including nil) is treated as masculine.
local function processgender(str)
	local aliases = {
		f = 'feminine',
		fem = 'feminine',
		feminine = 'feminine',
		n = 'neutral',
		neutral = 'neutral',
	}
	return aliases[str] or 'masculine'
end

-- Normalises a grammatical-number code: 'p' and 'plural' give 'plural',
-- every other value (including nil) gives 'singular'.
local function processnumber(str)
	local isPlural = (str == 'p') or (str == 'plural')
	return isPlural and 'plural' or 'singular'
end

-- Returns true when the first character of `str`, lowercased, is one of the
-- letters listed in `vowels`. Returns nothing for a nil or empty `str`, or
-- when the first character is not in the list.
function p.vowelfirst (str)
	if not str or #str == 0 then
		return
	end
	local initial = mw.ustring.sub(str, 1, 1)
	return isin(vowels, mw.ustring.lower(initial))
end

-- Wraps `str` in parentheses and prefixes it with `space`.
-- @param str    text to wrap; nil or '' is returned unchanged
-- @param lang   accepted but not used by this function
-- @param space  prefix placed before the opening parenthesis;
--               defaults to the HTML entity '&#32;' when nil or false
-- @return space .. '(' .. str .. ')'
function p.inparentheses(str, lang, space)
	if str and str ~= '' then
		return (space or '&#32;') .. '(' .. str .. ')'
	end
	return str
end

-- Prefixes `word` with the French preposition "de" in the appropriate form.
-- @param word        text to prefix (may contain wiki formatting); nil is treated as ''
-- @param gender      gender code, normalised by processgender
-- @param number      number code, normalised by processnumber
-- @param determiner  when set and different from '-', the definite article is
--                    included (de l’ / de la / du); '-' means "explicitly empty"
-- @param raw         unformatted text used for the elision test; when absent it
--                    is taken from the leading wikilink of `word`, or `word` itself
-- @return the prefixed string
function p.of(word, gender, number, determiner, raw)
	word = mw.text.trim(word or '')
	-- The elision test must look at the displayed text, not at the wiki markup:
	-- for [[:fr:Italie|Italie]] it is 'Italie' that decides.
	raw = raw or p.texteLien(word) or word

	local isFeminine = processgender(gender) == 'feminine'
	local isPlural = processnumber(number) == 'plural'
	local startsWithVowel = p.vowelfirst(raw)

	if isPlural then
		return 'des ' .. word
	end

	local prefix
	if determiner and (determiner ~= '-') then
		-- with the definite article: de l’, de la, du
		if startsWithVowel then
			prefix = 'de l’'
		elseif isFeminine then
			prefix = 'de la '
		else
			prefix = 'du '
		end
	elseif startsWithVowel then
		prefix = 'd’'
	else
		prefix = 'de '
	end
	return prefix .. word
end

-- Builds a noun group "noun adjective" (French word order: the adjective
-- follows the noun).
-- @param noun  the noun; when nil or '' the function returns nil (not '') so
--              that the result is not counted as a string by mw.listToText
-- @param adj   the adjective; when nil or '' the noun is returned alone
-- @return the joined string, or nil
function p.noungroup(noun, adj)
	if not noun or noun == '' then
		return nil
	end
	if not adj or adj == '' then
		return noun
	end
	-- `wordsep` is a plain string in this module, not a function of the
	-- language: calling it raised "attempt to call a string value".
	return noun .. wordsep .. adj
end

-- Joins the items of a sequence into one string.
-- @param args      sequence of strings; it is never modified
-- @param conjtype  how to join:
--   nil / 'and'           : comma-separated, ' et ' before the last item
--   'or'                  : comma-separated, ' ou ' before the last item
--   'comma'               : comma-separated
--   'new line'            : '<br />' between items, each item passed through p.ucfirst
--   'lowercase new line'  : '<br />' between items, items left as they are
--   a function            : called without arguments, its result is used as conjtype
--   anything else         : used directly as the separator
-- @return the joined string
function p.quickconj(args, conjtype)
	if type(conjtype) == 'function' then
		conjtype = conjtype()
	end

	-- cases where the last separator differs from the others
	if (not conjtype) or conjtype == 'and' then
		return mw.text.listToText(args, comma, wordand)
	elseif conjtype == 'or' then
		return mw.text.listToText(args, comma, wordor)
	end

	-- other cases: one separator throughout
	local separator = conjtype
	local items = args
	if conjtype == 'comma' then
		separator = comma
	elseif conjtype == 'new line' or conjtype == 'lowercase new line' then
		separator = '<br />'
		if conjtype == 'new line' then
			-- Capitalise into a copy so the caller's table is left untouched.
			items = {}
			for i, j in pairs(args) do
				items[i] = p.ucfirst(j)
			end
		end
	end
	return table.concat(items, separator)
end

-- Joins the values of `args`, taken in ascending key order, using p.quickconj.
-- Keys need not be contiguous: they are collected, sorted, and the values are
-- repacked into a sequence.
-- @param args      table of items; nil or an empty table gives nil
-- @param conjtype  forwarded unchanged to p.quickconj
-- @return the joined string, or nil when there is nothing to join
function p.conj(args, conjtype)
	if not args then
		return nil
	end

	local sortedKeys = {}
	for key in pairs(args) do
		table.insert(sortedKeys, key)
	end
	if #sortedKeys == 0 then
		return nil
	end
	table.sort(sortedKeys)

	local values = {}
	for _, key in ipairs(sortedKeys) do
		values[#values + 1] = args[key]
	end
	return p.quickconj(values, conjtype)
end
 
-- Template entry point for p.conj.
-- Reads the numbered arguments from the #invoke frame or, when the frame has
-- no first argument, from the parent (template) frame. Each value is trimmed
-- and empty values are dropped. The named argument `type` selects the
-- conjunction type; `lang` is accepted and ignored; any other named argument
-- raises an error.
-- @param frame  Scribunto frame object
-- @return the joined string, or nil when no non-empty item was given
function p.conjfromWiki(frame)
	-- `args` and the item table used to be assigned without `local`,
	-- which created globals.
	local args = frame.args
	if not args or not args[1] then
		args = mw.getCurrentFrame():getParent().args
	end
	local conjtype = args.type
	-- Items are stored under their original numeric key, so the output order
	-- does not depend on the order in which pairs() visits the arguments:
	-- p.conj sorts the keys before joining.
	local items = {}
	for key, value in pairs(args) do
		if type(key) == 'number' then
			value = mw.text.trim(value)
			if value ~= '' then
				items[key] = value
			end
		elseif key ~= 'type' and key ~= 'lang' then
			-- error() raises, so the category string after it is never returned.
			return error('bad parameter in template:Conj:' .. key), '[[Category:Pages with incorrect template usage/Conj|A]]'
		end
	end
	return p.conj(items, conjtype)
end

-- Splits a noun phrase at its first recognised particle.
-- The particles are tried in list order, so longer forms (" de la ") must
-- stay ahead of the shorter ones they contain (" de ").
-- @param str         the phrase, e.g. "gare de Lyon"
-- @param beginswith  optional Lua pattern; when given, `str` must match it at
--                    its start, otherwise the split is not attempted
-- @return main, complement (e.g. "gare", "Lyon"), or nil when there is no
--         match or `str` is nil
local function findcomplement(str, beginswith)
	if str == nil then
		return nil
	end
	local particles = {" de la ", " de l'", " des ", " de l’", " de ", " d’", " d'", " du "," en "," à "," au "," aux "}
	if beginswith and (not mw.ustring.find(str, "^" .. beginswith)) then
		return nil
	end
	-- ipairs guarantees the priority order of the list above.
	for _, particle in ipairs(particles) do
		-- Use the end position returned by mw.ustring.find (a character
		-- offset). The previous code added string.len(particle), a byte
		-- count, which overshoots for particles containing "’" or "à" and
		-- cut off the first characters of the complement.
		local first, last = mw.ustring.find(str, particle, 1, true)
		if first then
			local main = mw.ustring.sub(str, 1, first - 1)
			local comp = mw.ustring.sub(str, last + 1)
			return main, comp
		end
	end
	return nil
end


-- Returns the complement of a noun phrase, e.g. "gare de Lyon" -> "Lyon".
-- When no complement is found, `str` is returned unchanged.
-- `beginswith` is forwarded to findcomplement.
function p.keepcomplement(str, beginswith)
	local _, complement = findcomplement(str, beginswith)
	return complement or str
end

-- Returns a noun phrase without its complement, e.g. "gare de Lyon" -> "gare".
-- When no complement is found, `str` is returned unchanged.
-- `beginswith` is forwarded to findcomplement.
function p.removecomplement(str, beginswith)
	local head = findcomplement(str, beginswith)
	return head or str
end

--[=[
	texteLien reads the wikilink found at the very start of str
	('[[link|text]]', optionally preceded by one <...> tag) and returns: text, link.
	If the link is '[[text]]', returns: text, text.
	Returns nil if str is not a string, does not start with such a link,
	or if the link target starts with 'fichier:', 'image:' or 'file:'
	(compared in lowercase).
]=]
function p.texteLien( str )
	if type( str ) ~= 'string' then
		return nil
	end

	local lien, texte = str:match( '^%[%[ *([^%[%]|]*)|? *([^%[%]]*)%]%]' )
	if not lien then
		-- second chance: a single leading tag in front of the link
		lien, texte = str:match( '^%b<>%[%[ *([^%[%]|]*)|? *([^%[%]]*)%]%]' )
	end
	if not lien then
		return nil
	end

	-- file and image inclusions are not treated as text links
	local testlien = lien:lower()
	for _, prefixe in ipairs( { '^fichier:', '^image:', '^file:' } ) do
		if testlien:match( prefixe ) then
			return nil
		end
	end

	-- a link without a label displays its target
	if texte == '' then
		texte = lien
	end
	return texte, lien
end

-- Capitalises the first letter of `str`, looking through leading HTML tags
-- and a leading wikilink: for '<span>[[target|text]]' it is the displayed
-- link text that gets the capital.
-- @param str  the text; non-string values and '' are returned unchanged
-- @return the text with its first visible letter in uppercase
function p.ucfirst(str)
	-- The guard used to test `string == ""`, i.e. the string library table,
	-- which is never equal to "".
	if (type (str ) ~= 'string') or (str == "") then
		return str
	end
	local strTemp, tag = str, ''
	-- split off the leading html tags (span or others)
	while strTemp:match( '^%b<>' ) do
		local tagTemp
		tagTemp, strTemp = strTemp:match( '^(%b<>)(.*)$' )
		tag = tag .. tagTemp
	end
	local texte = p.texteLien( strTemp )
	if texte then
		-- append the closing brackets of the link so that only the link text is replaced
		texte = texte .. ']]'
		-- escape every pattern magic character (brackets included)
		local pattern = texte:gsub( '[%^%$%(%)%%%.%[%]%*%+%-%?]', '%%%0' )
		-- capitalise the link text; a function replacement is used so that a
		-- '%' in the text (e.g. "50 %") is inserted literally instead of
		-- being read as a gsub escape
		local replacement = p.ucfirst( texte )
		str = str:gsub( pattern, function() return replacement end, 1 )
	else
		str = tag .. langobj:ucfirst( strTemp )
	end
	return str
end

-- Wrapper around p.ucfirst that takes its input from the first
-- argument of `frame`.
function p.ucfirstE(frame)
	local text = frame.args[1]
	return p.ucfirst(text)
end

-- Lowercases the first letter of `str`, looking through leading HTML tags
-- and a leading wikilink: for '<span>[[target|Text]]' it is the displayed
-- link text that is lowercased.
-- @param str  the text; non-string values and '' are returned unchanged
-- @return the text with its first visible letter in lowercase
function p.lcfirst(str)
	-- The guard used to test `string == ""`, i.e. the string library table,
	-- which is never equal to "".
	if (type (str ) ~= 'string') or (str == "") then
		return str
	end
	local strTemp, tag = str, ''
	-- split off the leading html tags (span or others)
	while strTemp:match( '^%b<>' ) do
		local tagTemp
		tagTemp, strTemp = strTemp:match( '^(%b<>)(.*)$' )
		tag = tag .. tagTemp
	end
	local texte = p.texteLien( strTemp )
	if texte then
		-- append the closing brackets of the link so that only the link text is replaced
		texte = texte .. ']]'
		-- escape every pattern magic character (brackets included)
		local pattern = texte:gsub( '[%^%$%(%)%%%.%[%]%*%+%-%?]', '%%%0' )
		-- lowercase the link text; a function replacement is used so that a
		-- '%' in the text (e.g. "50 %") is inserted literally instead of
		-- being read as a gsub escape
		local replacement = p.lcfirst( texte )
		str = str:gsub( pattern, function() return replacement end, 1 )
	else
		str = tag .. langobj:lcfirst( strTemp )
	end
	return str
end

-- Wrapper around p.lcfirst that takes its input from the first
-- argument of `frame`.
function p.lcfirstE(frame)
	local text = frame.args[1]
	return p.lcfirst(text)
end

--[[
function p.toascii(str)
	local convtable = mw.loadData("Module:Linguistique/ASCII")
	for i, j in pairs(convtable) do -- manquent les majuscules
		str = mw.ustring.gsub(str, '[' .. i .. ']', j)
	end
	return str
end
]]-- 

-- Replaces the accented / special Latin characters listed below with an
-- ASCII transliteration; every other character is left as it is.
-- @param str  the text to transliterate
-- @return the transliterated text
function p.stripDiacritics(str)
	-- character -> replacement
	-- NOTE(review): "Â" seems to be listed twice in the first row; confirm
	-- whether another letter was intended.
	local replacements = {
    ["À"] = "A", ["Á"] = "A", ["Â"] = "A", ["Â"] = "A", ["Ã"] = "A", ["Ä"] = "A", ["Å"] = "A", ["Æ"] = "AE",
	["Ç"] = "C", ["Ð"] = "Dh", ["È"] = "E", ["É"] = "E", ["Ê"] = "E", ["Ë"] = "E",
    ["Ì"] = "I", ["Í"] = "I", ["Î"] = "I", ["Ï"] = "I", ["İ"] = "I", ["Ñ"] = "N",
    ["Ò"] = "O", ["Ó"] = "O", ["Ô"] = "O", ["Õ"] = "O", ["Ö"] = "O", ["Ø"] = "O", ["Þ"] = "Th",
    ["Ù"] = "U", ["Ú"] = "U", ["Û"] = "U", ["Ü"] = "U", ["Ý"] = "Y", ["Ÿ"] = "Y",
    ["à"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["ä"] = "a", ["å"] = "a", ["æ"] = "ae",
    ["ç"] = "c", ["ð"] = "dh", ["è"] = "e", ["é"] = "e", ["ê"] = "e", ["ệ"] = "e", ["ë"] = "e",
    ["ì"] = "i", ["í"] = "i", ["î"] = "i", ["ï"] = "i", ["ı"] = "i", ["ñ"] = "n",
    ["ò"] = "o", ["ó"] = "o", ["ô"] = "o", ["õ"] = "o", ["ö"] = "o", ["ø"] = "o", ["ß"] = "s", ["þ"] = "th",
    ["ù"] = "u", ["ú"] = "u", ["û"] = "u", ["ü"] = "u", ["ủ"] = "u", ["ý"] = "y", ["ÿ"] = "y"
    }
	-- One match = one lead byte followed by its continuation bytes, so each
	-- match is looked up in the table as a whole character. A match missing
	-- from the table is kept unchanged by gsub.
	local onechar = "[%z\1-\127\194-\244][\128-\191]*"
	-- the parentheses drop gsub's second result (the match count)
	return (str:gsub(onechar, replacements))
end

-- Wrapper around p.stripDiacritics that takes its input from the first
-- argument of `frame`.
function p.stripDiacriticsE(frame)
	local text = frame.args[1]
	return p.stripDiacritics(text)
end

-- Automatic sort key: moves a leading article (Le, La, Les, L') to the end
-- of the string ("Le Monde" -> "Monde, Le") and strips diacritics.
-- Suitable for most place names and titles of works, but not for personal
-- names (it does not separate family name and given name).
-- @param str  the title to convert
-- @return the sort key
function p.makeSortkey(str)
	local strlen = mw.ustring.len(str)
	for _, article in ipairs({"Le ","La ","Les ","L'"}) do
		local artlen = mw.ustring.len(article)
		if strlen > artlen and mw.ustring.sub(str,1,artlen) == article then
			local shortarticle = article
			-- drop the trailing space of the article ("Le " -> "Le"); "L'" has none
			if mw.ustring.sub(article, artlen, artlen) == " " then
				shortarticle = mw.ustring.sub(article, 1, artlen - 1)
			end
			str = mw.ustring.sub(str, artlen + 1) .. ', ' .. shortarticle
			-- Only one leading article is moved. The loop used to carry on
			-- with the rewritten string and the original length, which
			-- truncated titles such as "Le La ..." on a second match.
			break
		end
	end
	str = p.stripDiacritics(str)
	return str
end

-- Wrapper around p.makeSortkey that takes its input from the first
-- argument of `frame`.
function p.makeSortkeyE(frame)
	local title = frame.args[1]
	return p.makeSortkey(title)
end

-- Export the module table holding all the public functions defined above.
return p