विभाग:Ancient Greek/typing

local p = {}

local sparse_concat = require("Module:table").sparseConcat
local ustring = mw.ustring
local U = ustring.char
local get_codepoint = ustring.codepoint
local ufind = ustring.find
local ugsub = ustring.gsub
local decompose = ustring.toNFD
local str_gsub = string.gsub

-- Byte-level patterns for the plain string library. The first bracket matches
-- a single-byte (ASCII, including NUL via %z) character or the lead byte of a
-- multi-byte UTF-8 sequence; the second matches any continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" -- roughly equivalent to "." in Ustring patterns
local one_UTF8_char_or_none = "[%z\1-\127\194-\244]?[\128-\191]*" -- roughly equivalent to ".?" in Ustring patterns

-- Characters used by the conversion tables below. The "spacing" and
-- "modifier" variants are standalone characters rather than combining marks.
local subscript = U(0x345) -- iota subscript (ypogegrammeni)
local macron = U(0x304) -- macron
local spacing_macron = U(0xAF) -- spacing macron
local modifier_macron = U(0x2C9) -- modifier letter macron
local breve = U(0x306) -- breve
local spacing_breve = "˘" -- spacing breve
local diaeresis = U(0x308) -- diaeresis
local rough = U(0x314) -- rough breathing (reversed comma)
local smooth = U(0x313) -- smooth breathing (comma)
local acute = U(0x301) -- acute
local grave = U(0x300) -- grave
local circumflex = U(0x342) -- Greek circumflex (perispomeni)
local question_mark = U(0x37E) -- Greek question mark
local spacing_rough = "῾" -- spacing rough breathing
local spacing_smooth = "᾿" -- spacing smooth breathing

-- Ustring character class that matches any single combining diacritic
-- handled by this module.
local combining_diacritic = "["
	.. macron .. breve
	.. rough .. smooth .. diaeresis
	.. acute .. grave .. circumflex
	.. subscript
	.. "]"

-- The numbers are used to sort series of diacritics: a diacritic with a
-- lower number is placed before one with a higher number. Diacritics that
-- share a number are not ordered relative to each other.
-- This table is also used to recognise combining diacritics (see process).
local diacritic_position = {
	[macron] = 1,
	[breve] = 2,
	[rough] = 3,
	[smooth] = 3,
	[diaeresis] = 3,
	[acute] = 4,
	[grave] = 4,
	[circumflex] = 4,
	[subscript] = 5,
}

-- Call func once, in order, with every UTF-8 encoded character of str.
-- The return value of func is ignored.
local function for_each(str, func)
	local next_char = string.gmatch(str, UTF8_char)
	local char = next_char()
	while char do
		func(char)
		char = next_char()
	end
end

--[=[	Comparison function passed to table.sort when a run of combining
		diacritics is reordered. Returns true when diacritic1 has a lower
		rank in diacritic_position than diacritic2, which arranges
		diacritics in the following order:
			1. macron
			2. breve
			3. breathings or diaeresis
			4. acute, circumflex, or grave
			5. iota subscript
		The original note says this ordering is used by [[Module:typing-aids]].
		
		No error is raised when a sequence contains several diacritics of the
		same rank; such diacritics compare as equal, so their relative order
		after sorting is not guaranteed.
		
		Both arguments must be keys of diacritic_position.
]=]
local function get_relative_position(diacritic1, diacritic2)
	local rank1 = diacritic_position[diacritic1]
	local rank2 = diacritic_position[diacritic2]
	return rank1 < rank2
end

-- Split a UTF-8 string into an array holding one character per element,
-- in the order in which the characters occur.
local function chars_to_table(chars)
	local result = {}
	for char in string.gmatch(chars, "[%z\1-\127\194-\244][\128-\191]*") do
		result[#result + 1] = char
	end
	return result
end

-- Sort the characters of a string of combining diacritics with
-- get_relative_position and return them joined back into one string.
local function reorder_diacritic_sequence(diacritics)
	local sequence = chars_to_table(diacritics)
	table.sort(sequence, get_relative_position)
	return table.concat(sequence)
end

-- Decompose text to NFD and put every run of two or more combining
-- diacritics into the order defined by diacritic_position.
-- Returns the resulting string only (the substitution count is dropped).
function p.reorder_diacritics(text)
	local decomposed = decompose(text)
	-- A single diacritic cannot be out of order, so only runs of at least
	-- two are handed to the sorting function.
	local diacritic_run = combining_diacritic .. combining_diacritic .. "+"
	local reordered = ugsub(decomposed, diacritic_run, reorder_diacritic_sequence)
	return reordered
end

-- Shortcuts that are longer than one character. p.to_Greek applies them
-- with string.gsub before the single-character shortcuts, so "_i" becomes
-- an iota subscript before "_" alone would be turned into a macron.
local multiple = {
	["_i"] = subscript,
}

-- Shortcuts looked up one character at a time by p.to_Greek, and listed in
-- full by p.show_shortcuts. Characters without an entry are left unchanged.
local single = {
	["a"] = "α", ["A"] = "Α",
	["b"] = "β", ["B"] = "Β",
	["c"] = "ξ", ["C"] = "Ξ",
	["d"] = "δ", ["D"] = "Δ",
	["e"] = "ε", ["E"] = "Ε",
	["f"] = "φ", ["F"] = "Φ",
	["g"] = "γ", ["G"] = "Γ",
	["h"] = "η", ["H"] = "Η",
	["i"] = "ι", ["I"] = "Ι",
	["k"] = "κ", ["K"] = "Κ",
	["l"] = "λ", ["L"] = "Λ",
	["m"] = "μ", ["M"] = "Μ",
	["n"] = "ν", ["N"] = "Ν",
	["o"] = "ο", ["O"] = "Ο",
	["p"] = "π", ["P"] = "Π",
	["q"] = "θ", ["Q"] = "Θ",
	["r"] = "ρ", ["R"] = "Ρ",
	["s"] = "σ", ["S"] = "Σ",
	["t"] = "τ", ["T"] = "Τ",
	["u"] = "υ", ["U"] = "Υ",
	["v"] = "ϝ", ["V"] = "Ϝ",
	["w"] = "ω", ["W"] = "Ω",
	["x"] = "χ", ["X"] = "Χ",
	["y"] = "ψ", ["Y"] = "Ψ",
	["z"] = "ζ", ["Z"] = "Ζ",
	
	-- vowel length
	["_"] = macron, [spacing_macron] = macron, [modifier_macron] = macron,
	["^"] = breve, [spacing_breve] = breve,
	
	-- diaeresis and breathings
	["+"] = diaeresis, ["("] = rough, [")"] = smooth,
	
	-- accents
	-- NOTE(review): "{{=}}" is longer than one character, so the
	-- per-character lookup in p.to_Greek never reaches it; it only appears
	-- in the table produced by p.show_shortcuts. Presumably it documents
	-- how to write "=" inside a template call -- confirm.
	["/"] = acute, ["\\"] = grave,
	["="] = circumflex, ["{{=}}"] = circumflex, ["~"] = circumflex,
	
	-- punctuation
	["'"] = "’",
	["?"] = question_mark,
	[";"] = "·",
	["*"] = "", -- place after s to prevent it from turning into final sigma
	
	-- pipe
	["!"] = "|",
}

-- Replace every Latin "s" in text with a Greek sigma.
--
-- The final form "ς" is used when the "s" is the last character of the
-- text, or when the next character is whitespace or punctuation other than
-- "*" and "-"; otherwise the medial form "σ" is used.
--
-- Only the "s" itself is matched; the following character is inspected
-- without being consumed. Consuming it would make the substitution skip the
-- second "s" of "ss", so a word-final "ss" would never receive a final sigma.
--
-- Returns the converted string.
local function convert_s_to_sigma(text)
	return (str_gsub(text, "s()", function (next_pos)
		-- The pattern can match the empty string, so this is never nil;
		-- it is "" when nothing follows the "s".
		local following = string.match(text, "^" .. one_UTF8_char_or_none, next_pos)
		local is_final = following == ""
			or following ~= "*" and following ~= "-" and ufind(following, "[%s%p]")
		return is_final and "ς" or "σ"
	end))
end

-- Replace a combining rough or smooth breathing with its spacing
-- counterpart when the pattern finds no character in front of it (for
-- example at the very start of the text). A breathing that follows a
-- character is left as it is.
local function combining_to_spacing(text)
	local breathings = {
		{ combining = rough, spacing = spacing_rough },
		{ combining = smooth, spacing = spacing_smooth },
	}
	
	for i = 1, #breathings do
		local combining = breathings[i].combining
		local spacing = breathings[i].spacing
		local pattern = "(" .. one_UTF8_char_or_none .. ")" .. combining
		
		text = str_gsub(text, pattern, function (preceding)
			if preceding ~= "" then
				return preceding .. combining
			end
			return spacing
		end)
	end
	
	return text
end

-- Convert text written with the ASCII shortcuts of this module into Greek.
--
-- text: the string to convert.
-- Returns the converted string, decomposed and with its diacritics put
-- into a fixed order by p.reorder_diacritics.
-- Raises an error, reported at the caller's position, if text is not a string.
function p.to_Greek(text)
	if type(text) ~= "string" then
		error("first argument to to_Greek should be string, not " .. type(text), 2)
	end
	
	-- Sigma is handled first because the choice between the final and medial
	-- forms depends on the not-yet-converted character after each "s".
	text = convert_s_to_sigma(text)
	-- Multi-character shortcuts must be replaced before the single-character
	-- pass, which would otherwise convert their first character on its own.
	for k, v in pairs(multiple) do
		text = str_gsub(text, k, v)
	end
	-- Table lookup per character; characters without an entry stay unchanged.
	text = str_gsub(text, UTF8_char, single)
	text = combining_to_spacing(text)
	return p.reorder_diacritics(text)
end

-- Template entry point: converts the first positional parameter of the
-- calling template with p.to_Greek.
--
-- The parameter is trimmed; an empty value counts as missing. When it is
-- missing, a sample input is used if the current page's namespace text is
-- "Template", and an error is raised everywhere else.
function p.to_Greek_t(frame)
	local input = frame:getParent().args[1]
	if input ~= nil then
		input = mw.text.trim(input)
		if input == "" then
			input = nil
		end
	end
	
	if not input then
		if mw.title.getCurrentTitle().nsText ~= "Template" then
			error("Parameter 1 is required.")
		end
		input = "le/cis"
	end
	
	return p.to_Greek(input)
end

-- Turn a character into a hexadecimal HTML character reference. Characters
-- listed in diacritic_position are prefixed with a dotted circle.
-- An empty string is returned unchanged.
local function process(char)
	if char ~= "" then
		local prefix = diacritic_position[char] and "◌" or ""
		return prefix .. string.format("&#x%X;", get_codepoint(char))
	end
	return char
end

-- Build a wikitext table that lists every entry of `single`: the shortcut
-- in <code> tags next to the character it produces (as rendered by process).
-- Two shortcuts are placed on each table row.
--
-- frame: not used.
-- Returns the wikitext of the table as a string.
function p.show_shortcuts(frame)
	-- Same page title as the require at the top of this module, so that
	-- both lookups resolve to the same page on wikis with case-sensitive titles.
	local sorted_pairs = require("Module:table").sortedPairs
	
	local function is_letter(key)
		return key:find("^%a$") ~= nil
	end
	
	-- Sort order of the shortcuts: single ASCII letters first, then
	-- everything else; case-insensitive, with a capital before its
	-- lowercase counterpart.
	local function comp(item1, item2)
		local letter1, letter2 = is_letter(item1), is_letter(item2)
		-- non-letters after letters
		if letter1 ~= letter2 then
			return letter1
		end
		
		local lower1, lower2 = item1:lower(), item2:lower()
		-- capitals before lowercase
		if lower1 == lower2 then
			return item1 < item2
		end
		-- otherwise case-insensitive sorting
		return lower1 < lower2
	end
	
	local output = { '{| class="wikitable"' }
	local entries_in_row = 0
	for k, v in sorted_pairs(single, comp) do
		output[#output + 1] = '| <code>' .. k .. '</code> || <span lang="grc">' .. process(v) .. '</span>'
		entries_in_row = entries_in_row + 1
		-- Close the row after two shortcuts. A row separator is therefore
		-- also emitted after the last pair when the number of entries is even.
		if entries_in_row == 2 then
			output[#output + 1] = '|-'
			entries_in_row = 0
		end
	end
	
	output[#output + 1] = '|}'
	
	return table.concat(output, '\n')
end

return p