-- Module page: Модул:Jpan-sortkey
-- (Wiki interface label: "Изглед" / "Appearance")
-- Documentation for this module can be created at the page Модул:Jpan-sortkey/док
local export = {}
local concat = table.concat
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local insert = table.insert
local match = mw.ustring.match
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC
local range = mw.loadData("Модул:ja/data/range")
local kanji_pattern = range.kanji
local ideograph_pattern = range.ideograph
local kana_graph_pattern = range.kana_graph
local latin_pattern = range.latin
local get_by_code = require("Модул:languages").getByCode
local Hani_sort = require("Модул:Hani-sortkey").makeSortKey
local track = require("Модул:debug/track")
--- Strips the formatting characters (space, "-", ".", "^", "%") from a
-- reading taken from a headword template argument.
-- @param reading string: raw template argument
-- @return string: the reading with those characters removed
local function strip_reading_markup(reading)
	-- Parentheses discard gsub's second return value (the substitution count).
	return (reading:gsub("[ %-%.^%%]", ""))
end

--- Scans the templates in a language section for a kana reading.
-- The scan stops at the first template that supplies a reading. While
-- scanning, the arguments of the first `<lang>-kanjitab` template are
-- remembered so the caller can fall back on them.
-- @param content string: wikitext of the language section
-- @param lang string: language code used as the template-name prefix
-- @return string|nil: the reading, if a template supplied one
-- @return table|nil: arguments of the first kanjitab template seen, only
--   returned when no reading was found
local function find_reading(content, lang)
	local findTemplates = require("Модул:template parser").findTemplates
	-- Lookup tables are built once per section rather than once per template.
	-- Templates whose reading is their second positional argument.
	local reading_in_arg2 = {
		[lang .. "-head"] = true,
		[lang .. "-pos"] = true,
	}
	-- Templates whose reading is their first positional argument.
	local reading_in_arg1 = {
		[lang .. "-noun"] = true,
		[lang .. "-verb"] = true,
		[lang .. "-adj"] = true,
		[lang .. "-phrase"] = true,
		[lang .. "-verb form"] = true,
		[lang .. "-verb-suru"] = true,
		[lang .. "-see"] = true,
		[lang .. "-see-kango"] = true,
		[lang .. "-gv"] = true,
	}
	local kanjitab_name = lang .. "-kanjitab"
	local kanjitab

	for template, args in findTemplates(content) do
		if reading_in_arg2[template] and args[2] then
			return strip_reading_markup(args[2])
		elseif (template == "head" or template == "head-lite") and args[1] == lang then
			-- The reading is the positional argument that follows a literal
			-- "kana" argument. Returning here ends the whole template scan;
			-- previously only the inner loop was left, so a later template
			-- could overwrite the reading that had just been found.
			for i, arg in ipairs(args) do
				if arg == "kana" then
					local kana = args[i + 1]
					if kana then
						return kana
					end
				end
			end
		end
		if reading_in_arg1[template] and args[1] then
			return strip_reading_markup(args[1])
		elseif template == kanjitab_name then
			kanjitab = kanjitab or args
		end
	end

	return nil, kanjitab
end

--- Builds a sort key by replacing the kanji in `text` with the readings
-- given as positional arguments of a kanjitab template.
-- Each positional argument is "<reading><optional digits>"; the digits say
-- how many kanji the reading spans (default 1). `k<i>` overrides reading i
-- and `o<i>` is appended to it. A resulting piece equal to "-" is dropped.
-- @param text string: the page title being sorted
-- @param kanjitab table: arguments of the kanjitab template
-- @return string: the assembled sort key (may be empty)
local function sortkey_from_kanjitab(text, kanjitab)
	-- Split the title into the runs of text lying between kanji:
	-- non_kanji[1] precedes the first kanji, non_kanji[n + 1] follows the
	-- n-th. Only these runs are needed; the kanji themselves are not kept,
	-- as nothing ever read them.
	local non_kanji = {}
	local kanji_border = 1
	gsub(text, "()([" .. kanji_pattern .. "々])()", function(p1, _, p2)
		insert(non_kanji, sub(text, kanji_border, p1 - 1))
		kanji_border = p2
	end)
	insert(non_kanji, sub(text, kanji_border))

	local sortkey = {non_kanji[1]}
	local id = 1
	for i, reading in ipairs(kanjitab) do
		local reading_kana, reading_length = match(reading, "^([^0-9]*)([0-9]*)$")
		reading_kana = reading_kana ~= "" and reading_kana or nil
		-- An empty or unparsable reading always counts as spanning one kanji.
		reading_length = reading_kana and tonumber(reading_length) or 1

		local piece
		if reading_kana then
			piece = (kanjitab["k" .. i] or reading_kana) .. (kanjitab["o" .. i] or "")
			if piece == "-" then
				piece = nil
			end
		end

		-- Advance past the kanji this reading covers, then append the run
		-- of non-kanji text that follows them.
		id = id + reading_length
		insert(sortkey, (piece or "") .. (non_kanji[id] or ""))
	end

	return concat(sortkey)
end

--- Produces the sort key for a term written in Japanese script.
-- Unless `lang` is "mul", the page titled `text` is read and its section for
-- the language is searched for a kana reading (headword templates first,
-- then the kanjitab template). The reading replaces `text` and the lookup
-- is repeated on the new text until it stops changing, a page has already
-- been visited, or no relevant characters remain. The result is then passed
-- through the hiragana and Han sort-key modules.
-- @param text string: the term to sort
-- @param lang string: language code (compared against "mul" and used as a
--   template-name prefix)
-- @param sc script argument, passed through unchanged to the Hira and Hani
--   sort-key functions
-- @return the value returned by the Hani sort-key function
function export.makeSortKey(text, lang, sc)
	-- Determine reading.
	local seen_pages, langname = {}
	-- Loop-invariant: built once instead of on every loop test.
	local lookup_pattern = "[0-9" .. kanji_pattern .. ideograph_pattern .. kana_graph_pattern .. latin_pattern .. "]"

	while lang ~= "mul" and (not seen_pages[text]) and find(text, lookup_pattern) do
		langname = langname or get_by_code(lang):getCanonicalName()
		seen_pages[text] = true

		-- toNFC may yield nil, mw.title.new returns nil for an invalid
		-- title, and getContent returns nil for a missing page; each of
		-- these simply means there is nothing to look up.
		local normalized = toNFC(text)
		local title = normalized and mw.title.new(normalized)
		local content = title and title:getContent()
		if content then
			content = require("Модул:utilities").get_section(content, langname, 2)
		end
		if not content then
			break
		end

		local reading, kanjitab = find_reading(content, lang)
		if reading then
			text = reading
		elseif kanjitab then
			track{"Jpan-sortkey/kanjitab", "Jpan-sortkey/kanjitab/" .. lang}
			if kanjitab.sortkey then
				text = kanjitab.sortkey
			else
				local sortkey = sortkey_from_kanjitab(text, kanjitab)
				if sortkey ~= "" then
					text = sortkey
				end
			end
		end
		-- If text did not change, seen_pages[text] ends the loop on the
		-- next test; otherwise the new text is looked up in turn.
	end

	-- Use hiragana sort.
	text = require("Модул:Hira-sortkey").makeSortKey(text, lang, sc)
	-- Run through Hani sort, to catch any stray kanji. This shouldn't happen
	-- but often does, and we still want to handle them sensibly in the time
	-- before the entry is fixed.
	local ret = Hani_sort(text, lang, sc)
	if not (lang == "mul" or ret == text) then
		track{"Jpan-sortkey/fallback", "Jpan-sortkey/fallback/" .. lang}
	end
	return ret
end
return export