Module:OsmPageTitleParser/sandbox
Jump to navigation
Jump to search
![]() | This is the module sandbox page for Module:OsmPageTitleParser (diff). See also the companion subpage for test cases (run). |
This module parses a title object into a language, a key, and an optional tag value. It is designed to be used by other modules. See also the testcases code.
2 tests failed.
Text | Expected | Actual | |
---|---|---|---|
![]() |
Key:something:* | {key="something:",language="en"} | {language="en",key="something:*"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
Key:something | {key="something",language="en"} | {language="en",key="something"} |
![]() |
Key:some thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} |
![]() |
Key:some_thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} |
![]() |
Key:some:thing | {key="some:thing",language="en"} | {language="en",key="some:thing"} |
![]() |
KEY::some:thing | {key=":some:thing",language="en"} | {language="en",key=":some:thing"} |
![]() |
Key:some:thing:aa | {key="some:thing:aa",language="en"} | {language="en",key="some:thing:aa"} |
![]() |
Key:some:thing:aa: | {key="some:thing:aa:",language="en"} | {language="en",key="some:thing:aa:"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
cnr:Key:name | {key="name",language="cnr"} | {language="cnr",key="name"} |
![]() |
cnr:Key:name:cnr | {key="name:cnr",language="cnr"} | {language="cnr",key="name:cnr"} |
![]() |
ko:key:some:thing | {key="some:thing",language="ko"} | {language="ko",key="some:thing"} |
![]() |
kO:key:some:thing:o | {key="some:thing:o",language="ko"} | {language="ko",key="some:thing:o"} |
![]() |
kO:key:so me:thing:o: | {key="so_me:thing:o:",language="ko"} | {language="ko",key="so_me:thing:o:"} |
![]() |
No:key:abc | {key="abc",language="no"} | {language="no",key="abc"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ru:Key:something | {key="something",language="ru"} | {language="ru",key="something"} |
![]() |
rU:KEY:some:thing:o | {key="some:thing:o",language="ru"} | {language="ru",key="some:thing:o"} |
![]() |
rU:keY:so me:thing:o: | {key="so_me:thing:o:",language="ru"} | {language="ru",key="so_me:thing:o:"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
en | ||
![]() |
EN | ||
![]() |
fr | FR: | FR: |
![]() |
Fr | FR: | FR: |
![]() |
PT | Pt: | Pt: |
![]() |
|||
![]() |
(nil) |
Text | Expected | Actual | |
---|---|---|---|
![]() |
bh:Main | {language="bh",_parseFailed=true} | {language="bh",_parseFailed=true} |
![]() |
ca-valencia:Main | {language="ca-valencia",_parseFailed=true} | {language="ca-valencia",_parseFailed=true} |
![]() |
cnr:Main | {language="cnr",_parseFailed=true} | {language="cnr",_parseFailed=true} |
![]() |
gcf:Main | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} |
![]() |
gsw:Main | {language="gsw",_parseFailed=true} | {language="gsw",_parseFailed=true} |
![]() |
kbp:Main | {language="kbp",_parseFailed=true} | {language="kbp",_parseFailed=true} |
![]() |
kfa:Main | {language="kfa",_parseFailed=true} | {language="kfa",_parseFailed=true} |
![]() |
mrw:Main | {language="mrw",_parseFailed=true} | {language="mrw",_parseFailed=true} |
![]() |
rcf:Main | {language="rcf",_parseFailed=true} | {language="rcf",_parseFailed=true} |
![]() |
sr-cyrl:Main | {language="sr-cyrl",_parseFailed=true} | {language="sr-cyrl",_parseFailed=true} |
![]() |
sr-latn:Main | {language="sr-latn",_parseFailed=true} | {language="sr-latn",_parseFailed=true} |
![]() |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} |
![]() |
swg:Main | {language="swg",_parseFailed=true} | {language="swg",_parseFailed=true} |
![]() |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} |
![]() |
trp:Main | {language="trp",_parseFailed=true} | {language="trp",_parseFailed=true} |
![]() |
tzm:Main | {language="tzm",_parseFailed=true} | {language="tzm",_parseFailed=true} |
![]() |
zgh:Main | {language="zgh",_parseFailed=true} | {language="zgh",_parseFailed=true} |
![]() |
Nds:Main | {language="nds",_parseFailed=true} | {language="nds",_parseFailed=true} |
![]() |
Gcf:Test | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} |
![]() |
Zh-hant:Relation | {language="zh-hant",_parseFailed=true} | {language="zh-hant",_parseFailed=true} |
![]() |
Pt-br:Main | {language="pt-br",_parseFailed=true} | {language="pt-br",_parseFailed=true} |
![]() |
No:Main | {language="no",_parseFailed=true} | {language="no",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
POI:Scotiabank | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
Switzerland:Berne | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
akey=avalue | {k="akey",v="avalue"} | {k="akey",v="avalue"} |
![]() |
akey | {k="akey"} | {k="akey"} |
![]() |
akey= | {k="akey",v=""} | {k="akey",v=""} |
![]() |
akey=ava=lue | {k="akey",v="ava=lue"} | {k="akey",v="ava=lue"} |
![]() |
akey==ava=lue= | {k="akey",v="=ava=lue="} | {k="akey",v="=ava=lue="} |
![]() |
{k=""} | {k=""} | |
![]() |
(nil) | {} | {} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
Tag:something=abc | {value="abc",key="something",language="en"} | {value="abc",key="something",language="en"} |
![]() |
Tag:some:thing=abc:xyz | {value="abc:xyz",key="some:thing",language="en"} | {value="abc:xyz",key="some:thing",language="en"} |
![]() |
TAG::some:thing=aa=bb=c | {value="aa=bb=c",key=":some:thing",language="en"} | {value="aa=bb=c",key=":some:thing",language="en"} |
![]() |
taG:some:thing:aa bb=yy | {value="yy",key="some:thing:aa_bb",language="en"} | {value="yy",key="some:thing:aa_bb",language="en"} |
![]() |
Tag:some:thing:aa:=a b | {value="a_b",key="some:thing:aa:",language="en"} | {value="a_b",key="some:thing:aa:",language="en"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
cnr:Tag:name:cnr=Studentska | {value="Studentska",key="name:cnr",language="cnr"} | {value="Studentska",key="name:cnr",language="cnr"} |
![]() |
ko:key:some:thing=abc x:yz | {value="abc_x:yz",key="some:thing",language="ko"} | {value="abc_x:yz",key="some:thing",language="ko"} |
![]() |
kO:key:some:thing:o=:a: | {value=":a:",key="some:thing:o",language="ko"} | {value=":a:",key="some:thing:o",language="ko"} |
![]() |
kO:key:so me:thing:o:=* | {value="*",key="so_me:thing:o:",language="ko"} | {value="*",key="so_me:thing:o:",language="ko"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ru:Key:something=abc | {value="abc",key="something",language="ru"} | {value="abc",key="something",language="ru"} |
![]() |
rU:KEY:some:thing:o=a=b:c | {value="a=b:c",key="some:thing:o",language="ru"} | {value="a=b:c",key="some:thing:o",language="ru"} |
![]() |
rU:keY:so me:thing:o:=== | {value="==",key="so_me:thing:o:",language="ru"} | {value="==",key="so_me:thing:o:",language="ru"} |
![]() |
RU:Moscow | {language="ru",_parseFailed=true} | {language="ru",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
Talk:Main | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
Talk:Pt:Creating an Account | {language="pt",_parseFailed=true} | {language="pt",_parseFailed=true} |
![]() |
Talk:Tag:phone=tag | {value="tag",key="phone",language="en",_parseFailed=true} | {value="tag",key="phone",language="en"} |
![]() |
JA talk:Bus routes in Kanagawa | {language="ja",_parseFailed=true} | {language="ja",_parseFailed=true} |
![]() |
Talk:POI:The Church of Jesus Christ of Latter-day Saints | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
something | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
FR:something | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} |
![]() |
ko:something | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} |
![]() |
some:thing | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
FR:some:thing | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} |
![]() |
KO:some:thing | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} |
![]() |
{_parseFailed=true} | {_parseFailed=true} |
-- Module table: every public function below is attached to it, and it is
-- returned at the end of the file.
local p = {}
-- Shared constants loaded from Module:OSM Constants. This module reads
-- data.nsToLangCodeMap and data.customLangCodes from it.
local data = mw.loadData('Module:OSM Constants')
-- Module_talk:OsmPageTitleParser/testcases has many test cases showing how to use this module
-- Simple debugging:
-- =p.parseTitle(mw.title.new('Key:test')).key
-- Convenience wrapper around p.parseTitleToObj: parses `title` into a fresh
-- table and returns that table. The boolean success flag of
-- p.parseTitleToObj is discarded.
function p.parseTitle(title)
    local parsed = {}
    p.parseTitleToObj(parsed, title)
    return parsed
end
-- Given a title in the form (lang-code:)?(tag|key):(tagkey)(=tagvalue)?
-- tries to parse it into a language (object), a key, and an optional value string.
-- The splitting is done by hand because Lua pattern support is limited.
-- The actual tag and key prefixes are ignored. Value will be nil if there is
-- no equal sign.
-- result.language is always set unless title is nil; result.key and
-- result.value are only set when the title was successfully parsed.
-- Params: result - table that receives the results; title - title object
-- Returns true if parsed, false otherwise
function p.parseTitleToObj(result, title)
    if not title then return false end

    -- Round the namespace number down to an even value so that talk pages
    -- are treated as their corresponding main pages.
    local ns = title.namespace
    ns = ns - ns % 2

    -- if this is one of the known language namespaces, do not allow more language codes
    local language
    local langCode = data.nsToLangCodeMap[ns]
    if langCode then
        language = mw.getLanguage(langCode)
    end

    -- gsub also returns a match count; a single assignment target keeps
    -- only the resulting string.
    -- NOTE: the two '*' removals run on the whole title, so they also apply
    -- to the value part after '='.
    local canonicalTitle = mw.ustring.gsub(title.text, ' ', '_')
    canonicalTitle = canonicalTitle:gsub(':%*', ':') -- remove * in title for key-prefixes
    canonicalTitle = canonicalTitle:gsub('%*:', ':') -- remove * in title for key-suffixes

    local prefix, keyvalue
    for _, val in ipairs(mw.text.split(canonicalTitle, ':', true)) do
        if not prefix then
            -- this could be the language code or the prefix (tag or key)
            local lval = string.lower(val)
            if lval == 'tag' or lval == 'key' then
                prefix = lval
            elseif not language and (data.customLangCodes[lval] or mw.language.isSupportedLanguage(lval)) then
                -- mw.getLanguage will create an object even if the language
                -- is not supported, hence the explicit check above
                language = mw.getLanguage(lval)
            else
                -- unrecognized, there was no tag or key as first or second part
                break
            end
        elseif keyvalue then
            -- combine the values back into a single string after we found the prefix
            keyvalue = keyvalue .. ':' .. val
        else
            keyvalue = val
        end
    end

    local tagkey, tagvalue
    if keyvalue then
        -- now split the keyvalue into key and (optional) value
        tagkey, tagvalue = p.splitKeyValue(keyvalue)
        if not tagkey and not tagvalue then
            -- splitKeyValue produced neither part: treat as not parsed
            keyvalue = nil
        end
    end

    -- Fall back to the wiki content language when none was detected.
    result.language = language or mw.language.getContentLanguage()

    if not keyvalue then
        return false
    end
    result.key = tagkey
    if tagvalue then result.value = tagvalue end
    return true
end
-- Given a key=value string, split it at the FIRST equal sign and return
-- both parts as two values: key, value.
--   * nil input             -> returns nothing
--   * no equal sign         -> key is the whole string, value is nil
--   * equal sign at the end -> key, and value is the empty string
--   * equal sign at start   -> nil, nil (no key could be extracted)
-- Any further equal signs remain part of the value.
function p.splitKeyValue(keyvalue)
    if not keyvalue then return end
    local tagkey, tagvalue
    local eqlSignPos = mw.ustring.find(keyvalue, '=', 1, true)
    if not eqlSignPos then
        tagkey = keyvalue
    elseif eqlSignPos > 1 then
        tagkey = mw.ustring.sub(keyvalue, 1, eqlSignPos - 1)
        tagvalue = mw.ustring.sub(keyvalue, eqlSignPos + 1)
    end
    return tagkey, tagvalue
end
-- Builds the page-title prefix for a language code.
-- Returns '' for nil, empty, or English ('en'); the upper-cased code plus ':'
-- when the code is one of the values in data.nsToLangCodeMap; otherwise the
-- code with its first letter capitalised by the content language, plus ':'.
function p.langPrefix(langCode)
    local code = langCode and mw.ustring.lower(langCode) or ''
    if code == '' or code == 'en' then
        return ''
    end
    for _, nsLang in pairs(data.nsToLangCodeMap) do
        if nsLang == code then
            return string.upper(code) .. ':'
        end
    end
    return mw.getContentLanguage():ucfirst(code) .. ':'
end
-- Entry point taking a frame: returns the language code parsed from the page
-- title given as the first frame argument.
-- Returns nil when no language could be determined, i.e. when no title
-- object was created from the argument.
function p.languageCodeInTitle(frame)
    local title = mw.title.new(frame.args[1])
    local language = p.parseTitle(title).language
    -- parseTitleToObj returns early without setting `language` when the
    -- title is nil; indexing it unguarded would raise an error.
    if not language then
        return nil
    end
    return language.code
end
-- Entry point taking a frame: returns the key parsed from the page title
-- given as the first frame argument, or nil when no key was parsed.
function p.keyInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.key
end
-- Entry point taking a frame: returns the tag value parsed from the page
-- title given as the first frame argument, or nil when there is none.
function p.valueInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.value
end
-- Entry point taking a frame: builds a label from the page title given as
-- the first frame argument. The label is "key=value" when the title carries
-- a value, otherwise just the key (nil when no key was parsed).
function p.dataItemLabelFromTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    local label = parsed.key
    if parsed.value then
        label = label .. "=" .. parsed.value
    end
    return label
end
-- Export the module table with all public functions attached.
return p