Module:OsmPageTitleParser
Jump to navigation
Jump to search
This documentation is transcluded from Module:OsmPageTitleParser/doc. (Edit | history)
Note to editors: Please don't categorize this template by editing it directly. Instead, place the category in its documentation page, in its "includeonly" section.
Note to editors: Please don't categorize this template by editing it directly. Instead, place the category in its documentation page, in its "includeonly" section.
This module parses a title object into a language, a key, and an optional tag value. It is designed to be used by other modules. See also the testcases code.
1 tests failed.
Text | Expected | Actual | |
---|---|---|---|
![]() |
Key:something | {key="something",language="en"} | {language="en",key="something"} |
![]() |
Key:some thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} |
![]() |
Key:some_thing | {key="some_thing",language="en"} | {language="en",key="some_thing"} |
![]() |
Key:some:thing | {key="some:thing",language="en"} | {language="en",key="some:thing"} |
![]() |
KEY::some:thing | {key=":some:thing",language="en"} | {language="en",key=":some:thing"} |
![]() |
Key:some:thing:aa | {key="some:thing:aa",language="en"} | {language="en",key="some:thing:aa"} |
![]() |
Key:some:thing:aa: | {key="some:thing:aa:",language="en"} | {language="en",key="some:thing:aa:"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ko:key:some:thing | {key="some:thing",language="ko"} | {language="ko",key="some:thing"} |
![]() |
kO:key:some:thing:o | {key="some:thing:o",language="ko"} | {language="ko",key="some:thing:o"} |
![]() |
kO:key:so me:thing:o: | {key="so_me:thing:o:",language="ko"} | {language="ko",key="so_me:thing:o:"} |
![]() |
No:key:abc | {key="abc",language="no"} | {language="no",key="abc"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ru:Key:something | {key="something",language="ru"} | {language="ru",key="something"} |
![]() |
rU:KEY:some:thing:o | {key="some:thing:o",language="ru"} | {language="ru",key="some:thing:o"} |
![]() |
rU:keY:so me:thing:o: | {key="so_me:thing:o:",language="ru"} | {language="ru",key="so_me:thing:o:"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
en | ||
![]() |
EN | ||
![]() |
fr | FR: | FR: |
![]() |
Fr | FR: | FR: |
![]() |
PT | Pt: | Pt: |
![]() |
|||
![]() |
(nil) |
Text | Expected | Actual | |
---|---|---|---|
![]() |
bh:Main | {language="bh",_parseFailed=true} | {language="bh",_parseFailed=true} |
![]() |
ca-valencia:Main | {language="ca-valencia",_parseFailed=true} | {language="ca-valencia",_parseFailed=true} |
![]() |
gcf:Main | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} |
![]() |
gsw:Main | {language="gsw",_parseFailed=true} | {language="gsw",_parseFailed=true} |
![]() |
kbp:Main | {language="kbp",_parseFailed=true} | {language="kbp",_parseFailed=true} |
![]() |
kfa:Main | {language="kfa",_parseFailed=true} | {language="kfa",_parseFailed=true} |
![]() |
mrw:Main | {language="mrw",_parseFailed=true} | {language="mrw",_parseFailed=true} |
![]() |
rcf:Main | {language="rcf",_parseFailed=true} | {language="rcf",_parseFailed=true} |
![]() |
sr-cyrl:Main | {language="sr-cyrl",_parseFailed=true} | {language="sr-cyrl",_parseFailed=true} |
![]() |
sr-latn:Main | {language="sr-latn",_parseFailed=true} | {language="sr-latn",_parseFailed=true} |
![]() |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} |
![]() |
swg:Main | {language="swg",_parseFailed=true} | {language="swg",_parseFailed=true} |
![]() |
sxu:Main | {language="sxu",_parseFailed=true} | {language="sxu",_parseFailed=true} |
![]() |
trp:Main | {language="trp",_parseFailed=true} | {language="trp",_parseFailed=true} |
![]() |
tzm:Main | {language="tzm",_parseFailed=true} | {language="tzm",_parseFailed=true} |
![]() |
zgh:Main | {language="zgh",_parseFailed=true} | {language="zgh",_parseFailed=true} |
![]() |
Nds:Main | {language="nds",_parseFailed=true} | {language="nds",_parseFailed=true} |
![]() |
Gcf:Test | {language="gcf",_parseFailed=true} | {language="gcf",_parseFailed=true} |
![]() |
Zh-hant:Relation | {language="zh-hant",_parseFailed=true} | {language="zh-hant",_parseFailed=true} |
![]() |
Pt-br:Main | {language="pt-br",_parseFailed=true} | {language="pt-br",_parseFailed=true} |
![]() |
No:Main | {language="no",_parseFailed=true} | {language="no",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
POI:Scotiabank | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
Switzerland:Berne | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
akey=avalue | {k="akey",v="avalue"} | {k="akey",v="avalue"} |
![]() |
akey | {k="akey"} | {k="akey"} |
![]() |
akey= | {k="akey",v=""} | {k="akey",v=""} |
![]() |
akey=ava=lue | {k="akey",v="ava=lue"} | {k="akey",v="ava=lue"} |
![]() |
akey==ava=lue= | {k="akey",v="=ava=lue="} | {k="akey",v="=ava=lue="} |
![]() |
{k=""} | {k=""} | |
![]() |
(nil) | {} | {} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
Tag:something=abc | {value="abc",key="something",language="en"} | {value="abc",key="something",language="en"} |
![]() |
Tag:some:thing=abc:xyz | {value="abc:xyz",key="some:thing",language="en"} | {value="abc:xyz",key="some:thing",language="en"} |
![]() |
TAG::some:thing=aa=bb=c | {value="aa=bb=c",key=":some:thing",language="en"} | {value="aa=bb=c",key=":some:thing",language="en"} |
![]() |
taG:some:thing:aa bb=yy | {value="yy",key="some:thing:aa_bb",language="en"} | {value="yy",key="some:thing:aa_bb",language="en"} |
![]() |
Tag:some:thing:aa:=a b | {value="a_b",key="some:thing:aa:",language="en"} | {value="a_b",key="some:thing:aa:",language="en"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ko:key:some:thing=abc x:yz | {value="abc_x:yz",key="some:thing",language="ko"} | {value="abc_x:yz",key="some:thing",language="ko"} |
![]() |
kO:key:some:thing:o=:a: | {value=":a:",key="some:thing:o",language="ko"} | {value=":a:",key="some:thing:o",language="ko"} |
![]() |
kO:key:so me:thing:o:=* | {value="*",key="so_me:thing:o:",language="ko"} | {value="*",key="so_me:thing:o:",language="ko"} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
ru:Key:something=abc | {value="abc",key="something",language="ru"} | {value="abc",key="something",language="ru"} |
![]() |
rU:KEY:some:thing:o=a=b:c | {value="a=b:c",key="some:thing:o",language="ru"} | {value="a=b:c",key="some:thing:o",language="ru"} |
![]() |
rU:keY:so me:thing:o:=== | {value="==",key="so_me:thing:o:",language="ru"} | {value="==",key="so_me:thing:o:",language="ru"} |
![]() |
RU:Moscow | {language="ru",_parseFailed=true} | {language="ru",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
Talk:Main | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
Talk:Pt:Creating an Account | {language="pt",_parseFailed=true} | {language="pt",_parseFailed=true} |
![]() |
Talk:Tag:phone=tag | {value="tag",key="phone",language="en",_parseFailed=true} | {value="tag",key="phone",language="en"} |
![]() |
JA talk:Bus routes in Kanagawa | {language="ja",_parseFailed=true} | {language="ja",_parseFailed=true} |
![]() |
Talk:POI:The Church of Jesus Christ of Latter-day Saints | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
Text | Expected | Actual | |
---|---|---|---|
![]() |
something | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
FR:something | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} |
![]() |
ko:something | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} |
![]() |
some:thing | {language="en",_parseFailed=true} | {language="en",_parseFailed=true} |
![]() |
FR:some:thing | {language="fr",_parseFailed=true} | {language="fr",_parseFailed=true} |
![]() |
KO:some:thing | {language="ko",_parseFailed=true} | {language="ko",_parseFailed=true} |
![]() |
{_parseFailed=true} | {_parseFailed=true} |
1 local p = {}
2 local data = mw.loadData('Module:OSM Constants')
3
4 -- Module_talk:OsmPageTitleParser/testcases has many test cases showing how to use this module
5 -- Simple debugging:
6 -- =p.parseTitle(mw.title.new('Key:test')).key
7
-- Convenience wrapper around p.parseTitleToObj.
-- Parses `title` into a freshly created table and returns that table.
-- The boolean success flag of p.parseTitleToObj is discarded: when parsing
-- fails the returned table simply has no key/value fields, and when `title`
-- is nil the table is empty.
function p.parseTitle(title)
    local parsed = {}
    p.parseTitleToObj(parsed, title)
    return parsed
end
14
-- Given a title in the form (lang-code:)?(tag|key):(tagkey)(=tagvalue)?
-- tries to parse it into a language (object), a key and an optional value.
-- This is done manually because Lua pattern support is too limited for it.
-- The actual "tag"/"key" prefix is matched case-insensitively and then ignored.
-- The value will be nil if there is no equal sign.
-- The result table always receives `language` unless title is nil, but
-- `key`/`value` are only set when the title was successfully parsed.
-- Params:
--   result - table that receives the fields language, key and value
--   title  - title object (its `namespace` and `text` fields are read)
-- Returns true if parsed, false otherwise
function p.parseTitleToObj(result, title)
    if not title then return false end

    local language, prefix, keyvalue

    -- If this is one of the known language namespaces, do not allow more
    -- language codes. Rounding down to an even number treats talk pages as
    -- their corresponding main pages.
    local ns = title.namespace - title.namespace % 2
    local langCode = data.nsToLangCodeMap[ns]
    if langCode then
        language = mw.getLanguage(langCode)
    end

    -- Normalize spaces to underscores before splitting on ':'.
    -- Assigning to a single local keeps only gsub's first result (the string).
    local canonicalTitle = mw.ustring.gsub(title.text, ' ', '_')
    for _, val in ipairs(mw.text.split(canonicalTitle, ':', true)) do
        if prefix then
            -- After the prefix was found, glue the remaining parts back
            -- together: keys and values may themselves contain colons.
            keyvalue = keyvalue and (keyvalue .. ':' .. val) or val
        else
            -- This part could be the language code or the prefix (tag or key)
            local lval = string.lower(val)
            if lval == 'tag' or lval == 'key' then
                prefix = lval
            elseif not language and (data.customLangCodes[lval] or mw.language.isSupportedLanguage(lval)) then
                -- mw.getLanguage will create an object even if the language
                -- is not supported, hence the explicit support check above
                language = mw.getLanguage(lval)
            else
                -- unrecognized: there was no tag or key as first or second part
                break
            end
        end
    end

    -- Split the collected string into key and (optional) value
    local tagkey, tagvalue
    if keyvalue then
        tagkey, tagvalue = p.splitKeyValue(keyvalue)
    end

    -- The language is reported even when the rest of the title did not parse
    result.language = language or mw.language.getContentLanguage()

    -- Nothing after the prefix, or splitKeyValue rejected it (empty key)
    if not tagkey and not tagvalue then
        return false
    end

    result.key = tagkey
    if tagvalue then result.value = tagvalue end
    return true
end
83
-- Given a "key=value" string, split it at the FIRST equal sign and return
-- both parts; any further equal signs stay in the value.
-- Returns:
--   nothing            when keyvalue is nil (or false)
--   keyvalue, nil      when there is no equal sign
--   nil, nil           when the string starts with '=' (empty key)
--   key, value         otherwise (value may be the empty string)
function p.splitKeyValue(keyvalue)
    if not keyvalue then return end

    -- plain (non-pattern) search for the first '='
    local eqlSignPos = mw.ustring.find(keyvalue, '=', 1, true)
    if not eqlSignPos then
        return keyvalue, nil
    end
    if eqlSignPos == 1 then
        -- an empty key is treated as unparseable
        return nil, nil
    end

    local tagkey = mw.ustring.sub(keyvalue, 1, eqlSignPos - 1)
    local tagvalue = mw.ustring.sub(keyvalue, eqlSignPos + 1)
    return tagkey, tagvalue
end
101
-- Given a language code, returns the matching title prefix:
--   ''            for nil, the empty string, or English ('en')
--   'XX:'         (all upper case) when the code is one of the values of
--                 data.nsToLangCodeMap, i.e. it has its own namespace
--   'Xx:'         (first letter upper-cased by the content language) otherwise
-- The comparison is case-insensitive; the code is lower-cased first.
function p.langPrefix(langCode)
    local code = ''
    if langCode then
        code = mw.ustring.lower(langCode) or ''
    end

    if code == '' or code == 'en' then
        return ''
    end

    -- namespace languages use an all-caps prefix
    for _, nsLang in pairs(data.nsToLangCodeMap) do
        if nsLang == code then
            return string.upper(code) .. ':'
        end
    end

    local contentLang = mw.getContentLanguage()
    return contentLang:ucfirst(code) .. ':'
end
116
-- Entry point: returns the language code for the page title given as the
-- first argument of the frame.
-- Returns nil when no language could be determined. p.parseTitle leaves its
-- result table empty when the title object is nil, so the language is
-- guarded here instead of raising "attempt to index a nil value".
function p.languageCodeInTitle(frame)
    local title = mw.title.new(frame.args[1])
    local language = p.parseTitle(title).language
    return language and language.code
end
122
-- Entry point: returns the key parsed from the page title given as the
-- first argument of the frame, or nil when the title did not parse.
function p.keyInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.key
end
127
-- Entry point: returns the tag value parsed from the page title given as the
-- first argument of the frame, or nil when the title has no value part or
-- did not parse.
function p.valueInTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    return parsed.value
end
132
-- Entry point: builds a label from the page title given as the first
-- argument of the frame.
-- Returns "key=value" when the title contains a value, otherwise just the
-- key (which is nil when the title did not parse).
function p.dataItemLabelFromTitle(frame)
    local parsed = p.parseTitle(mw.title.new(frame.args[1]))
    local value = parsed.value
    if not value then
        return parsed.key
    end
    return parsed.key .. "=" .. value
end
141
142 return p