Module:OsmPageTitleParser

From OpenStreetMap Wiki
Jump to navigation Jump to search
[Edit] [Purge] Documentation

This module parses a title object into a language, a key, and an optional tag value. It is designed to be used by other modules. See also the testcases code.

1 tests failed.

test_keys_en
Text Expected Actual
☑Y Key:something {key="something",language="en"} {language="en",key="something"}
☑Y Key:some thing {key="some_thing",language="en"} {language="en",key="some_thing"}
☑Y Key:some_thing {key="some_thing",language="en"} {language="en",key="some_thing"}
☑Y Key:some:thing {key="some:thing",language="en"} {language="en",key="some:thing"}
☑Y KEY::some:thing {key=":some:thing",language="en"} {language="en",key=":some:thing"}
☑Y Key:some:thing:aa {key="some:thing:aa",language="en"} {language="en",key="some:thing:aa"}
☑Y Key:some:thing:aa: {key="some:thing:aa:",language="en"} {language="en",key="some:thing:aa:"}
test_keys_international
Text Expected Actual
☑Y ko:key:some:thing {key="some:thing",language="ko"} {language="ko",key="some:thing"}
☑Y kO:key:some:thing:o {key="some:thing:o",language="ko"} {language="ko",key="some:thing:o"}
☑Y kO:key:so me:thing:o: {key="so_me:thing:o:",language="ko"} {language="ko",key="so_me:thing:o:"}
☑Y No:key:abc {key="abc",language="no"} {language="no",key="abc"}
test_keys_international_known_NS
Text Expected Actual
☑Y ru:Key:something {key="something",language="ru"} {language="ru",key="something"}
☑Y rU:KEY:some:thing:o {key="some:thing:o",language="ru"} {language="ru",key="some:thing:o"}
☑Y rU:keY:so me:thing:o: {key="so_me:thing:o:",language="ru"} {language="ru",key="so_me:thing:o:"}
test_langPrefix
Text Expected Actual
☑Y en
☑Y EN
☑Y fr FR: FR:
☑Y Fr FR: FR:
☑Y PT Pt: Pt:
☑Y
☑Y (nil)
test_language_pseudonamespaces
Text Expected Actual
☑Y bh:Main {language="bh",_parseFailed=true} {language="bh",_parseFailed=true}
☑Y ca-valencia:Main {language="ca-valencia",_parseFailed=true} {language="ca-valencia",_parseFailed=true}
☑Y gcf:Main {language="gcf",_parseFailed=true} {language="gcf",_parseFailed=true}
☑Y gsw:Main {language="gsw",_parseFailed=true} {language="gsw",_parseFailed=true}
☑Y kbp:Main {language="kbp",_parseFailed=true} {language="kbp",_parseFailed=true}
☑Y kfa:Main {language="kfa",_parseFailed=true} {language="kfa",_parseFailed=true}
☑Y mrw:Main {language="mrw",_parseFailed=true} {language="mrw",_parseFailed=true}
☑Y rcf:Main {language="rcf",_parseFailed=true} {language="rcf",_parseFailed=true}
☑Y sr-cyrl:Main {language="sr-cyrl",_parseFailed=true} {language="sr-cyrl",_parseFailed=true}
☑Y sr-latn:Main {language="sr-latn",_parseFailed=true} {language="sr-latn",_parseFailed=true}
☑Y sxu:Main {language="sxu",_parseFailed=true} {language="sxu",_parseFailed=true}
☑Y swg:Main {language="swg",_parseFailed=true} {language="swg",_parseFailed=true}
☑Y sxu:Main {language="sxu",_parseFailed=true} {language="sxu",_parseFailed=true}
☑Y trp:Main {language="trp",_parseFailed=true} {language="trp",_parseFailed=true}
☑Y tzm:Main {language="tzm",_parseFailed=true} {language="tzm",_parseFailed=true}
☑Y zgh:Main {language="zgh",_parseFailed=true} {language="zgh",_parseFailed=true}
☑Y Nds:Main {language="nds",_parseFailed=true} {language="nds",_parseFailed=true}
☑Y Gcf:Test {language="gcf",_parseFailed=true} {language="gcf",_parseFailed=true}
☑Y Zh-hant:Relation {language="zh-hant",_parseFailed=true} {language="zh-hant",_parseFailed=true}
☑Y Pt-br:Main {language="pt-br",_parseFailed=true} {language="pt-br",_parseFailed=true}
☑Y No:Main {language="no",_parseFailed=true} {language="no",_parseFailed=true}
test_non_language_pseudonamespaces
Text Expected Actual
☑Y POI:Scotiabank {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y Switzerland:Berne {language="en",_parseFailed=true} {language="en",_parseFailed=true}
test_splitKeyValue
Text Expected Actual
☑Y akey=avalue {k="akey",v="avalue"} {k="akey",v="avalue"}
☑Y akey {k="akey"} {k="akey"}
☑Y akey= {k="akey",v=""} {k="akey",v=""}
☑Y akey=ava=lue {k="akey",v="ava=lue"} {k="akey",v="ava=lue"}
☑Y akey==ava=lue= {k="akey",v="=ava=lue="} {k="akey",v="=ava=lue="}
☑Y {k=""} {k=""}
☑Y (nil) {} {}
test_tags_en
Text Expected Actual
☑Y Tag:something=abc {value="abc",key="something",language="en"} {value="abc",key="something",language="en"}
☑Y Tag:some:thing=abc:xyz {value="abc:xyz",key="some:thing",language="en"} {value="abc:xyz",key="some:thing",language="en"}
☑Y TAG::some:thing=aa=bb=c {value="aa=bb=c",key=":some:thing",language="en"} {value="aa=bb=c",key=":some:thing",language="en"}
☑Y taG:some:thing:aa bb=yy {value="yy",key="some:thing:aa_bb",language="en"} {value="yy",key="some:thing:aa_bb",language="en"}
☑Y Tag:some:thing:aa:=a b {value="a_b",key="some:thing:aa:",language="en"} {value="a_b",key="some:thing:aa:",language="en"}
test_tags_international
Text Expected Actual
☑Y ko:key:some:thing=abc x:yz {value="abc_x:yz",key="some:thing",language="ko"} {value="abc_x:yz",key="some:thing",language="ko"}
☑Y kO:key:some:thing:o=:a: {value=":a:",key="some:thing:o",language="ko"} {value=":a:",key="some:thing:o",language="ko"}
☑Y kO:key:so me:thing:o:=* {value="*",key="so_me:thing:o:",language="ko"} {value="*",key="so_me:thing:o:",language="ko"}
test_tags_international_known_NS
Text Expected Actual
☑Y ru:Key:something=abc {value="abc",key="something",language="ru"} {value="abc",key="something",language="ru"}
☑Y rU:KEY:some:thing:o=a=b:c {value="a=b:c",key="some:thing:o",language="ru"} {value="a=b:c",key="some:thing:o",language="ru"}
☑Y rU:keY:so me:thing:o:=== {value="==",key="so_me:thing:o:",language="ru"} {value="==",key="so_me:thing:o:",language="ru"}
☑Y RU:Moscow {language="ru",_parseFailed=true} {language="ru",_parseFailed=true}
test_talkpages
Text Expected Actual
☑Y Talk:Main {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y Talk:Pt:Creating an Account {language="pt",_parseFailed=true} {language="pt",_parseFailed=true}
☒N Talk:Tag:phone=tag {value="tag",key="phone",language="en",_parseFailed=true} {value="tag",key="phone",language="en"}
☑Y JA talk:Bus routes in Kanagawa {language="ja",_parseFailed=true} {language="ja",_parseFailed=true}
☑Y Talk:POI:The Church of Jesus Christ of Latter-day Saints {language="en",_parseFailed=true} {language="en",_parseFailed=true}
test_unparsable_titles
Text Expected Actual
☑Y something {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y FR:something {language="fr",_parseFailed=true} {language="fr",_parseFailed=true}
☑Y ko:something {language="ko",_parseFailed=true} {language="ko",_parseFailed=true}
☑Y some:thing {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y FR:some:thing {language="fr",_parseFailed=true} {language="fr",_parseFailed=true}
☑Y KO:some:thing {language="ko",_parseFailed=true} {language="ko",_parseFailed=true}
☑Y {_parseFailed=true} {_parseFailed=true}

  1 local p = {}
  2 local data = mw.loadData('Module:OSM Constants')
  3 
  4 -- Module_talk:OsmPageTitleParser/testcases  has many test cases showing how to use this module
  5 -- Simple debugging:
  6 --  =p.parseTitle(mw.title.new('Key:test')).key
  7 
  8 -- A wrapper to return the parse results. See p.parseTitleToObj
  9 function p.parseTitle(title)
 10   local result = {}
 11   p.parseTitleToObj(result, title)
 12   return result
 13 end
 14 
 15 -- given a title string in a form of (lang-code:)?(tag|key):(tagkey)(=tagvalue)?
 16 -- tries to parse it into language (object), key, and optional value strings
 17 -- We have to do it manually because Lua regex support is not that great
 18 -- The actual tag and key prefixes are ignored. Value will be nil if no equal sign.
 19 -- The output result object will always get the language unless title is nil,
 20 -- but no other values will be set unless successfuly parsed
 21 -- Params:  title object,  result table to get the results
 22 -- Returns true if parsed, false otherwise
 23 function p.parseTitleToObj(result, title)
 24   if not title then return false end
 25 
 26   local language, prefix, tagkey, tagvalue
 27 
 28   -- if this is one of the known language namespaces, do not allow more language codes
 29   local ns = title.namespace / 2 * 2
 30   ns = ns - ns % 2 -- treat talk pages as their corresponding main pages
 31   local langCode = data.nsToLangCodeMap[ns]
 32   if langCode then
 33     language = mw.getLanguage(langCode)
 34   end
 35 
 36   local canonicalTitle = mw.ustring.gsub(title.text, ' ', '_')
 37   local keyvalue
 38   for _, val in ipairs(mw.text.split(canonicalTitle, ':', true)) do
 39     if not prefix then
 40       -- this could be the language code or the prefix (tag or key)
 41       local lval = string.lower(val)
 42       if lval == 'tag' or lval == 'key' then
 43         prefix = lval
 44       elseif not language and (data.customLangCodes[lval] or mw.language.isSupportedLanguage(lval)) then
 45       	-- mw.getLanguage will creat an object even if the language is not supported
 46         language = mw.getLanguage(lval)
 47       else
 48         -- unrecognized, there was no tag or key as first or second part
 49         break
 50       end
 51     else
 52       -- combine the values back into a single string after we found the prefix
 53       if keyvalue then
 54         keyvalue = keyvalue .. ':' .. val
 55       else
 56         keyvalue = val
 57       end
 58     end
 59   end
 60 
 61   if keyvalue then
 62     -- now split the keyvalue into key and (optional) value
 63     tagkey, tagvalue = p.splitKeyValue(keyvalue)
 64     if not tagkey and not tagvalue then
 65       keyvalue = nil
 66     end
 67   end
 68 
 69   if not language then
 70     result.language = mw.language.getContentLanguage()
 71   else
 72     result.language = language
 73   end
 74 
 75   if keyvalue then
 76     result.key = tagkey
 77     if tagvalue then result.value = tagvalue end
 78     return true
 79   else
 80     return false
 81   end
 82 end
 83 
 84 -- given a key=value string, split it into two parts and return both
 85 -- if there is no equal sign, return key and nil value
 86 function p.splitKeyValue(keyvalue)
 87 	if not keyvalue then return end
 88 	local tagkey, tagvalue
 89     local eqlSignPos = mw.ustring.find(keyvalue, '=', 1, true)
 90     if not eqlSignPos then
 91       tagkey = keyvalue
 92     else
 93       local keyvalLen = mw.ustring.len(keyvalue)
 94       if eqlSignPos > 1 then
 95         tagkey = mw.ustring.sub(keyvalue, 1, eqlSignPos - 1)
 96         tagvalue = mw.ustring.sub(keyvalue, eqlSignPos + 1)
 97       end
 98     end
 99     return tagkey, tagvalue
100 end
101 
-- Given a language code, returns the matching page-title prefix.
-- Params:
--   langCode - language code string in any letter case (may be nil)
-- Returns:
--   ''                       for nil, the empty string, or English ('en')
--   upper-cased code .. ':'  if the code is one of the values of
--                            data.nsToLangCodeMap (language namespaces)
--   ucfirst(code) .. ':'     for every other code (first letter capitalised
--                            by the content language object)
function p.langPrefix(langCode)
  if not langCode then
    return ''
  end

  langCode = mw.ustring.lower(langCode)
  if langCode == '' or langCode == 'en' then
    return ''
  end

  -- the map is keyed by namespace number, so scan its values for the code
  for _, nsLangCode in pairs(data.nsToLangCodeMap) do
    if nsLangCode == langCode then
      return string.upper(langCode) .. ':'
    end
  end

  return mw.getContentLanguage():ucfirst(langCode) .. ':'
end
116 
-- Entry point taking a frame: returns the language code detected in the page
-- title given as the first frame argument.
-- Params:
--   frame - frame object; frame.args[1] is the title text
-- Returns the `code` field of the parsed language object, or nil when no
-- language could be determined. p.parseTitle leaves the language unset when
-- it receives a nil title object, i.e. when mw.title.new did not produce one
-- for the argument; that case previously raised an "attempt to index a nil
-- value" error here.
function p.languageCodeInTitle(frame)
  local title = mw.title.new(frame.args[1])
  local language = p.parseTitle(title).language
  if not language then
    return nil
  end
  return language.code
end
122 
-- Entry point taking a frame: returns the key parsed from the page title
-- given as the first frame argument, or nil if the title has no parsable key.
function p.keyInTitle(frame)
  local parsed = p.parseTitle(mw.title.new(frame.args[1]))
  return parsed.key
end
127 
-- Entry point taking a frame: returns the tag value parsed from the page
-- title given as the first frame argument, or nil if no value was parsed.
function p.valueInTitle(frame)
  local parsed = p.parseTitle(mw.title.new(frame.args[1]))
  return parsed.value
end
132 
-- Entry point taking a frame: builds a label from the page title given as the
-- first frame argument.
-- Returns "key=value" when the title parsed to a key with a value, otherwise
-- just the key (nil if nothing was parsed).
function p.dataItemLabelFromTitle(frame)
  local parsed = p.parseTitle(mw.title.new(frame.args[1]))
  local value = parsed.value
  if not value then
    return parsed.key
  end
  return parsed.key .. "=" .. value
end
141 
142 return p