Module:OsmPageTitleParser

From OpenStreetMap Wiki
Jump to navigation Jump to search
[Edit] [Purge] Documentation

This module parses a title object into a language, a key, and an optional tag value. It is designed to be used by other modules. See also the testcases code.

2 tests failed.

test_keyprefixes_en
Text Expected Actual
☒N Key:something:* {key="something:",language="en"} {language="en",key="something:*"}
test_keys_en
Text Expected Actual
☑Y Key:something {key="something",language="en"} {language="en",key="something"}
☑Y Key:some thing {key="some_thing",language="en"} {language="en",key="some_thing"}
☑Y Key:some_thing {key="some_thing",language="en"} {language="en",key="some_thing"}
☑Y Key:some:thing {key="some:thing",language="en"} {language="en",key="some:thing"}
☑Y KEY::some:thing {key=":some:thing",language="en"} {language="en",key=":some:thing"}
☑Y Key:some:thing:aa {key="some:thing:aa",language="en"} {language="en",key="some:thing:aa"}
☑Y Key:some:thing:aa: {key="some:thing:aa:",language="en"} {language="en",key="some:thing:aa:"}
test_keys_international
Text Expected Actual
☑Y ko:key:some:thing {key="some:thing",language="ko"} {language="ko",key="some:thing"}
☑Y kO:key:some:thing:o {key="some:thing:o",language="ko"} {language="ko",key="some:thing:o"}
☑Y kO:key:so me:thing:o: {key="so_me:thing:o:",language="ko"} {language="ko",key="so_me:thing:o:"}
☑Y No:key:abc {key="abc",language="no"} {language="no",key="abc"}
test_keys_international_known_NS
Text Expected Actual
☑Y ru:Key:something {key="something",language="ru"} {language="ru",key="something"}
☑Y rU:KEY:some:thing:o {key="some:thing:o",language="ru"} {language="ru",key="some:thing:o"}
☑Y rU:keY:so me:thing:o: {key="so_me:thing:o:",language="ru"} {language="ru",key="so_me:thing:o:"}
test_langPrefix
Text Expected Actual
☑Y en
☑Y EN
☑Y fr FR: FR:
☑Y Fr FR: FR:
☑Y PT Pt: Pt:
☑Y
☑Y (nil)
test_language_pseudonamespaces
Text Expected Actual
☑Y bh:Main {language="bh",_parseFailed=true} {language="bh",_parseFailed=true}
☑Y ca-valencia:Main {language="ca-valencia",_parseFailed=true} {language="ca-valencia",_parseFailed=true}
☑Y gcf:Main {language="gcf",_parseFailed=true} {language="gcf",_parseFailed=true}
☑Y gsw:Main {language="gsw",_parseFailed=true} {language="gsw",_parseFailed=true}
☑Y kbp:Main {language="kbp",_parseFailed=true} {language="kbp",_parseFailed=true}
☑Y kfa:Main {language="kfa",_parseFailed=true} {language="kfa",_parseFailed=true}
☑Y mrw:Main {language="mrw",_parseFailed=true} {language="mrw",_parseFailed=true}
☑Y rcf:Main {language="rcf",_parseFailed=true} {language="rcf",_parseFailed=true}
☑Y sr-cyrl:Main {language="sr-cyrl",_parseFailed=true} {language="sr-cyrl",_parseFailed=true}
☑Y sr-latn:Main {language="sr-latn",_parseFailed=true} {language="sr-latn",_parseFailed=true}
☑Y sxu:Main {language="sxu",_parseFailed=true} {language="sxu",_parseFailed=true}
☑Y swg:Main {language="swg",_parseFailed=true} {language="swg",_parseFailed=true}
☑Y sxu:Main {language="sxu",_parseFailed=true} {language="sxu",_parseFailed=true}
☑Y trp:Main {language="trp",_parseFailed=true} {language="trp",_parseFailed=true}
☑Y tzm:Main {language="tzm",_parseFailed=true} {language="tzm",_parseFailed=true}
☑Y zgh:Main {language="zgh",_parseFailed=true} {language="zgh",_parseFailed=true}
☑Y Nds:Main {language="nds",_parseFailed=true} {language="nds",_parseFailed=true}
☑Y Gcf:Test {language="gcf",_parseFailed=true} {language="gcf",_parseFailed=true}
☑Y Zh-hant:Relation {language="zh-hant",_parseFailed=true} {language="zh-hant",_parseFailed=true}
☑Y Pt-br:Main {language="pt-br",_parseFailed=true} {language="pt-br",_parseFailed=true}
☑Y No:Main {language="no",_parseFailed=true} {language="no",_parseFailed=true}
test_non_language_pseudonamespaces
Text Expected Actual
☑Y POI:Scotiabank {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y Switzerland:Berne {language="en",_parseFailed=true} {language="en",_parseFailed=true}
test_splitKeyValue
Text Expected Actual
☑Y akey=avalue {k="akey",v="avalue"} {k="akey",v="avalue"}
☑Y akey {k="akey"} {k="akey"}
☑Y akey= {k="akey",v=""} {k="akey",v=""}
☑Y akey=ava=lue {k="akey",v="ava=lue"} {k="akey",v="ava=lue"}
☑Y akey==ava=lue= {k="akey",v="=ava=lue="} {k="akey",v="=ava=lue="}
☑Y {k=""} {k=""}
☑Y (nil) {} {}
test_tags_en
Text Expected Actual
☑Y Tag:something=abc {value="abc",key="something",language="en"} {value="abc",key="something",language="en"}
☑Y Tag:some:thing=abc:xyz {value="abc:xyz",key="some:thing",language="en"} {value="abc:xyz",key="some:thing",language="en"}
☑Y TAG::some:thing=aa=bb=c {value="aa=bb=c",key=":some:thing",language="en"} {value="aa=bb=c",key=":some:thing",language="en"}
☑Y taG:some:thing:aa bb=yy {value="yy",key="some:thing:aa_bb",language="en"} {value="yy",key="some:thing:aa_bb",language="en"}
☑Y Tag:some:thing:aa:=a b {value="a_b",key="some:thing:aa:",language="en"} {value="a_b",key="some:thing:aa:",language="en"}
test_tags_international
Text Expected Actual
☑Y ko:key:some:thing=abc x:yz {value="abc_x:yz",key="some:thing",language="ko"} {value="abc_x:yz",key="some:thing",language="ko"}
☑Y kO:key:some:thing:o=:a: {value=":a:",key="some:thing:o",language="ko"} {value=":a:",key="some:thing:o",language="ko"}
☑Y kO:key:so me:thing:o:=* {value="*",key="so_me:thing:o:",language="ko"} {value="*",key="so_me:thing:o:",language="ko"}
test_tags_international_known_NS
Text Expected Actual
☑Y ru:Key:something=abc {value="abc",key="something",language="ru"} {value="abc",key="something",language="ru"}
☑Y rU:KEY:some:thing:o=a=b:c {value="a=b:c",key="some:thing:o",language="ru"} {value="a=b:c",key="some:thing:o",language="ru"}
☑Y rU:keY:so me:thing:o:=== {value="==",key="so_me:thing:o:",language="ru"} {value="==",key="so_me:thing:o:",language="ru"}
☑Y RU:Moscow {language="ru",_parseFailed=true} {language="ru",_parseFailed=true}
test_talkpages
Text Expected Actual
☑Y Talk:Main {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y Talk:Pt:Creating an Account {language="pt",_parseFailed=true} {language="pt",_parseFailed=true}
☒N Talk:Tag:phone=tag {value="tag",key="phone",language="en",_parseFailed=true} {value="tag",key="phone",language="en"}
☑Y JA talk:Bus routes in Kanagawa {language="ja",_parseFailed=true} {language="ja",_parseFailed=true}
☑Y Talk:POI:The Church of Jesus Christ of Latter-day Saints {language="en",_parseFailed=true} {language="en",_parseFailed=true}
test_unparsable_titles
Text Expected Actual
☑Y something {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y FR:something {language="fr",_parseFailed=true} {language="fr",_parseFailed=true}
☑Y ko:something {language="ko",_parseFailed=true} {language="ko",_parseFailed=true}
☑Y some:thing {language="en",_parseFailed=true} {language="en",_parseFailed=true}
☑Y FR:some:thing {language="fr",_parseFailed=true} {language="fr",_parseFailed=true}
☑Y KO:some:thing {language="ko",_parseFailed=true} {language="ko",_parseFailed=true}
☑Y {_parseFailed=true} {_parseFailed=true}

-- Module table; the public functions are attached to it below and it is
-- returned at the end of the file.
local p = {}
-- Shared constants loaded from Module:OSM Constants. This file reads its
-- nsToLangCodeMap and customLangCodes tables.
local data = mw.loadData('Module:OSM Constants')

-- Module_talk:OsmPageTitleParser/testcases  has many test cases showing how to use this module
-- Simple debugging:
--  =p.parseTitle(mw.title.new('Key:test')).key

-- Convenience wrapper around p.parseTitleToObj: parses `title` into a fresh
-- table and returns that table (the boolean success flag is discarded).
-- The returned table is empty when `title` is nil.
function p.parseTitle(title)
  local parsed = {}
  p.parseTitleToObj(parsed, title)
  return parsed
end

-- Given a title whose text has the form (lang-code:)?(tag|key):(tagkey)(=tagvalue)?
-- tries to parse it into a language (object), a key, and an optional value string.
-- The parsing is done manually because Lua pattern support is limited.
-- The actual "tag"/"key" prefix is matched case-insensitively and then ignored.
-- Spaces in the title text are replaced with underscores before parsing.
-- Nothing is stripped from the key: a trailing '*' (e.g. 'Key:something:*')
-- stays part of the key.
--
-- Params:
--   result - table that receives the results:
--            result.language is always set unless title is nil (the page
--              language if one was detected, else the content language);
--            result.key and result.value are only set when successfully
--              parsed; result.value is left untouched if there is no '='.
--   title  - title object (its .namespace and .text fields are read)
-- Returns true if parsed, false otherwise
function p.parseTitleToObj(result, title)
  if not title then return false end

  local language, keyvalue

  -- If this is one of the known language namespaces, do not allow more
  -- language codes. Odd namespace ids are rounded down to the even id below
  -- them so talk pages are treated as their corresponding main pages.
  local ns = title.namespace
  ns = ns - ns % 2
  local langCode = data.nsToLangCodeMap[ns]
  if langCode then
    language = mw.getLanguage(langCode)
  end

  local canonicalTitle = mw.ustring.gsub(title.text, ' ', '_')
  local parts = mw.text.split(canonicalTitle, ':', true)
  for i, val in ipairs(parts) do
    -- each leading part is either the language code or the prefix (tag or key)
    local lval = string.lower(val)
    if lval == 'tag' or lval == 'key' then
      -- everything after the prefix is the key(=value) text; it stays nil
      -- when the prefix is the very last part of the title
      if i < #parts then
        keyvalue = table.concat(parts, ':', i + 1)
      end
      break
    elseif not language and (data.customLangCodes[lval] or mw.language.isSupportedLanguage(lval)) then
      -- mw.getLanguage will create an object even if the language is not
      -- supported, hence the explicit check above
      language = mw.getLanguage(lval)
    else
      -- unrecognized, there was no tag or key as first or second part
      break
    end
  end

  -- now split the keyvalue into key and (optional) value
  local tagkey, tagvalue
  if keyvalue then
    tagkey, tagvalue = p.splitKeyValue(keyvalue)
  end

  result.language = language or mw.language.getContentLanguage()

  -- splitKeyValue returns neither part when the text starts with '='
  if not tagkey and not tagvalue then
    return false
  end

  result.key = tagkey
  if tagvalue then result.value = tagvalue end
  return true
end

-- Given a key=value string, splits it at the first '=' and returns both parts.
-- Returns:
--   (nothing)   if keyvalue is nil
--   key, nil    if there is no equal sign
--   key, value  if the first '=' is preceded by at least one character;
--               value is everything after that '=' (may be '' or contain '=')
--   nil, nil    if the string starts with '='
function p.splitKeyValue(keyvalue)
  if not keyvalue then return end

  local eqlSignPos = mw.ustring.find(keyvalue, '=', 1, true)
  if not eqlSignPos then
    return keyvalue, nil
  end

  if eqlSignPos > 1 then
    return mw.ustring.sub(keyvalue, 1, eqlSignPos - 1),
           mw.ustring.sub(keyvalue, eqlSignPos + 1)
  end

  -- empty key (text starts with '='): report neither key nor value
  return nil, nil
end

-- Builds the page-title prefix for a language code (matched case-insensitively):
--   ''            for nil, the empty string, or English ('en')
--   'XX:'         (all upper case) when the code appears in data.nsToLangCodeMap
--   'Xx:'         (first letter capitalised) for every other code
function p.langPrefix(langCode)
  local code = langCode and mw.ustring.lower(langCode) or ''
  if code == '' or code == 'en' then
    return ''
  end

  -- does this language have its own namespace?
  local hasNamespace = false
  for _, nsCode in pairs(data.nsToLangCodeMap) do
    if nsCode == code then
      hasNamespace = true
      break
    end
  end

  if hasNamespace then
    return string.upper(code) .. ':'
  end
  return mw.getContentLanguage():ucfirst(code) .. ':'
end

-- Returns the language code detected in the page title given as the first
-- frame argument (frame.args[1]), or nil when no language could be determined.
function p.languageCodeInTitle(frame)
  local title = mw.title.new(frame.args[1])
  local language = p.parseTitle(title).language
  -- mw.title.new returns nil for an invalid title; parseTitle then yields an
  -- empty table, so there is no language object to read the code from.
  if not language then
    return nil
  end
  return language.code
end

-- Returns the key parsed from the page title given as the first frame
-- argument (frame.args[1]), or nil when the title was not parsed.
function p.keyInTitle(frame)
  local titleText = frame.args[1]
  local parsed = p.parseTitle(mw.title.new(titleText))
  return parsed.key
end

-- Returns the tag value parsed from the page title given as the first frame
-- argument (frame.args[1]), or nil when the title has no parsed value.
function p.valueInTitle(frame)
  local titleText = frame.args[1]
  local parsed = p.parseTitle(mw.title.new(titleText))
  return parsed.value
end

-- Builds a label from the page title given as the first frame argument
-- (frame.args[1]): "key=value" when the title has a value, otherwise just
-- the key (nil when the title was not parsed).
function p.dataItemLabelFromTitle(frame)
  local parsed = p.parseTitle(mw.title.new(frame.args[1]))
  local key, value = parsed.key, parsed.value
  if not value then
    return key
  end
  return key .. "=" .. value
end

-- Export the module table.
return p