SPARQL examples

From OpenStreetMap Wiki
Jump to: navigation, search


Some queries that no longer work were moved to a subpage - they need to be updated to use Wikidata federation service instead of assuming that all Wikidata data is present in the same database.

Simple Queries

These basic queries help to understand SPARQL, the Wikibase RDF format and OSM RDF format .

List all OSM objects with a place tag

Get started with this simple query to list OSM objects by a particular type or tag. (Compare it with a similar Overpass Query).

SELECT * WHERE {
  # Limit to subjects that have an OSM type ('n', 'r', 'w').
  # Replace ?osmType with a string 'r' to show only relations.
  ?osmId osmm:type ?osmType .

  # Limit to subjects that have an OSM tag `place`
  # Replace ?place with a string 'city' to filter the tag value to `place=city`
  ?osmId osmt:place ?place  .

  # Uncomment this line to only show places that have no `name:en` tag
  # FILTER NOT EXISTS { ?osmId osmt:name:en ?nameen . }

} LIMIT 50

Run it (edit query)

OSM Metadata

Queries to examine OSM keys and tags data, as documented on this wiki.

Most used keys without description in any language

SELECT ?osmd ?keyId ?key_usage WHERE {

  # has a key and a count, but without any descriptions
  ?osmd osmdt:P16 ?keyId;
        osmm:count_all ?key_usage.
  FILTER NOT EXISTS { ?osmd schema:description ?description. }
}
ORDER BY DESC(?key_usage)
LIMIT 100

Run it (edit query)

Description counts per language

SELECT ?lang (COUNT(*) AS ?count) WHERE {
  
  VALUES ?key_or_tag { osmd:Q7 osmd:Q2 }
  
  ?item_id osmdt:P2 ?key_or_tag;
           schema:description ?description.
  
  BIND(LANG(?description) AS ?lang)
}
GROUP BY ?lang
ORDER BY DESC(?count)

Run it (edit query)

Find all untranslated Keys and Tags by usage

SELECT ?osmd ?osmdLabel ?osmdDescription ?key_usage ?tag_key_usage WHERE {
  
  # Which language to search
  VALUES ?lang { 'ru' }
  
  # We are only interested in Tag:* and Key:* data items
  VALUES ?tag_or_key { osmd:Q2 osmd:Q7 }
  
  # Instanceof = tag or key; and must have __a__ description
  ?osmd osmdt:P2 ?tag_or_key;
        schema:description ?description_en.

  # Limit the result to just english descriptions
  FILTER EXISTS {
    FILTER (lang(?description_en) = 'en')
  }

  # Remove deprecated items from the list
  MINUS { ?osmd osmdt:P6 osmd:Q5061 }

  # If the data item contains a description in the local language, skip it
  FILTER NOT EXISTS {
    ?osmd schema:description ?description_local.
    FILTER (lang(?description_local) = ?lang)
  }

  # Counts are only available for keys, so if this is a tag, get corresponding key usage
  OPTIONAL { ?osmd osmm:count_all ?key_usage. }
  OPTIONAL { ?osmd osmdt:P10/osmm:count_all ?tag_key_usage. }
  
  # Get English label and description (generates *Label and *Description vars)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }

} ORDER BY DESC(?key_usage) DESC(?tag_key_usage)

Run it (edit query)

Find all Keys:* and Tags:* with identical descriptions in multiple languages

# Group by description of all keys and tags (ignoring language)
# Filter those groups that have more than one item, and where list of languages has a needed code
# Add "en" label for output
SELECT ?osmd ?osmdLabel ?description ?langs WHERE {
  {
    SELECT ?osmd ?description
           (COUNT(*) AS ?count)
           (group_concat(?lang;separator=', ') as ?langs)
    WHERE {
      VALUES ?key_or_tag { osmd:Q7 osmd:Q2 }
      ?osmd osmdt:P2 ?key_or_tag;
            schema:description ?desc.
      BIND(STR(?desc) AS ?description)
      BIND(LANG(?desc) AS ?lang)
    }
    GROUP BY ?osmd ?description
  }
  FILTER(?count > 1)
  # Uncomment to filter by the language code
  # FILTER(contains(?langs, "fr"))
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

Run it (edit query)

Mismatched STATUS between languages in keys and tags

# List of all OSM Keys and Tags that have mismatching STATUS (P6) in various languages, most used first.
# Shows default value (English) and the mismatched value plus all regions that have it.
# If there are more than one mismatching value, they are shown on separate lines.
SELECT ?osmd ?osmdLabel ?defaultLabel ?mismatchLabel ?regions ?key_usage ?tag_key_usage WHERE {
  {
    # See qualifier docs: https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Qualifiers,_References_and_Ranks
    SELECT ?osmd ?mismatch (GROUP_CONCAT(DISTINCT ?langCode) as ?regions) WHERE {
      ?osmd osmp:P6 ?statement.
      ?statement osmps:P6 ?mismatch.
      ?statement osmpq:P26/osmdt:P32 ?langCode.
      ?statement wikibase:rank wikibase:NormalRank.
    } group by ?osmd ?mismatch
  }
  
  ?osmd osmdt:P6 ?default.
  # Counts are only available for keys, so if this is a tag, get corresponding key usage
  OPTIONAL { ?osmd osmm:count_all ?key_usage. }
  OPTIONAL { ?osmd osmdt:P10/osmm:count_all ?tag_key_usage. }
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} ORDER BY DESC(?key_usage) DESC(?tag_key_usage) ?osmdLabel ?mismatchLabel

Run it (edit query)

Quality Control


Find objects that fail regex validation defined in data item

SELECT ?feature ?value WHERE {
  # Get regex and subject ref for the needed key.
  # For "population"  it will be  "[0-9]+"  and  osmt:population
  ?osmd osmdt:P16 "population";
        osmdt:P13 ?regex;
        osmm:key ?keySubj.
  # Surround the regex with ^ and $ characters (to match the whole value string)
  BIND (CONCAT("^", ?regex, "$") as ?re)
  
  # Find all OSM features with the needed key,
  # And only include those whose value doesn't match needed regex
  ?feature ?keySubj ?value.  
  FILTER(!REGEX(?value, ?re))
} LIMIT 10

Run it (edit query)

Find mismatching "wikipedia" and "wikidata" tags (faster)

SELECT * WHERE {
  # must have both WP and WD tags
  ?osmId osmt:wikipedia ?wp ;
         osmt:wikidata ?wd .
  
  # ignore non-standard values for wp & wd
  FILTER (ISURI(?wp) && ISURI(?wd))
  
  # when WP sitelink is not a part of the WD entry
  FILTER NOT EXISTS {
    ?wp schema:about ?wd .
  }
} LIMIT 10

Run it (edit query)

mismatching "wikipedia" and "wikidata" tags (slower)

#defaultView:Map
SELECT ?osmId ?wdLabel ?wpFromOSM ?wpFromWd ?wd ?loc WHERE {
  ?osmId osmt:wikidata ?wd ;         # osm object must have a "wikidata" tag
         osmt:wikipedia ?wpFromOSM ; # osm object must have a "wikipedia" tag
         osmm:loc ?loc .             # get object's location

  # wikipedia tag link must not be listed in the corresponding wikidata object
  FILTER NOT EXISTS { ?wpFromOSM schema:about ?wd . }

  OPTIONAL {
    # if Wikidata has another link in the same domain, show it
    ?wpFromWd schema:about ?wd .
    FILTER( SUBSTR( STR(?wpFromOSM), 0, 25 ) = SUBSTR( STR(?wpFromWd), 0, 25 ) )
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Run it (edit query)

Find when more than one relation link to the same Wikidata ID

SELECT ?osmid ?adminlvl ?wd ?wdLabel {
  # find relation with a wikidata tag
  ?osmid osmm:type 'r' ;
         osmt:wikidata ?wd ;
         osmt:admin_level ?adminlvl .

  # add user's or english label to the found wikidata
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }

  # the ?wd variable must also be matching this subquery
  # it finds wd tags that appear more than once
  { SELECT ?wd { 
    ?osmid osmm:type 'r' ;
           osmt:wikidata ?wd ;
           osmt:admin_level ?adminlvl .
    }
    GROUP BY ?wd
    HAVING (COUNT(*) > 1)
  }

} LIMIT 10

Run it (edit query)

Show relations with no sub-relations or nodes, having same wikidata tag with the parent **SLOW**

#defaultView:Map
SELECT
  ?rel
  (SAMPLE(?location) as ?location)
  (sum(?failed) as ?failCount)
  (count(?mwd) as ?memberWithWdCount)
  (count(?member) as ?memberCount)
  ((count(?member) - count(?mwd)) as ?diffCount)

WHERE {
  # Find relations with wikidata tag and at least one member
  ?rel osmm:type 'r';
       osmt:wikidata ?wd;
       osmm:loc ?location;
       osmm:has ?member .

  # Get member's type
  ?member osmm:type ?mtype .

  # Get member's wikidata tag if it exists
  OPTIONAL { ?member osmt:wikidata ?mwd }

  # If any of the conditions are met, set ?failed to 1.
  # The sum of ?failed must be 0 for the relation to be shown
  BIND (if((?mtype='r' || ?mtype='n' || (bound(?mwd) && ?mwd!=?wd)), 1, 0) as ?failed)
}
GROUP BY ?rel
HAVING (?memberWithWdCount > 0 && ?failCount = 0)
ORDER BY DESC(?memberCount)

Run it (edit query)

Find OSM objects, where wikidata should probably be brand:wikidata

#defaultView:Map
SELECT ?osmId ?location ?bwd ?bwdLabel ?bwdDescription WHERE {

  # Subquery finds brand:wikidata IDs used more than 10 times
  {
    SELECT ?bwd (count(*) as ?count) WHERE {
      ?o osmt:brand:wikidata ?bwd .
    }
    group by ?bwd
    having (?count > 10)
  }

  # Find OSM objects where wikidata or operator:wikidata tag
  # is one of the common brand:wikidata IDs
  VALUES ?tag { osmt:wikidata osmt:operator:wikidata }

  ?osmId ?tag ?bwd ;
         osmm:loc ?location .

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,fr,ru,es,de,zh,ja". }
}

Run it (edit query)

Find OSM objects, where wikidata should probably be species:wikidata

#defaultView:Map
SELECT ?osmId ?location ?swd ?swdLabel ?swdDescription WHERE {

  # Subquery finds brand:wikidata IDs used more than 10 times
  {
    SELECT ?swd (count(*) as ?count) WHERE {
      ?o osmt:species:wikidata ?swd .
    }
    group by ?swd
    having (?count > 3)
  }

  # Find OSM objects where wikidata tag is one of the common brand:wikidata IDs
  ?osmId osmt:wikidata ?swd ;
         osmm:loc ?location .

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,fr,ru,es,de,zh,ja". }
}

Run it (edit query)


website or url the same as wikipedia tag

#defaultView:Map
SELECT ?osmId ?wp ?loc WHERE {
  
  { SELECT ?osmId ?wp ?loc WHERE {
    ?osmId osmt:wikipedia ?wp ;
           osmt:url ?url ;
           osmm:loc ?loc .
    FILTER( REPLACE( ?url, 'http://', 'https://') = str(?wp) )
  } }
UNION
  { SELECT ?osmId ?wp ?loc WHERE {
    ?osmId osmt:wikipedia ?wp ;
           osmt:website ?url ;
           osmm:loc ?loc .
    FILTER( REPLACE( ?url, 'http://', 'https://') = str(?wp) )
  } }
}

Run it (edit query)

Miscellaneous

Calculate simple centroid of a relation

The location is calculated by taking all relation's members, and averaging all of their longitudes and latitudes.
Note: This is not very accurate, especially for relations whose members are near anti-meridian.

#defaultView:Map
SELECT
  ?rel
  # Calculate average lng and lat, and reconstruct a geopoint literal
  (strdt(
    concat(
      'Point(', STR(xsd:float(AVG(?longitude))), ' ', STR(xsd:float(AVG(?latitude))), ')'
    ), geo:wktLiteral
  ) as ?loc)
WHERE {

  # Limit to just a few relation IDs, otherwise the server times out
  BIND( osmrel:13 as ?rel )

  # Subject must be a relation, and must not already have a location
  ?rel osmm:type 'r' .
  FILTER NOT EXISTS { ?rel osmm:loc ?relLoc . }

  # Relation must have members, and those members must have locations
  ?rel osmm:has ?member .
  ?member osmm:loc ?loc .

  # Extract each member's longitude and latitude
  BIND( geof:longitude(?loc) as ?longitude )
  BIND( geof:latitude(?loc) as ?latitude )
}
GROUP BY ?rel  # group all found items by relation
LIMIT 10

Run it (edit query)

Use regex to search tags: a word in wikipedia tag ends in a "ville"

SELECT ?osmId ?place ?wp ?loc WHERE {
  # must have place and wikipedia tags
  ?osmId osmt:place ?place ;
         osmt:wikipedia ?wp .
  
  # Find any wikipedia link with a word ending in "ville"
  # Unlike other tags, wikipedia and wikidata tags are stored as URIs,
  # so they must be converted into a string before using string functions.
  FILTER (regex(str(?wp), "ville([^a-z]|$)", "i"))
  
  # If location metatag exists, include it
  # Use "Table" button to switch to the map view
  OPTIONAL { ?osmId osmm:loc ?loc . }
}
LIMIT 10

Run it (edit query)


Show a map of the user's edits between dates, if edit is still last, by obj type

#defaultView:Map
SELECT * WHERE {

  ?osmId osmm:user 'nyuriks' ;
         osmm:loc ?loc ;
         osmm:type ?layer .

  # Filter by timestamp
  ?osmId osmm:timestamp ?ts .
  FILTER ("2017-03-01T00:00:00Z"^^xsd:dateTime < ?ts &&
          ?ts < "2017-03-05T00:00:00Z"^^xsd:dateTime)
}

Run it (edit query)

Show geoshapes (geometries) of all political subdivisions of a country

#defaultView:MapRegions
SELECT 
  ?id
  (SAMPLE(?label_en) as ?label_en)
  (SAMPLE(?label_fr) as ?label_fr)
  (SAMPLE(?iso_3166_2) as ?iso_3166_2)
  (SAMPLE(?flag) as ?flag)
WHERE {
  # List of regions, whose sub-regions we want
  VALUES ?entity { wd:Q16 } 

  SERVICE <https://query.wikidata.org/sparql> { 
    # P150 = "contains administrative territorial entity"
    # but must not have a P582 (end date) qualifier
    ?entity p:P150 ?statement .
    ?statement ps:P150 ?id .
    FILTER NOT EXISTS { ?statement pq:P582 ?x }

    # Get labels, ISO code, and flag image
    # for each sub-region, if available
    OPTIONAL { ?id rdfs:label ?label_en . FILTER(LANG(?label_en) = "en") }
    OPTIONAL { ?id rdfs:label ?label_fr . FILTER(LANG(?label_fr) = "fr") }
    OPTIONAL { ?id wdt:P300 ?iso_3166_2 }
    OPTIONAL { ?id wdt:P41 ?flag }
  }
}
# remove possible ID duplicates
GROUP BY ?id

Run it (edit query)