WikiProject Belgium/De Lijndata

From OpenStreetMap Wiki
Jump to: navigation, search
     
WikiProject Belgium +/-
Conventions
Subprojects

I'm going to describe the process of converting the data from De Lijn, public transport company for Flanders.

Preparation

All source code is licensed GPL v3. Hopefully it's useful as inspiration for others.

Software requirements

  • Python v3
  • PostgreSQL v9.x
  • PostGIS v2.x


To repeat the steps below, it will be necessary to make a request to De Lijn for getting access to their data. The login for their FTP goes into credentials.txt. Username on the first line, password on the second line. The script expects it in the same directory the script is run from.

Creation of the database and adding data to it

The data consists of a zip file which is updated regularly. It needs to be converted to UTF-8 from latin1.

It is, of course, possible to use Filezilla, WinSCP or even wget to do this, but I wrote a Python script which automates downloading (after checking it is necessary), unzipping and recoding:


#!/bin/python
# -*- coding: utf-8 -*-
import os, sys, re, zipfile, ftplib
import argparse
zipre = re.compile('\d\d\d\d-\d\d-\d\d\.zip')


parser = argparse.ArgumentParser(description='Fetch data from FTP server of De Lijn, unzip it and recode to UTF-8')
parser.add_argument('--skipdownload', '-d', action='store_true',
                   help="Don't contact the FTP server, work with the most recent local file")
parser.add_argument('--dontcallsuccessor', '-s', action='store_true',
                   help="don't call NewDBfromCSV.py when done")

args = parser.parse_args()

""" Fetch the latest zip file from the ftp site of De Lijn """

class Callback(object):
    '''This prints a nice progress status on the command line'''
    def __init__(self, totalsize, fp):
        self.totalsize = totalsize
        self.fp = fp
        self.received = 0

    def __call__(self, data):
        self.fp.write(data)
        self.received += len(data)
        print('\r%i%% complete' % (100.0*self.received/self.totalsize), end='\r')

if not(args.skipdownload):
    print ('Reading credentials from "credentials.txt"')
    with open("credentials.txt") as credentials:
        username, password = credentials.readlines()
        #print (username, password)

    print ("Opening connection to FTP site of De Lijn")
    ftp=ftplib.FTP(host='poseidon.delijn.be', user=username, passwd=password)
    print ("CD to current")
    ftp.cwd('current')
    print ("Get name of file")
    fn = ftp.nlst()[0]
    size = ftp.size(fn)
    if not(fn in os.listdir()):
        # Only download if a newer file is available
        print (fn + " found, downloading latest version of De Lijndata")
        with open(fn, 'wb') as fh:
            w = Callback(size, fh)
            #ftp.set_pasv(0)
            ftp.retrbinary('RETR %s' % fn, w, 32768)

        ftp.quit()
    else:
        print('Latest version already present, nothing to do')
        sys.exit()
""" Unzip the latest file we have available in the current directory """

files = os.listdir()
zipfn=''
for file in files:
    if re.match(zipre, file):
        if file > zipfn:
            zipfn = file

zfile = zipfile.ZipFile(zipfn)
print(); print(); print("Found " + zipfn)
for name in zfile.namelist():
    """Recode csv-file with textual content to UTF-8 """
    (dirname, filename) = os.path.split(name)
    print("Decompressing " + filename)
    fd = open(name,"wb")
    fd.write(zfile.read(name).decode('latin-1').replace('\r','').replace('"','').encode('utf-8'))
    fd.close()

if not(args.dontcallsuccessor): import NewDBfromCSV

Populate database

Now that we have unpacked the zip file to several csv files, it's time to put them into a PostGIS database. I created a stored procedure to take care of the conversion between Lambert72 and WGS84.

If you put the following in 'NewDBfromCSV.py', it will get started at the end of the previous script automatically if needed.

import postgresql
# -*- coding: utf-8 -*-

db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')

print("Granting user rights")
createDB = db.execute("""

GRANT ALL ON DATABASE "DL" TO "Jo";
GRANT ALL ON ALL TABLES IN SCHEMA public TO "Jo";
GRANT ALL ON TABLE spatial_ref_sys TO "Jo";

""")


print("Creating tables")
createDB = db.execute("""
--CREATE TABLESPACE pg_dl
--  OWNER postgres
--  LOCATION E'C:\\Data\\OSM\\PostGIS_Tablespace';

DROP TABLE IF EXISTS stops CASCADE;

--DROP EXTENSION postgis CASCADE;
--CREATE EXTENSION postgis;

DROP TABLE IF EXISTS places;
CREATE TABLE IF NOT EXISTS places ( placeid int NOT NULL PRIMARY KEY, placeidentifier text, placedescription text )
 WITH ( OIDS=FALSE );
ALTER TABLE places OWNER TO postgres;

DROP TABLE IF EXISTS calendar;
CREATE TABLE calendar ( vscid int NOT NULL PRIMARY KEY, vsid bigint, vscdate date, vscday text )
 WITH ( OIDS=FALSE );
ALTER TABLE calendar OWNER TO postgres;

DROP TABLE IF EXISTS routes;
CREATE TABLE routes
 ( routeid int NOT NULL PRIMARY KEY, routeidentifier text, routedescription text, routepublicidentifier text, routeversion text, routeservicetype text, routeservicemode text )
  WITH ( OIDS=FALSE );
ALTER TABLE routes OWNER TO postgres;

DROP TABLE IF EXISTS trips;
CREATE TABLE trips
 ( tripid bigint NOT NULL PRIMARY KEY, routeid int, vscid int, tripnoteidentifier text, tripnotetext text, tripstart text, tripend text, tripshiftstart integer, tripshiftend integer, tripnoteidentifier2 text, tripnotetext2 text, placeidstart bigint, placeidend bigint, naturalkey text )
   WITH ( OIDS=FALSE );
ALTER TABLE trips OWNER TO postgres;

DROP TABLE IF EXISTS segments;
CREATE TABLE segments
 ( segmentid bigint NOT NULL PRIMARY KEY, tripid bigint, stopid int, segmentsequence int, segmentstart text, segmentend text, segmentshiftstart integer, segmentshiftend integer )
  WITH ( OIDS=FALSE );
ALTER TABLE segments OWNER TO postgres;
""")

routescount = db.prepare('SELECT COUNT(*) FROM routes;')
segmentscount = db.prepare('SELECT COUNT(*) FROM segments;')
tripscount = db.prepare('SELECT COUNT(*) FROM trips;')

print("Copying data into tables")
createDB = db.execute("""

COPY places FROM 'C:/Data/De Lijn/places.csv' DELIMITERS ';' CSV HEADER;
COPY calendar FROM 'C:/Data/De Lijn/calendar.csv' DELIMITERS ';' CSV HEADER;
COPY routes FROM 'C:/Data/De Lijn/routes.csv' DELIMITERS ';' CSV HEADER;
COPY trips FROM 'C:/Data/De Lijn/trips.csv' DELIMITERS ';' CSV HEADER;
COPY segments FROM 'C:/Data/De Lijn/segments.csv' DELIMITERS ';' CSV HEADER;

""")

print("Creating tables for stops")
createDB = db.execute("""

DROP TABLE IF EXISTS stops;
DROP TABLE IF EXISTS stops_DL;
DROP TABLE IF EXISTS stops_OSM;
CREATE TABLE stops
 ( stopid INT NOT NULL PRIMARY KEY, stopidentifier INT, description text, street text, municipality text, parentmunicipality text, x INT, y INT, stopisaccessible BOOLEAN, stopispublic BOOLEAN )
  WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;
 
COPY stops FROM 'C:/Data/De Lijn/stops.csv' DELIMITERS ';' CSV HEADER;
 
ALTER TABLE stops
 ADD COLUMN lat DOUBLE PRECISION, 
 ADD COLUMN lon DOUBLE PRECISION,
 ADD COLUMN route_ref text,
 --ADD COLUMN geomDL geometry,
 --ADD COLUMN geomOSM geometry,
 ADD COLUMN OSM_name text,
 ADD COLUMN OSM_city text,
 ADD COLUMN OSM_street text,
 ADD COLUMN OSM_operator text,
 ADD COLUMN OSM_route_ref text,
 ADD COLUMN OSM_source text,
 ADD COLUMN OSM_node_ID text,
 ADD COLUMN OSM_last_modified_by_user text,
 ADD COLUMN OSM_last_modified_timestamp TIMESTAMP,
 ADD COLUMN OSM_zone text,
 ADD COLUMN zoneid integer;
 
SELECT AddGeometryColumn ('public','stops','geomdl',4326,'POINT',2);
SELECT AddGeometryColumn ('public','stops','geomosm',4326,'POINT',2);
 
CREATE TABLE stops_DL
 ( stopsPK int NOT NULL PRIMARY KEY,
   last_change_timestamp timestamp,
   stopidentifier int,                  -- this corresponds to ref in OSM
   description text,                    -- this corresponds to name in OSM
   street text,
   municipality text,
   parentmunicipality text,
   stopisaccessible boolean,
   stopispublic boolean, 
   route_ref text,                      -- this is calculated
   geomDL geometry)
   WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;

CREATE TABLE stops_OSM
 ( stopsPK int NOT NULL PRIMARY KEY,
   last_change_timestamp timestamp,
   ref text,                            -- this corresponds to stopidentifier
   name text,
   operator text,
   route_ref text,
   zone text,
   source text,
   node_ID text,                        -- over time a stop may have used different nodes
   version int,
   last_modified_by text,
   geomOSM geometry)
  WITH ( OIDS=FALSE );
ALTER TABLE stops OWNER TO postgres;

""")

print("Create index on routes (routeidentifier), routes (routeversion), trips(routeid) and on segments (tripid)")
createDB = db.execute("""

CREATE INDEX ix_routeidentifier ON routes (routeidentifier);
CREATE INDEX ix_routeversion ON routes (routeversion);
CREATE INDEX ix_tripsrouteid ON trips(routeid);
CREATE INDEX ix_segmentstripid ON segments (tripid);
""")


print("Vacuum analyze to gather statistics for efficient use of indexes")
createDB = db.execute("""

VACUUM ANALYZE --VERBOSE;

""")

print(routescount(), tripscount(), segmentscount())
print("Remove older route versions")
createDB = db.execute("""

    WITH currentversions AS (SELECT rte1.routeid FROM routes rte1
                             WHERE rte1.routeversion = (SELECT MAX(rte2.routeversion)
                                                        FROM routes rte2
                                                        JOIN trips ON trips.routeid=rte2.routeid -- we want the highest version in the routes table for which there are actual trips
                                                        WHERE rte1.routeidentifier=rte2.routeidentifier))
    DELETE FROM routes rte
      WHERE rte.routeid NOT IN (SELECT routeid from currentversions);
 """)

print("Remove trips for older route versions")
createDB = db.execute("""
DELETE FROM trips trp
 WHERE NOT EXISTS
   (SELECT trp2.routeid FROM trips trp2
     JOIN routes rte ON trp2.routeid=rte.routeid
      AND trp.routeid=rte.routeid);
""")

print("Remove segments for older route versions")
createDB = db.execute("""
DELETE FROM segments sgt
 WHERE NOT EXISTS (SELECT sgt2.segmentid FROM segments sgt2
                     JOIN trips trp ON sgt2.tripid=trp.tripid
                      AND sgt2.segmentid=sgt.segmentid);
""")

print("Creating indexes")
createDB = db.execute("""

CREATE INDEX ix_stopidentifier ON stops (stopidentifier);
CREATE INDEX ix_OSM_node_ID ON stops (OSM_node_ID);
CREATE INDEX ix_description ON stops (description);
CREATE INDEX ix_routepublicidentifier ON routes (routepublicidentifier);
CREATE INDEX ix_segmentstopid ON segments (stopid);

""")

print("Vacuum analyze to gather statistics for efficient use of indexes")
createDB = db.execute("""

VACUUM ANALYZE --VERBOSE;

""")

print(routescount(), tripscount(), segmentscount())

print("Creating stored procedures")
createDB = db.execute("""
CREATE OR REPLACE FUNCTION AllLinesPassingAtaStop(stopidentifierparameter int) RETURNS text AS $BODY$
  DECLARE outlines text :='';
    l record;
    line text;
  BEGIN
    FOR l IN SELECT distinct(lpad(rte.routepublicidentifier, 5, '0'))
				FROM public.trips    trp
				JOIN public.routes   rte      ON rte.routeid=trp.routeid AND
                                                 rte.routepublicidentifier NOT LIKE 'F%'
				JOIN public.segments seg      ON seg.tripid=trp.tripid
				JOIN public.stops    stp      ON seg.stopid=stp.stopid
				WHERE
				  stp.stopidentifier = stopidentifierparameter
                ORDER BY lpad(rte.routepublicidentifier, 5, '0')
    LOOP
      line := l;
      line := trim(leading '(' FROM line);
      IF line = '00000'
      THEN line:= '0';
      ELSE line := trim(leading '0' FROM line);
      END IF;
      line := trim(trailing ')' FROM line);
      outlines := outlines || ';' || line;
    END LOOP;

    RETURN trim(both ';' FROM outlines);
  END $BODY$
LANGUAGE plpgsql VOLATILE COST 100;
ALTER FUNCTION AllLinesPassingAtaStop(int) OWNER TO postgres;
GRANT EXECUTE ON FUNCTION AllLinesPassingAtaStop(int) TO public;

CREATE OR REPLACE FUNCTION filloutlines() RETURNS void AS $BODY$
  DECLARE
    l record;
    res text;
    b text;
    a int4;
    coords geometry;
    vlat double precision;
    vlon double precision;

  BEGIN
    DROP INDEX IF EXISTS ix_geomDL;
    FOR l IN SELECT stopid, stopidentifier, x, y FROM stops
      LOOP res := AllLinesPassingAtaStop(l.stopidentifier);
        coords := st_transform(st_setSRID(st_Point(l.x, l.y), 31370),4326);
        vlat := st_y(coords);
        vlon := st_x(coords);

      UPDATE stops
        SET route_ref=res,
          geomDL = coords,
          lat = vlat,
          lon = vlon
        WHERE stops.stopid=l.stopid;
	  RAISE NOTICE  '% set to %',l.stopidentifier, res;
     END LOOP;
   END 
$BODY$ LANGUAGE plpgsql VOLATILE COST 11;
ALTER FUNCTION filloutlines() OWNER TO postgres;
GRANT EXECUTE ON FUNCTION filloutlines() TO public;

""")

print("Converting from Lambert72 and adding route_ref to stops table")

filloutlines = db.proc('filloutlines()')
print(filloutlines())

# -- COPY stops (stopid, stopidentifier, description, street, municipality, parentmunicipality, lat, lon, route_ref) TO 'C:/Data/De Lijn/De Lijnstops.csv' DELIMITERS '#' CSV;

print("Creating index on description and spatial column containing coordinates from De Lijn")
createDB = db.execute("""

CREATE INDEX ix_geomDL ON stops USING gist(geomDL);

""")

createDB = db.execute("""
DROP TABLE IF EXISTS zones;

CREATE TABLE zones
(
  zoneid serial NOT NULL,
  zone text,
  geomzone geometry,
  CONSTRAINT zones_pkey PRIMARY KEY (zoneid)
)
WITH (
  OIDS=FALSE
);
--SELECT AddGeometryColumn ('public','zones','geomzone',4326,'POLYGON',2);
CREATE INDEX ix_geomzone ON zones USING gist(geomzone);
ALTER TABLE zones
  OWNER TO postgres;
GRANT ALL ON TABLE zones TO postgres;
GRANT ALL ON TABLE zones TO "Jo";
""")
print("Vacuum analyze to gather statistics for efficient use of indexes")


createDB = db.execute("""

VACUUM ANALYZE --VERBOSE;

""")

print("Creating a stored procedure for later use")
createDB = db.execute("""

CREATE OR REPLACE FUNCTION AllTripsForARouteVerbose(routeidentifierparam text) RETURNS table(tripid int,routeidentifier text,routedescription text,tripstart text,start text,terminus text) AS $BODY$
  BEGIN
    RETURN QUERY 

		SELECT DISTINCT
		  trp.tripid,
		  rte.routeidentifier,
		  rte.routedescription,
		  trp.tripstart,
		  (SELECT 
			st.description
			FROM 
			  public.stops st
			JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
			WHERE 
			  seg1.segmentsequence = (SELECT MIN(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS Start,
		  (SELECT 
			st.description
			FROM 
			  public.stops st
			JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
			WHERE 
			  seg1.segmentsequence = (SELECT MAX(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS Terminus
		FROM public.trips    trp
		JOIN public.routes   rte      ON rte.routeid=trp.routeid
		JOIN public.segments seg      ON seg.tripid=trp.tripid
		JOIN public.stops    stp      ON seg.stopid=stp.stopid
		WHERE
		  rte.routeidentifier = routeidentifierparam
		ORDER BY
		  trp.tripstart ASC;
  END; $BODY$
LANGUAGE plpgsql VOLATILE COST 100;
ALTER FUNCTION AllTripsForARouteVerbose(text) OWNER TO postgres;

""")

import IntegrateStopsFromOSM
IntegrateStopsFromOSM.main('C:/Data/OSM/Overpass API queries/PT.osm')

#import UpdateZonesFromPDF
#UpdateZonesFromPDF.main('C:/Data/De Lijn/zones.txt')

import DeLijnData_in_Postgis_2_OSM
DeLijnData_in_Postgis_2_OSM.main()

import CreateWikiReport
CreateWikiReport.main()

Download relevant data from Openstreetmap with Overpass API

Download all bus stops and route relations in Flanders. recurse up and down to fetch all related stops, route relations and ways for the itineraries. Be aware that this is a hefty query even for the Overpass API. It returns 90MB of data and when run at the wrong time, it occasionally fails.

area["name:nl"~"Vlaams-Brabant|Antwerpen|..st-Vlaanderen"] -> .VBAntw; 
area["alt_name:nl"="Belgisch Limburg"] -> .Limb;

(
  node(area.VBAntw);
  node(area.Limb);
) -> .allnodesinArea;
(
  rel(bn.allnodesinArea)
    ["type"="route"]
    ["route"~"bus|tram"]
  ->.routes;
  .routes << ->.route_masters;
  (
    node.allnodesinArea
      ["highway"="bus_stop"]
    ->.stops;
    node.allnodesinArea
      ["amenity"="bus_station"]
    ->.busstations;
    node.allnodesinArea
      ["railway"="tram_stop"]
    ->.tstops;
    node.allnodesinArea
      ["public_transport"]
    ->.platforms;
    node
      ["highway"="bus_stop"]
      ["operator"~"De Lijn|TEC|STIB|MIVB"];
    node
      ["railway"="tram_stop"]
      ["operator"~"De Lijn|TEC|STIB|MIVB"];
    node
      ["public_transport"="platform"]
      ["operator"~"De Lijn|TEC|STIB|MIVB"];
  ) -> .stopsPlatformsAndStopPositions;
  (
    way(bn.allnodesinArea)
      ["highway"="bus_guideway"];
    way(bn.allnodesinArea)
      ["railway"="tram"];
  ) ->.tracks;
  way(bn.allnodesinArea)
    ["amenity"="shelter"]
    ["shelter_type"="public_transport"]
  ->.shelters;
  node(w.shelters);
  node.allnodesinArea
    ["amenity"="shelter"]
    ["shelter_type"="public_transport"]
  ->.shelternodes;
  node(r.routes);
  way(bn)->.parentways1;
  node(w.parentways1);
  rel(bw.parentways1);
  way(r.routes);
  node(w);
  way(bn.stopsPlatformsAndStopPositions);
  rel(bn.stopsPlatformsAndStopPositions)
    ["type"="public_transport"]
  -> .stopAttributes;
  node(r.stopAttributes);
  way(r.stopAttributes);
  node(w);
  node(w.tracks);
);
out meta;

This can be called as follows: Overpass recently added the possibility to download a zipped file. Browsers unzip this automatically, at the command prompt we still need to do this ourselves.

PT.cmd:

cd "C:\OSM\Overpass API queries\"

"C:\Program Files (x86)\GnuWin32\bin\wget.exe" --header="accept-encoding: gzip" -O "PT.osm.zip" --post-file="PT.overpass" "http://overpass-api.de/api/interpreter"

"C:\Program Files\7-Zip\7z.exe" e -y "PT.osm.zip" PT.osm

pause

Add data downloaded from OSM to this database

At the moment only stops which have a ref tag in OSM are added.

import postgresql, xml.sax, re, sys
# -*- coding: utf-8 -*-

sixdigitsRE= re.compile(r'\d\d\d\d\d\d')
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')

nodeexists = db.prepare("""SELECT stopidentifier FROM stops
                           WHERE OSM_node_ID = $1;""")
refexists = db.prepare("""SELECT OSM_node_ID FROM stops
                           WHERE stopidentifier = $1;""")
purgeOSMdata = db.prepare("""UPDATE stops SET OSM_node_ID = NULL,
                                              OSM_name = NULL,
                                              OSM_city = NULL,
                                              OSM_street = NULL,
                                              OSM_operator = NULL,
                                              OSM_route_ref = NULL,
                                              OSM_source = NULL,
                                              OSM_last_modified_by_user = NULL,
                                              OSM_last_modified_timestamp = NULL,
                                              OSM_zone = NULL
                                           WHERE
                                              OSM_node_ID IS NOT NULL;""")

updateOSMdata = db.prepare("""UPDATE stops SET OSM_node_ID = $1,
                                              OSM_name = $2,
                                              OSM_city = $3,
                                              OSM_street = $4,
                                              OSM_operator = $5,
                                              OSM_route_ref = $6,
                                              OSM_source = $7,
                                              OSM_last_modified_by_user = $8,
                                              OSM_last_modified_timestamp = $9::TEXT::TIMESTAMP,
                                              OSM_zone = $10,
                                              geomOSM = ST_SetSRID(ST_MakePoint($11::TEXT::DOUBLE PRECISION, $12::TEXT::DOUBLE PRECISION),4326)
                                           WHERE
                                              stopidentifier = $13::TEXT::INTEGER;""")

class OSMContentHandler(xml.sax.ContentHandler):
    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.nodeattributes = {}
        self.tags = {}
        print("Purge OSM data from database first")
        purgeOSMdata()
        print("Updating database")

    def startElement(self, tagname, attrs):
        if tagname == "tag":
            self.tags[attrs.getValue("k")] = attrs.getValue("v")
        elif tagname == "node":
            self.nodeattributes = attrs
            self.tags = {}

    def endElement(self, tagname):
        name = operator = ref = route_ref = source = zone = addr_city = addr_street = user = timestamp = None
        #updatequery = "UPDATE stops SET "

        if tagname == "node":
            if 'highway' in self.tags and self.tags['highway'] in ['bus_stop', 'bus_station'] or 'railway' in self.tags and self.tags['railway'] in ['tram_stop', 'station'] or 'public_transport' in self.tags and self.tags['public_transport'] in ['platform']:
                #if self.nodeattributes["id"][0]=='-': print('Negative node id, upload your data first, then save in JOSM'); sys.exit()
                if 'action' in self.nodeattributes and self.nodeattributes["action"]== 'delete': return

                # First try to find out which record to update
                # Do we have a ref? 
                if 'ref' in self.tags and self.tags['ref']!='noref':
                    if re.match(sixdigitsRE, self.tags['ref']):
                        ref = self.tags['ref']
                        refalreadyexists = refexists(int(ref))[0][0]
                        #print (ref + ' ' + str(refalreadyexists))
                        if refalreadyexists:
                            print (str(refalreadyexists) + ' ' + ref + ' is used twice, better fix this')
                    #if nodeexists(str(self.nodeattributes["id"])): print(self.nodeattributes["id"] + " already exists in the database"); pass
                if 'name' in self.tags:
                    name = self.tags['name'].replace("'","''") # updatequery += "OSM_name = '" + self.tags['name'].replace("'","''") + "',"
                if 'operator' in self.tags:
                    operator = self.tags['operator'] # updatequery += "OSM_operator = '" + self.tags['operator'] + "',"
                if 'route_ref' in self.tags:
                    route_ref = self.tags['route_ref'] # updatequery += "OSM_route_ref = '" + self.tags['route_ref'] + "',"
                if 'source' in self.tags:
                    source = self.tags['source'] # updatequery += "OSM_source = '" + self.tags['source'] + "',"
                if 'addr:city' in self.tags:
                    addr_city = self.tags['addr:city'].replace("'","''") # updatequery += "OSM_city = '" + self.tags['addr:city'].replace("'","''") + "',"
                if 'addr:street' in self.tags:
                    addr_street = self.tags['addr:street'].replace("'","''") # updatequery += "OSM_street = '" + self.tags['addr:street'].replace("'","''") + "',"
                if 'zone' in self.tags:
                    zone = self.tags['zone'] # updatequery += "OSM_zone = '" + self.tags['zone'] + "',"
                if 'user' in self.nodeattributes:
                    user = self.nodeattributes["user"]
                    #updatequery += "OSM_last_modified_by_user = '" + self.nodeattributes["user"] + "',"
                if 'timestamp' in self.nodeattributes:
                    timestamp = self.nodeattributes["timestamp"]
                    #updatequery += "OSM_last_modified_timestamp = '" + self.nodeattributes["timestamp"] + "',"
                #csvfile.write(self.nodeattributes['id'] + '#' + ref + '#' + zone + '#' + self.nodeattributes['lat'] + '#' + self.nodeattributes['lon'] + '#' + name + '#' + addr_city + '#' + addr_street + '#' + operator + '#' + route_ref + '#' + source + '\n')   
                if True: # updatequery[-1] == ',':
                    if ref: # and refexists(int(ref)):
                        updateOSMdata(self.nodeattributes["id"], name, addr_city, addr_street, operator, route_ref, source, user, timestamp, zone, self.nodeattributes['lon'], self.nodeattributes['lat'], ref)

                        '''
                        updatequery += """geomOSM = ST_SetSRID(ST_MakePoint(%s, %s),4326),
                                          OSM_node_ID = '%s'
                                          WHERE stopidentifier = %s;
                                       """ % (self.nodeattributes['lon'], self.nodeattributes['lat'],
                                              self.nodeattributes["id"],
                                              ref)
                        #print (updatequery)
                        '''
                        #db.execute(updatequery)
                        #quit()
def main(sourceFileName):
    source = open(sourceFileName, encoding='utf-8')
    xml.sax.parse(source, OSMContentHandler())
    print("Database updated with stops from " + sourceFileName)
 
if __name__ == "__main__":
    main('C:/Data/OSM/Overpass API queries/PT.osm')

Output in useful formats

Creation of an OSM file with all the stops

Now we have all the relevant data in our database, we can start creating some output.

To reuse code, I created a 'library' with some functions:

#!/bin/python
# -*- coding: utf-8 -*-
import re

leaveCityNameAnywayRE=re.compile(r"(?ix)((^|\s)(station|dorp|markt|kerk|centrum)(\s|$))") # case insensitive search for terms surrounded by whitespace
citynamesToOmit = [
                   (r"Gent",""), # !Sas-van-Gent
                   (r"Antwerpen",""),
                   (r"Sint-Agatha-Berchem",""),
                   (r"Berchem",""), # !Sint-Agatha-Berchem
                   (r"Burcht",""),
                   (r"Borsbeek",""),
                   (r"Borgerhout",""),
                   (r"Deurne",""), # !Deurne (bij Diest)
                   (r"Hoboken",""),
                   (r"Ekeren",""),
                   (r"Merksem",""),
                   (r"Mortsel",""),
                   (r"Wilrijk",""),
                   (r"Zwijndrecht",""),
                   (r"Brussel",""),
                   (r"Anderlecht",""),
                   (r"Elsene",""),
                   (r"Etterbeek",""),
                   (r"Evere",""),
                   (r"Ganshoren",""),
                   (r"Jette",""),
                   (r"Koekelberg",""),
                   (r"Oudergem",""),
                   (r"Schaarbeek",""),
                   (r"Sint-Gillis",""),
                   (r"Sint-Jans-Molenbeek",""),
                   (r"Sint-Joost-ten-Node",""),
                   (r"Sint-Lambrechts-Woluwe",""),
                   (r"Sint-Pieters-Woluwe",""),
                   (r"Ukkel",""),
                   (r"Vorst",""), # !Vorst (bij Veerle) !Vorst-Laakdal
                   (r"Watermaal-Bosvoorde",""),
                   (r"Sint-Job-in-'t Goor","Sint-Job-in-'t-Goor "),
                   (r"SPV","Sint-Pieters-Voeren "),
                   (r"SMV","Sint-Martens-Voeren "),
                   (r"SGV","'s Gravenvoeren "),
                   (r"Mol Gompel ","Gompel "),
                   (r'Petegem A/D Leie','Petegem-aan-de-Leie'),
                   (r"Liège",""),
                   (r"Moeskroen",""),
                   (r"Ronse/Renaix",""),
                   (r"Ellezelles / Elzele",""),
                   (r"Bever/Bievene",""),
                   ]
commonabbreviations=[
    (500,'0',r"'T","'t"),
    (500,'0',r'(?u)T Zand',"'t Zand"),
    (500,'0',r"'S","'s"),
    (500,'0',r"(?ui)D'\s*","d'"),
    (500,'0',r"(?ui)l'\s*","l'"),
    (500,'0',r"Aux","aux"),
    (500,'0',r"Ij","IJ"),
    (500,'0',r'Premetrostation ',''),
    (500,'0',r' (afgeschaft)',''),

    (500,'1',r'(?ui)\bKol\.*Veteranen','Koloniale Veteranen - Vétérans Coloniaux'),
    (500,'1',r"(?ui)\bMarg\.*v\.*Oostenrijk","Margaretha van Oostenrijk - Marguerite d'Autriche"),
    (500,'0',r'Sas Van Gent (Nl)','Sas van Gent'),
    (500,'0',r'(?u)^Mo\s','Moelingen '),

    (500,'0',r'(?ui)\bkerk','Kerk'),
    (500,'0',r'(?ui)\bPl(\.|\b)','Plaats'), # could be Plein, as well
    (500,'0',r'(?u)str(\.|$)','straat'),
    (500,'0',r'O. Dendermondsestwg','Ouden Dendermondsesteenweg'),
    (500,'0',r'(?u)stwg\.*','steenweg'),
    (500,'0',r'(?u)(St(wg|w|g)\.*)','Steenweg'),
    (500,'0',r'(?u)\bperron\b','Perron'),
    (500,'0',r'Prov.','Provinciaal'),
    (500,'0',r'Recreatiecentr.','Recreatiecentrum'),
    (500,'0',r'(?u)\Bbn\b','baan'),
    (500,'0',r'(?u)centr\.','centrum'),
    (500,'0',r'(?ui)\bDom\.','Domein'),
    (500,'0',r'(?ui)Krpt\.*','Kruispunt'),
    #(500,'0',r'Keiheuvel Gt Kamping','Keiheuvel Gt Camping'),

    (500,'0',r'(?u)\bBurg\.\s*','Burgemeester '),
    (500,'0',r'Bos En Lommer','Bos en Lommer'),

    (500,'1',r'Roesbrugge Dr. Gheysen','Roesbrugge Dokter Joseph Gheysen'),
    (500,'0',r'(?u)\bDr\.','Dokter '),
    (500,'0',r'(?ui)Vti','VTI'),
    (500,'0',r'(?ui)\bVko','VKO'),
    (500,'1',r'Turnhout Sint-Pieterinstituut','Turnhout Sint-Pietersinstituut'),
    (500,'0',r'(?u)\s*Inst\.\s',' Instituut '),
    (500,'0',r'(?u)Electr\.','Electrische'),
    (500,'0',r'Ecole','École'),
    (500,'0',r"(?u)\bEtat\b","État"),

    (500,'0',r'Kon.Atheneum','Koninklijk Atheneum'),

    (500,'0',r'(?u)^(.*)A(\.*[Zz]\.*?)\b(.*)$',r'\1Algemeen Ziekenhuis\3'),
    (500,'0',r'(?ui)Ocmw','OCMW'),
    #(r'(?ui)\bO\.C\.M\.W\.*','O.C.M.W.'),

    (400,'0',r'OPZ','Openbaar Psychiatrisch Ziekenhuis'),
    (500,'0',r'(?ui)(P\.?Z\.?)','Psychiatrisch Ziekenhuis'),
    (500,'0',r'(?ui)Ger\.*\s*Centrum','Geriatrisch Centrum'),

    (500,'0',r'(?ui)\bF\.*\s*Adriaenssens','Frans Adriaenssens'),
    (500,'0',r"(?ui)\bW\.*\s*Alexander","Willem Alexander"),
    (500,'0',r"(?ui)\bChem\.*\s*d\s*Angoussart","Chemin d Angoussart"),

    (500,'0',r"(?ui)\bE\.*\s*Beauduin","Emile Beauduin"),
    (500,'0',r'De Becker Remypl.','de Becker Remyplein'),
    (500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
    (500,'0',r"(?ui)\bBesch\.*Werkplaats","Beschermde Werkplaats"),
    (500,'0',r"(?ui)\bA\.*Blieck","Albert Blieck"),
    (500,'0',r'K. Boom','Karel Boom'),
    (500,'0',r'(?ui)L\.*p\.*\sBoon','Louis Paul Boon'),
    (500,'0',r'Hospitaal J. Bracops','Joseph Bracops'),
    (500,'0',r"(?ui)\bBrouw\.*\s*Lorier","Brouwerij Lorier"),
    (500,'0',r'Brouw.','Brouwerij'),

    (500,'0',r"(?ui)\bE\.*Carels","Emile Carels"),
    (500,'0',r"(?ui)\bTh\.*\s*V\.*\s*Cauwenberghs","Theofiel Van Cauwenberghs"),
    (500,'0',r'E. Claes','Ernest Claes'),
    (500,'0',r'(?ui)\bP\.\sCop','Pastoor Cop'),
    (500,'0',r'(?ui)\bH\.*\s*Conscience','Hendrik Conscience'),

    (500,'0',r'Pr. Daens','Priester Daens'),
    (500,'0',r"(?ui)\bGer\.*\s*Centrum\s*P\.*\s*Damiaan","Geriatrisch Centrum Pater Damiaan"),
    (500,'0',r'(?ui)\bP\.\s?Damiaan','Pater Damiaan'),
    (500,'0',r"(?ui)\bJ\.*\s*De\s*Block","Jozef De Block"),
    (500,'0',r"(?ui)\bF\.*\s*De\s*Laet","Frans De Laet"),
    (500,'0',r'A.Demanez','Albert Demanez'),
    (500,'0',r"(?ui)\bJ\.*\s*De\s*Doncker","Jan De Doncker"),
    (500,'0',r'(?ui)\bJ\.?\s*Dooghe','Julius Dooghe'),
    (500,'0',r'Jette De Greef','Guillaume De Greef'),
    (500,'0',r'(?ui)\bG\.*\s*De Kleermaekers','Guillaume De Kleermaekers'),
    (500,'0',r'(?ui)\bJ\.*\s*B\.*\s*Delcorte','Jules Delcorde'),
    (500,'0',r"(?ui)\bF\.*\s*Van\s*Dun","Frans van Dun"),
    (400,'0',r"(?ui)\bJ\.*P\.*\s*David","Jan Baptist David"),
    (500,'0',r"(?ui)\bJ\.*\s*David","Jan David"),
                     
    (500,'0',r'C.Everaart',' Cornelis Everaart'),
    (500,'1',r'Meerle Groot Eysel','Meerle Groot Eyssel'),
    
    (500,'0',r"(?ui)\bJ\.*\s*B\.*\s*Francis","Jan Baptist Francis"),

    (500,'0',r"(?ui)\bM\.*\s*Gandhi","Mahatma Gandhi"),
    (500,'0',r'(?ui)\bGebr\.','Gebroeders'),
    (500,'0',r"(?ui)\bW\.*\s*Geets","Willem Geets"),
    (500,'0',r'(?ui)\bG\.?\s*Gezelle','Guido Gezelle'),
    (500,'0',r"(?ui)\bN\.*Gombert","Nikolaas Gombert"),
    (500,'0',r"(?ui)\bG\.*\s*Le\s*Grelle","Gerard Le Grelle"),
    (500,'0',r'Ned. Grens','Nederlandse Grens'),
    (500,'0',r'(?ui)\bLod\.*\s*Guns','Louis Guns'),

    (500,'0',r'(?ui)P\.*\s*De\s*Halleux','Paul De Halleux'),
    (500,'0',r'(?ui)P\.*\s*Hens','Pater Hens'),
    (500,'0',r"(?ui)\bW\.*\s*Herreyns","Willem Herreyns"),
    (500,'0',r"La Houppe/d'Hoppe","La Houppe/D'Hoppe"),

    (500,'0',r"(?ui)D'Ieteren","D'Ieteren"),
                     
    (500,'0',r'(?ui)\bH\.?\s*Familie','Heilige Familie'),
    (500,'0',r'(?ui)\bH\.*\s*Hart','Heilig Hart'),
    (500,'0',r'O.L.V.-ten-Steen','Onze-Lieve-Vrouw-ten-Steen'),
    (500,'0',r'O.L.V.Middelareskerk','Onze-Lieve-Vrouw-Middelareskerk'),
    (500,'0',r'(?ui)M\.\s*Middelares','Maria Middelares'),
    (500,'0',r"(?ui)\bM\.*\s*Middelares\s*-\s*Maalte","Maria Middelares - Maalte"),
    (500,'0',r'Brugge O.L.V. Kerk','Brugge Onze-Lieve-Vrouwekerk'),
    (500,'0',r'(?ui)O\.*L\.*V\.*\s*College','Onze-Lieve-Vrouwcollege'),
    (500,'0',r'(?ui)O\.*L\.*V\.*\s*Dreef','Onze-Lieve-Vrouwdreef'),
    (500,'0',r'(?ui)O\.*L\.*V\.*\s*Plein','Onze-Lieve-Vrouwplein'),
    (500,'0',r'(?ui)O\.*L\.*V\.*\s*Straat','Onze-Lieve-Vrouwstraat'),
    (500,'0',r'(?ui)Onze\sLieve\sVrouw\sInstituut','Onze-Lieve-Vrouwinstituut'),
    (500,'0',r'(?ui)O\.*L\.*V\.*-*Ziekenhuis','Onze-Lieve-Vrouwziekenhuis'),
    (500,'0',r'(?ui)O\.*L\.*V\.*(\s|-)*K(?P<kerkkapel>erk|apel)',r'Onze-Lieve-Vrouwk\2'),
    (500,'0',r'(?ui)(\bO(nze)?\.*\s*-*L(ieve)?\.*\s*-*V(\.|rouw)*)-*','Onze-Lieve-Vrouw-'),
    (500,'0',r'(?ui)St\.*-Siméon','Saint-Siméon'),
    (500,'0',r'(?u)St\.*-\b','Sint-'),

    (500,'0',r'(?ui)\bK(on)*\.?\s*Astrid','Koningin Astrid'),
    (500,'0',r'(?ui)\bK(on)*\.?\s*Elisabeth','Koningin Elisabeth'),
    (500,'0',r'(?ui)\bJ\.?\s*Charlotte','Josephine Charlotte'),
    (500,'0',r'(?ui)Prinses\s*J\.*\s*Charlotte','Prinses Josephine Charlotte'),

    (500,'0',r'(?ui)Jachtl\.','Jachtlaan'),
    (500,'0',r'Jambede Bois','Jambe de Bois'),
    (500,'0',r"J. Jennes","Jozef Jennes"),
    (500,'0',r' Jh ',' Jeugdherberg '),

    (500,'0',r"(?ui)\bV\.*\s*Kegels","Victor Kegels"),
    (500,'0',r"(?ui)\bG\.*De\s*Kleermaekers","Guillaume De Kleermaekers"),

    (500,'0',r"(?ui)\bG\.*\s*Lambert","Guillaume Lambert"),
    (500,'0',r'E. Lauder','Estée Lauder'),
    (500,'0',r'Lokeren T Lavertje',"Lokeren 't Lavertje"),
    (500,'0',r'(?ui)K\.*\s*De\s*Lens','Karel Lodewijk de Lens'),
    (500,'0',r'(?ui)\bG\.*\s*Le\s*Grelle','Gerard Le Grelle'),
    (500,'0',r'O.Leuvense Baan','Oude Leuvense Baan'),
    (500,'0',r'(?ui)B.\s*Leysen','Bert Leysen'),

    (500,'0',r'Marie Joseplein','Marie-Joséplein'),
    (500,'0',r"(?ui)\bJ\.*de\s*Meeus","Graaf Joseph De Meeus"),
    (500,'0',r"Mesanges","Mésanges"),
    (500,'0',r"(?ui)\bC\.*Meunier","Constantin Meunier"),
    (500,'0',r"(?ui)\bL\.*\s*Mortelmans","Lodewijk Mortelmans"),
    (500,'0',r"(?ui)\bJ\.*\s*Moretus","Jules Moretus"),
    (500,'0',r"(?ui)\bE\.*\s*Moyaerts","Emile Moyaerts"),
    (500,'0',r'(?ui)H\.*Moeremans','Henri Moeremans'),
    (500,'0',r'(?ui)I\.*Meyskens','Isidoor Meyskens'),

    (500,'0',r"(?ui)\bL\.*Nantier","Leopold Nantier"),
    (500,'0',r"(?ui)\bGebr\.*\s*Naudts","Gebroeders Naudts"),
    (500,'0',r"(?ui)\bP\.*Nollekens","Pieter Nollekens"),
    (500,'0',r'(?ui)\bnr\.*\s(?P<number>\d+)',r' nummer \1 '),
    (500,'0',r'(?ui)\bnr\.*\s*(?P<naar>\w+)',r' naar \1 '),

    (500,'0',r"(?ui)\bMarg\.*v\.*Oostenrijk","Margaretha van Oostenrijk - Marguerite d'Autriche"),
    (500,'0',r'Opgeeistenlaan','Opgeëistenlaan'),

    (500,'0',r'(?ui)\bJ\.*\s*Palfijn','Jan Palfijn'),
    (500,'1',r'Dessel Perestraat','Dessel Peresstraat'),
    (500,'1',r'Ukkel Pijnbomenstraat','Ukkel Pijnbomenweg'),
    (500,'0',r"(?ui)\bLt\.*\s*Philippart","Luitenant Philippart"),
    (500,'0',r"(?ui)\bJ\.*\s*Posenaer","Jozef Posenaer"),
    (500,'0',r"(?ui)\bAug\.*Plovie","August Plovie"),

    (500,'0',r'(?u)\bReg\.','Regiment'),
    (500,'0',r'(?ui)\bJ\.*\s*Reusens','Jozef Reusens'),
    (500,'0',r"(?ui)\bT\.*\s*Reyn","Theofiel Reyn"),
    (500,'0',r'(?ui)\bRingl\.','Ringlaan'),
    (500,'0',r'K. Roeland','Klokke Roeland'),
    (500,'0',r"(?ui)\bA\.*\s*Rodenbach","Albrecht Rodenbach"),
    (500,'0',r'(?ui)\bF\.*\s*Roosevelt','Franklin Roosevelt'),
    (500,'0',r"(?ui)\bL\.*Ruelens","Leopold Ruelens"),

    (500,'0',r'(?ui)J\.*\s*.*?M\.*\s*Sabbe','Julius en Maurits Sabbe'),
    (500,'0',r'(?ui)\bM\.?\s*Sabbe','Maurits Sabbe'),
    (500,'0',r'(?ui)\bH\.*\s*Schoofs','Hendrik Schoofs'),
    (500,'0',r'(?ui)\bR\.*\s*Schuman',' Robert Schuman'),
    (500,'0',r'(?ui)Serg\.*\s*','Sergeant '),
    (500,'0',r'Sint-Pieters Station','Sint-Pietersstation'),
    (500,'0',r"(?ui)\bE\.*Sohie","Edgard Sohie"),
    (500,'0',r'(?ui)\bE\.*\s*Soudan','Eugène Soudan'),
    (500,'0',r"(?ui)\bP\.*\s*H\.*\s*Spaak","Paul Henri Spaak"),
    (500,'0',r"(?ui)\bH\.*\s*Stassen","Henri Stassen"),
    (500,'0',r"(?ui)\bJ\.*Stas","Jan Stas"),
    (500,'0',r'T Steppeke',"'t Steppeke"),
    (500,'0',r"(?ui)\bE\.*\s*Steurs","Eduard Steurs"),
    (500,'0',r"(?ui)\bG\.*\s*Stijnen","Gerardus Stijnen"),
    (500,'0',r"(?ui)\bJ\.*\s*Stobbaerts","Jan Stobbaerts"),
    (500,'0',r'Stijn Streuvelstraat','Stijn Streuvelsstraat'),
    (500,'0',r'(?u)S\.*\s*Streuvels','Stijn Streuvels'),

    (500,'0',r'(?ui)E\.*\s*Thieffry','Edmond Thieffry'),
    (500,'0',r'(?ui)Transf\.','Transformatorstation'),
    (500,'0',r'T. Tuts','Theo Tuts'),

    (500,'0',r'C.V.D. Bussche',' Camiel Van den Bussche'),
    (500,'0',r'(?ui)E\.*Vander\s*Steenenstr\.*','Emile Vandersteenen'),
    (500,'0',r'(?ui)J\.*Vanderstraetenstr\.*','Jan Vanderstraeten'),
    (500,'0',r'(?ui)D\.*\s*Vander\s*Vaeren','Désiré Vander Vaeren'),
    (500,'0',r'E. Van Der Velde','Emiel Van Der Velde'),
    (500,'0',r'Vandewielelaan','Gomar Vandewielelaan'),
    (500,'0',r'(?ui)\bK\.?\s*V\.?\s*Mander','Karel Van Mander'),
    (500,'0',r'(?ui)\bJ\.*\s*Van\s*Rijswijck','Jan Van Rijswijck'),
    (500,'0',r'(?ui)A\.*\s*Vesalius','Andreas Vesalius'),
    (500,'0',r'(?ui)\bR\.*\s*Veremanss*','Renaat Veremanss'),
    (500,'0',r"(?ui)\bF\.*\s*Verbiest","Ferdinand Verbiest"),
    (500,'0',r'(?ui)Fr\.*\s*De\s*Vriendt','Frans De Vriendt'),

    (500,'0',r'Eglise Saint-Walburge','Église Sainte-Walburge'),
    (500,'0',r'Wilsele Wijgm.stwg.','Wilsele Wijgmaalsesteenweg'),
    #(r'(?ui)\bWijgm\.','Wijgmaal'),
    (500,'0',r'J. De Wilde','Jean De Wilde'),
    (500,'0',r"(?ui)\bK\.*\s*De\s*Wint","Karel De Wint"),
    (500,'0',r'(?ui)\bW\.*\s*Wood','William Wood'),
    (500,'0',r'Industriezone Bl Toren','Industriezone Blauwe Toren'),
    (500,'0',r"(?ui)\bA\.*Blieck","Albert Blieck"),
    (500,'0',r"(?ui)\bTh\.*Van\s*Loo","Thomas Van Loo"),
    (500,'0',r"(?ui)\bAug\.*Plovie","August Plovie"),
    (500,'0',r"(?ui)\bHotel\s*De\s*Ville","Hôtel De Ville"),
    (500,'0',r"(?ui)\bVti\s","VTI"),
    #(r"(?ui)\bPark\s*De\s*Rode\s*Poort","Park de Rode Poort"),
    #r"(?ui)\bBaron\s*De\s*Serret","Baron de Serret"),
    # (r"(?ui)\bCoq\s*Anglais","anglais"),
    (500,'0',r"(?ui)\bA\.*\s*Rodenbach","Albrecht Rodenbach"),
    (500,'0',r"Fort Van Beieren","Fort van Beieren"),
    (500,'0',r"(?ui)\bBurg\.*\s*Capelle","Burgemeester Capelle"),
    (500,'0',r"(?ui)\bG\.*\s*Gezelle","Guido Gezelle"),
    (500,'0',r"(?ui)\bD'Alcantara","d'Alcantara"),
    (500,'0',r"(?ui)\b/\s*Vloesberg\s*La\s*Houppe/D'Hoppe","/ Vloesberg La Houppe/D'Hoppe"),
    (500,'0',r"(?ui)\bPC\s*St-Jan-Baptist","PC St-Jan-Baptist"),
    (500,'0',r"(?ui)\bT'Jonge","'t Jonge"),
    (500,'0',r"(?ui)\bInst\.*\s*Scheppers","Instituut Scheppers"),
    (500,'0',r"(?ui)\bL\.*P\.*\s*Boon","L.P. Boon"),
    (500,'0',r"(?ui)\bCC\b","Cultureel Centrum"),
    (500,'0',r"(?ui)\bKon\.*\s*Tech\.*\s*Atheneum\b","Koninklijk Technisch Atheneum"),
    (500,'0',r"(?ui)\bOcmw\s*/\s*Koevliet","OCMW / Koevliet"),
    (500,'0',r"(?ui)\bBurg\.*Van\s*Ackerwijk","Burgemeester Van Ackerwijk"),
    (500,'0',r"(?ui)\bC\.*\s*Verhavert","Cypriaan Verhavert"),
    (500,'0',r"(?ui)\bSt-Kornelis","Sint-Kornelis"),
    (500,'0',r"(?ui)\bV\.*\s*Kegels","Victor Kegels"),
    (500,'0',r"(?ui)\bF\.*\s*De\s*Laet","Frans De Laet"),
    (500,'0',r"(?ui)\bd'\s*Hand","d'Hand"),
    (500,'0',r"(?ui)\bJ\.*\s*De\s*Doncker","Jan De Doncker"),
    (500,'0',r"(?ui)\bBesch\.*Werkplaats","Beschermde Werkplaats"),
    (500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
    (500,'0',r"(?ui)\bJ\.*\s*De\s*Block","Jozef De Block"),
    (500,'0',r"(?ui)\bH\.*\s*Vos","Herman Vos"),
    (500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
    (500,'0',r"(?ui)\bJ\.*\s*Stobbaerts","Jan Stobbaerts"),
    (500,'0',r"(?ui)\bJ\.*\s*David","Jan David"),
    (500,'0',r"(?ui)\b\s*Dorp"," Dorp"),
    (500,'0',r"(?ui)\bKon\.*\s*Astrid","Kon. Astrid"),
    (500,'0',r"(?ui)\bJ\.*\s*B\.*\s*Francis","Jan Baptist Francis"),
    (500,'0',r"(?ui)\bKon\.*\s*Elisabeth","Koningin Elisabeth"),
    (500,'0',r"(?ui)\bLt\.*\s*Philippart","Luitenant Philippart"),
    (500,'0',r"(?ui)\bO\.*\s*Van\s*Kesbeeck","Oscar Van Kesbeeck"),
    (500,'0',r"(?ui)\bR\.*\s*Verbeeck","René Verbeeck"),
    (500,'0',r"(?ui)\bE\.*Sohie","Edgard Sohie"),
    (500,'0',r"(?ui)\bW\.*\s*Geets","Willem Geets"),
    (500,'0',r"(?ui)\bM\.*\s*Gandhi","Mahatma Gandhi"),
    (500,'0',r"(?ui)\bJ\.*de\s*Meeus","Graaf Joseph De Meeus"),
    (500,'0',r"Stwg.Tervuren /Stwg.Vilvoorde","Steenweg Tervuren/Steenweg Vilvoorde"),
    (500,'0',r"(?ui)\bJ\.*F\.*\s*Willems","Jan Frans Willems"),
    (500,'0',r"(?ui)\bStg\.*op\s*Brussel/Albert","Steenweg op Brussel/Albert"),
    (500,'0',r"(?ui)\bTh\.*\s*V\.*\s*Cauwenberghs","Theofiel Van Cauwenberghs"),
    (500,'0',r"(?ui)\bJ\.*\s*Jennes","Jozef Jennes"),
    (500,'0',r"(?ui)\bL\.*Nantier","Leopold Nantier"),
    (500,'0',r"(?ui)\bJ\.*\s*Van\s*Geel","Jan Frans van Geel"),
    (500,'0',r"(?ui)\bJachtl\.*/Duisburgsestg","Jachtlaan/Duisburgsesteenweg"),
    (500,'0',r"(?ui)\bCl\.*\s*Vanophem","Clement Vanophem"),
    (500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
    (500,'0',r"(?ui)\bE\.*Carels","Emile Carels"),
    (500,'0',r"(?ui)\bP\.*\s*Benoit","Peter Benoit"),
    (500,'0',r"(?ui)\bChem\.*\s*d\s*Angoussart","Chemin d Angoussart"),
    (500,'0',r"(?ui)\bVan\s*Den\s*Nest","Van den Nest"),
    (500,'0',r"(?ui)\bRue\s*Th\.*Piat","Rue Théophile Piat"),
    (500,'0',r"(?ui)\bAv\.*\s","Avenue "),
    (500,'0',r"(?ui)\bSt-Bernardus","Sint-Bernardus"),
    (500,'0',r"(?ui)\bP\.*Nollekens","Pieter Nollekens"),
    (500,'0',r"(?ui)\bL\.*Wouters","Louis Wouters"),
    (500,'0',r"(?ui)\bPnstr\.*/Leuvensebn","Pleinstraat/Leuvensebaan"),
    #(r"(?ui)\bIndustr\.*\s*Interleuven","Industr. Interleuven"),
    (500,'0',r"(?ui)\bL\.*Ruelens","Leopold Ruelens"),
    (500,'0',r"(?ui)\bKerk\s*(St-Guibertus)","Kerk (Sint-Guibertus)"),
    (500,'0',r"(?ui)\bC\.*\s*Vissenaeken","Cornelius Vissenaekens"),
    #(r"(?ui)\bP\.*\s*Van\s*Lommel","P. van Lommel"),
    (500,'0',r"(?ui)\bKne\s*Vleminckxstr\.*","Kleine Vleminckxstraat"),
    (500,'0',r"(?ui)\bE\.*\s*Vandervelde","Emile Vandervelde"),
    (500,'0',r"(?ui)\bE\.*\s*Van\s*Der\s*Velde","Emiel Van Der Velde"),
    (500,'0',r"(?ui)\bJ\.*\s*Van\s*Geel","Jan Frans van Geel"),
    (500,'0',r'(?ui)\bJ\.?\s*Van Hoof','Jef Van Hoof'),
    (500,'0',r"(?ui)\bO\.*\s*Van\s*Kesbeeck","Oscar Van Kesbeeck"),
    (500,'0',r"(?ui)\bTh\.*Van\s*Loo","Thomas Van Loo"),
    (500,'0',r"(?ui)\bBurg\.*\s*Van\s*Nueten","Burgemeester Van Nueten"),
    (500,'0',r"(?ui)\bCl\.*\s*Vanophem","Clement Vanophem"),
    (500,'0',r"(?ui)\bL\.*\s*Van\s*Bercken","Lodewijk Van Bercken"),
    (500,'0',r"(?ui)\bJ\.*\s*Van\s*Rijswijck","Jan Van Rijswijck"),
    (500,'0',r"(?ui)\bK\.*\s*Van\s*De\s*Woestijne","Karel Van de Woestijne"),
    (500,'0',r"(?ui)\bC\.*\s*Verhavert","Cypriaan Verhavert"),
    (500,'0',r"(?ui)\bR\.*\s*Verbeeck","René Verbeeck"),
    (500,'0',r"(?ui)\bKol\.*Veteranensquare","Koloniale Veteranen - Vétérans Coloniaux"),
    (500,'0',r"(?ui)\bC\.*\s*Vissenaeken","Cornelius Vissenaekens"),
    (500,'0',r"(?ui)\bPl\.*\s*Vondel","Plaats Vondel"),
    (500,'0',r"(?ui)\bH\.*\s*Vos","Herman Vos"),
    (500,'0',r"(?ui)J\.*\s*De\s*Wilde","Jean De Wilde"),
    (500,'0',r"(?ui)\bJ\.*F\.*\s*Willems","Jan Frans Willems"),
    (500,'0',r"(?ui)\bL\.*Wouters","Louis Wouters"),
    #(r"(?ui)\bTC\s*Laakdal","TC Laakdal"),
    #(r"(?ui)\bEXC\s*Vorst","EXC Vorst"),
    (500,'0',r"(?ui)\bE\.*\s*Moyaerts","Emile Moyaerts"),
    (500,'0',r"(?ui)\bE\.*\s*Beauduin","Emile Beauduin"),
    (500,'0',r"(?ui)\bProvinciaal\s*Natuurcentrum","Bosbiologisch Centrum"),
    (500,'0',r'Leuven Naamse Poort','Leuven Naamsepoort'),
    (500,'0',r'Leuven Tiense Poort','Leuven Tiensepoort'),
    (500,'0',r'Leuven Tervuurse Poort','Leuven Tervuursepoort'),


    (999,'0',r'(?i)\s+',' '),
    #(r'(?ui)\b',''),
    #(r'',''),

   ]
def nameconversion(identifier,zone):
    name=identifier
    for order, final, short,long in commonabbreviations:
        #print(zone + '  ' + name + '  ' + short + '  ' + long)
        origname=name
        if short[0:2]=='(?':
            name=re.sub(short,long,name)
        else:
            name=name.replace(short,long)
        if final and origname!=name: continue
    #if 'rouw' in name: print (name)
    #if 'Sabbe' in name: print(zone + '  ' + name)
    if not(leaveCityNameAnywayRE.search(name)) and not(zone in ['80','68']) and not('Sas van Gent'  in name):
        #zone 80: Vorst bij Veerle, zone 68: Deurne bij Diest, zone 31: Sas van Gent
        for city, replacement in citynamesToOmit:
            if city in name:
                name = name.replace(city+' ',replacement).strip()
                continue
    return name

def xmlsafe(name):
    return name.replace('&','&amp;').replace("'","&apos;")

def urlsafe(name):
    return xmlsafe(name).replace(' ','%20')

We start with an XML file which can be read by JOSM. The resulting file should not be uploaded directly to the server. Each and every stop needs to be vetted and double checked and dragged to a suitable position before uploading.

#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, re, delijnosmlib

db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
DeLijnStops = db.prepare("""SELECT geomdl,
                                       stopidentifier, 
                                       description,
                                       street,
                                       municipality,
                                       route_ref,
                                       OSM_zone,
                                       OSM_name,
                                       OSM_node_ID,
                                       round(ST_X(geomdl)::numeric, 6) AS lon,
                                       round(ST_Y(geomdl)::numeric, 6) AS lat
                                  FROM stops 
                                  WHERE description !~* 'dummy|afgeschaft'
                                  ORDER BY  geomdl;""")

def main():
    with open('C:/Data/De Lijn/Haltes De Lijndata.osm', mode='w', encoding='utf-8') as osmfile:
        osmfile.write("<?xml version='1.0' encoding='UTF-8'?>\n")
        osmfile.write('<osm version="0.6" upload="no" generator="Python script"> \n')
        stopslist = DeLijnStops()
        identifiers = {}
        for row in stopslist:
            description = str(row['description'])
            osmname = str(row['osm_name'])
            name=delijnosmlib.nameconversion((description),str(row['osm_zone']))
            if not(name in identifiers) and osmname != name and (' ' in osmname) and (' ' in name) and (str(row['osm_zone']) not in ('01','20')) and not ('erron' in name) and not ('oeren' in name) and not ('Dokter' in name) and not (' nummer' in name) and not (' naar ' in name) and not ('École' in name) and not ('steenweg' in name) and not ('Goor' in name) and not ('str.' in name) and not ('Ziekenhuis' in name) and not ('Koninklijk' in name) and not ('Ernest Claes' in name) and not ('Heilig' in name) and not ('Lieve-Vrouw' in name) and not ('Siméon' in name) and not ('Streuvels' in name) and not ('Moelingen' in name) and not ("'t" in name) and not ("'s" in name):
                identifiers[name]=''
                #print (description + ' -> ' + osmname + ' -> ' + name)
                print(r'''    (r"(?ui)\b''' + description.split(' ',1)[1].replace(' ','\s*').replace('.','\.*').replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''","''' + osmname.split(' ',1)[1].replace("''","'").replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''"),''')

            name=delijnosmlib.xmlsafe(name)
            ref=str(row['stopidentifier'])
            street = city = city2 = route_ref = ''
            if row['street']: street=delijnosmlib.xmlsafe(str(row['street']))
            if row['municipality']: city=delijnosmlib.xmlsafe(str(row['municipality']))
            lat=str(row['lat'])
            lon=str(row['lon'])
            if row['route_ref']: route_ref=delijnosmlib.xmlsafe(row['route_ref'])

            osmfile.write("  <node id='-" + ref + "' visible='true' lat='" + lat + "' lon='" + lon + "' timestamp='2011-03-09T00:36:24Z' >" + '\n')
            osmfile.write('    <tag k="highway" v="bus_stop" />' + "\n")
            if not '"' in name:
                osmfile.write('    <tag k="name" v="' + name + '" />' + "\n")
            else:
                osmfile.write("    <tag k='name' v='" + name + "' />" + '\n')
            osmfile.write('    <tag k="ref" v="' + ref +'" />' + "\n")
            if street or city:
                osmfile.write('    <tag k="created_by" v="' + street + ' ' + city + '" />' + "\n")
            #print( row)
            if row['osm_zone']:
                osmfile.write('    <tag k="zone" v="' + row['osm_zone'] + '" />' + "\n")
            if not(row['osm_node_id']):
                osmfile.write('    <tag k="odbl" v="' + 'new' + '" />' + "\n")
            osmfile.write('    <tag k="operator" v="De Lijn" />' + "\n")

            if route_ref:
                osmfile.write('    <tag k="route_ref" v="' + route_ref + '" />' + "\n")
            osmfile.write('  </node>' + "\n")
        osmfile.write('</osm>')

if __name__ == "__main__":
    main()

Feedback to De Lijn about stops which are more than a certain distance from what is in their DB

To create the report which can be found on this page:

WikiProject_Belgium/Bus_stops

#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, datetime, delijnosmlib, re
from urllib.parse import urlencode

feestbusRE=re.compile(r'''(F\d+)''')
feestbusREsub=re.compile(r'''(?:;)?F\d+(;)?''')
ignorevandeRE=re.compile(r'''(?u)\s((?:[Vv]an)*\s*(?:[Oo]p)*\s*(?:[Dd]e(r|n)*)*)\s''')
db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
locationdiffers = db.prepare("""SELECT round(ST_Distance_Sphere(geomdl, geomosm)),
                                       stopidentifier, 
                                       description,
                                       osm_name,
                                       street,
                                       osm_street,
                                       osm_city,
                                       route_ref, 
                                       osm_route_ref,
                                       osm_zone,
                                       osm_node_id,
                                       osm_last_modified_by_user,
                                       now() - osm_last_modified_timestamp AS dayswithoutchanges,
                                       round(ST_X(ST_Centroid(ST_ShortestLine(geomdl, geomosm)))::numeric, 3) AS x,
                                       round(ST_Y(ST_Centroid(ST_ShortestLine(geomdl, geomosm)))::numeric, 3) AS y,
                                       round(ST_X(geomosm)::numeric, 3) AS lon,
                                       round(ST_Y(geomosm)::numeric, 3) AS lat
                                  FROM stops 
                                  WHERE ST_Distance_Sphere(geomdl, geomosm) > 35.0
                                  ORDER BY ST_Distance_Sphere(geomdl, geomosm) DESC;""")

otherdifferences = db.prepare("""SELECT stopidentifier, 
                                       description,
                                       osm_name,
                                       street,
                                       osm_street,
                                       osm_city,
                                       route_ref, 
                                       osm_route_ref,
                                       osm_zone,
                                       osm_node_id,
                                       osm_last_modified_by_user,
                                       now() - osm_last_modified_timestamp AS dayswithoutchanges,
                                       round(ST_X(geomosm)::numeric, 3) AS lon,
                                       round(ST_Y(geomosm)::numeric, 3) AS lat
                                  FROM stops 
                                  WHERE description != osm_name OR route_ref != osm_route_ref -- OR description LIKE '%.%'
                                  ORDER BY osm_zone DESC, stopidentifier;""")


def main():
    targetFileName = 'C:/Data/De Lijn/WikiReport.txt'
    with open(targetFileName, mode='w', encoding='utf-8') as wikifile:
        wikifile.write('''
Instructions on how this list was created can be found here:

http://wiki.openstreetmap.org/w/index.php?title=WikiProject_Belgium/De_Lijndata#Feedback_to_De_Lijn_about_stops_which_are_more_than_a_certain_distance_from_what_is_in_their_DB

==Stops which differ in location==
{| class="wikitable" align="left" style="margin:0 0 2em 2em;"
|-
|+De Lijn Haltes
|-
!Afstand
!Nummer
!Haltenaam
!osm_name
!Straat
!osm_straat
!osm_stad
!Bediende lijnen
!osm_route_ref
!osm_zone
''')
        stopslist = locationdiffers()
        i=1
        while i< len(stopslist):
            j=i+1
            #print(len(stopslist[j:]))
            while j< len(stopslist[j:])+i:
                #print (stopslist[i]['osm_name'], stopslist[j], stopslist[j]['osm_name'])
                if stopslist[i]['description'] and stopslist[j]['description'] and stopslist[i]['description'][:16]==stopslist[j]['description'][:16]:
                    print('flipping for ' + stopslist[j]['osm_name'] + ' ' + str(i) + ' ' + str(j))
                    stopslist.insert(i+1, stopslist[j])
                    del stopslist[j+1]
                    i += 1
                    break
                j += 1
            i += 1
        for row in stopslist:
            wikifile.write('|-\r\n')
            josmRClink = '' ;tags2add = '&addtags='
            print (row['dayswithoutchanges'])
            if not(row['osm_last_modified_by_user']=='Polyglot')  or row['dayswithoutchanges'] > datetime.timedelta(30):
                if row['route_ref'] != row['osm_route_ref']:
                    tags2add += 'route_ref=' + str(row['route_ref']) + '|'
                if row['description'] != row['osm_name']:
                    #print ('test' + str(row['description']))
                    #for c in str(row['description']):
                    #    print(repr(c), ord(c))
                    tags2add += "name=" + str(row['description']).replace(' ','%20') + '|'
                if row['osm_street']: tags2add += "addr:street=|"
                if row['osm_city']: tags2add += "addr:city=|"
                josmRClink = '[http://localhost:8111/load_and_zoom?left=' + str(float(row['lon']) - 0.01) + '&right=' + str(float(row['lon']) + 0.01) + '&top=' + str(float(row['lat']) + 0.005) + '&bottom=' + str(float(row['lat']) - 0.005) + '&select=node' + str(row['osm_node_id']) + tags2add + ' ' + str(row['stopidentifier']) + ']'
            else:
                josmRClink = str(row['stopidentifier'])
            wikifile.write('|align="right" | [http://tools.geofabrik.de/mc/?mt0=mapnik&mt1=googlemap&lon=' + str(row['x']) + '&lat=' + str(row['y']) + '&zoom=18 ' + str(int(row['round'])) + ']m||align="right" |' + josmRClink + '||align="right" | ' + str(row['description']) + '||align="right" | ' + str(row['osm_name']) + '||align="right" | ' + str(row['street']) + '||align="right" | ' + str(row['osm_street']) + '||align="right" | ' + str(row['osm_city']) + '||align="right" | ' + str(row['route_ref']) + '||align="right" | ' + str(row['osm_route_ref']) + '||align="right" | ' + str(row['osm_zone']) + '\r\n')

        stopslist = otherdifferences()
        print ('query ready')
        # i=1
        # while i< len(stopslist):
            # j=i+1
            # #print(len(stopslist[j:]))
            # while j< len(stopslist[j:])+i:
                # #print (stopslist[i]['osm_name'], stopslist[j], stopslist[j]['osm_name'])
                # if stopslist[i]['description'] and stopslist[j]['description'] and stopslist[i]['description'][:16]==stopslist[j]['description'][:16]:
                    # #print('flipping for ' + stopslist[j]['osm_name'] + ' ' + str(i) + ' ' + str(j))
                    # stopslist.insert(i+1, stopslist[j])
                    # del stopslist[j+1]
                    # i += 1
                    # break
                # j += 1
            # i += 1
        print ('rows sorted')
        wikifile.write('''|-\r\n|}\r\n{| class="wikitable" align="left" style="margin:0 0 2em 2em;"
|-
|+De Lijn Haltes
|-
!Nummer
!Haltenaam<br/>osm_name
!osm_zone
!Bediende lijnen<br/>!osm_route_ref
''')
        identifiers = {}
        for row in stopslist:
            josmRClink = '' ;tags2add = '&addtags='; route_ref=''
            #print (row['dayswithoutchanges'])
            description = delijnosmlib.nameconversion(str(row['description']),'')
            name= str(row['osm_name']).replace("''","'")
            #if '.'  in str(row['description']):
            if not(name in identifiers) and str(row['description']) != name and (' ' in str(row['description'])) and '.' in str(row['description']) and not('.' in name) and not ('erron' in name) and not ('oeren' in name) and not ('Dokter' in name) and not (' nummer' in name) and not (' naar ' in name) and not ('École' in name) and not ('steenweg' in name) and not ('Goor' in name) and not ('str.' in name) and not ('Ziekenhuis' in name) and not ('Koninklijk' in name) and not ('Ernest Claes' in name) and not ('Heilig' in name) and not ('Lieve-Vrouw' in name) and not ('Siméon' in name) and not ('Streuvels' in name) and not ('Moelingen' in name) and not ("'t" in name) and not ("'s" in name):
                identifiers[name]=''

                print(r'''    (r"(?ui)\b''' + str(row['description']).split(' ',1)[1].replace(' ','\s*').replace('.','\.*').replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''","''' + name.replace("''","'").replace('straat','').replace('laan','').replace('plein','').replace('lei','') + '''"),''')
            name_different = route_ref_different = False
            if row['route_ref']:
                route_ref=feestbusREsub.sub(' ',str(row['osm_route_ref']),10).strip()
                #if 'erron 5' in name: print (name + ' ' + route_ref + ' ' + str(row['route_ref']))
                if str(row['route_ref'])!= route_ref: route_ref_different = True
                #if 'erron 5' in name: print (route_ref_different)
            if ignorevandeRE.sub(' ',name,0)!=ignorevandeRE.sub(' ',description,0): name_different = True

            if name_different: #or route_ref_different:
                tags2add += 'route_ref=' + str(row['route_ref']) + '|'
                tags2add += "name=" + delijnosmlib.urlsafe(description) + '|'
                if row['osm_street']: tags2add += "addr:street=|"
                if row['osm_city']: tags2add += "addr:city=|"
                tags2add += "addr:country=|"
                tags2add += "addr:postcode=|"
                tags2add += "source=|"
                josmRClink = '[http://localhost:8111/load_and_zoom?left=' + str(round(float(row['lon']) - 0.0025,3)) + '&right=' + str(round(float(row['lon']) + 0.0025,3)) + '&top=' + str(round(float(row['lat']) + 0.0025,3)) + '&bottom=' + str(round(float(row['lat']) - 0.0025,3)) + '&select=node' + str(row['osm_node_id']) + tags2add + ' ' + str(row['stopidentifier']) + ']'
                wikifile.write('|-\r\n')
                wikifile.write('|align="right" | ' + josmRClink + '||align="right" | ' + str(row['description']) + '<br/>' + name  + '<br/>' + description + '||align="right" | ' + str(row['osm_zone']) + '||align="right" | ' + str(row['osm_route_ref']) + '<br/>' + route_ref + '<br/>' + str(row['route_ref']) + '\r\n')
            
        wikifile.write('|-\r\n|}\r\n')
		
if __name__ == "__main__":
    main()

The script was extended to create a report on stops for which the names or the route_ref differ between what was calculated and what is on Openstreetmap.

Work with the data

Creation of a route relation containing all the stops in the correct order

Adding stops is all very well, but they're only a building block of the routes those buses follow. Before it was very time consuming to create those routes. When all the stops and how they are related are in a database, it becomes possible to extract them in sequence.


#!/bin/python
# -*- coding: utf-8 -*-
import postgresql, random, re
from urllib.parse import urlencode

removePerronRE=re.compile(r"""(?xiu)
                              (?P<name>[\s*\S]+?)
                              (?P<perron>\s*perron\s*\d*)?
                              $
					       """) # case insensitive search removing Perron #

db = postgresql.open('pq://Jo:tttttt@localhost:5432/DL')
routeidentifiersQUERY = db.prepare("""  SELECT DISTINCT
                                          rte.routeidentifier, rte.routedescription, rte.routepublicidentifier, rte.routeversion, rte.routeid
                                        FROM public.routes   rte
                                        WHERE
                                          rte.routepublicidentifier = $1
                                        ORDER BY
                                          rte.routeidentifier;""")

tripids = db.prepare("""                SELECT DISTINCT
                                          trp.tripid,
                                          rte.routeservicetype AS type,
                                          rte.routeservicemode AS bustram,
                                          rte.routedescription AS routedescription,
                                          (SELECT 
                                              st.description
                                              FROM 
                                                public.stops st
                                              JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
                                              WHERE 
                                                seg1.segmentsequence = (SELECT MIN(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS fromstop,
                                          (SELECT 
                                              st.description
                                              FROM 
                                                public.stops st
                                              JOIN public.segments seg1 ON seg1.stopid = st.stopid AND seg1.tripid = trp.tripid
                                              WHERE 
                                                seg1.segmentsequence = (SELECT MAX(seg2.segmentsequence) FROM public.segments seg2 WHERE seg2.tripid = trp.tripid)) AS tostop
                                        FROM public.trips    trp
                                        JOIN public.routes   rte      ON rte.routeid=trp.routeid
                                        JOIN public.segments seg      ON seg.tripid=trp.tripid
                                        JOIN public.stops    stp      ON seg.stopid=stp.stopid
                                        WHERE
                                          rte.routeidentifier = $1;""")

nodeIDsofStops = db.prepare("""         SELECT DISTINCT
                                          stp.osm_node_id,
                                          stp.description,
                                          stp.stopidentifier,
                                          trp.tripstart,
                                          seg.segmentsequence
                                        FROM public.trips    trp
                                        JOIN public.routes   rte      ON rte.routeid=trp.routeid
                                        JOIN public.segments seg      ON seg.tripid=trp.tripid
                                        JOIN public.stops    stp      ON seg.stopid=stp.stopid
                                                                      AND stp.description !~* 'dummy|afgeschaft'
                                        WHERE
                                          trp.tripid = $1
                                        ORDER BY
                                          trp.tripstart ASC,
                                          seg.segmentsequence ASC;""")

def main():
    pub_ID=input('Enter line number(s) you want to create OSM route relations for: ')
    if ',' in pub_ID:
        for id in pub_ID.split(','):
            print(id)
            processRoute(id,'*')
    else:
        processRoute(pub_ID,'3214')

def processRoute(id,fn):
    routeidentifiers = routeidentifiersQUERY(id)
    if len(routeidentifiers)<2:
        print(routeidentifiers)
        print("Auto selecting: %s %s Version %s (%s)", routeidentifiers[0][2], routeidentifiers[0][1], routeidentifiers[0][3], routeidentifiers[0][4])
        line=routeidentifiers[0][0]
    else:
        for i,route in enumerate(routeidentifiers):
            print(i+1,route[0], route[1])
        selected=input('Select a line: ')
        line=routeidentifiers[int(selected)-1][0]
    if fn == '*': fn = line
    targetFileName = 'C:/Data/De Lijn/RoutesFor' + fn + '.osm'
    distinctroutes = {}
    print ("Calculating stop lists for:  " + line)
    with open(targetFileName, mode='w', encoding='utf-8') as osmroutesfile:
        #print(tripids.string)
        tripslist = tripids(line)
        #print(tripslist)
        stopnames = {}
        for row in tripslist:
            #print(row)
            stops_as_string = ','
            stopslist = nodeIDsofStops(row['tripid'])
            for stop in stopslist:
                #print(stop)
                stopnames[stop[0]] = stop[1]
                #print(stops_as_string)
                #print(stop[0], stops_as_string.split(',')[-1], stop[1])
                if stop[0]:
                    if stop[0] != stops_as_string.split(',')[-2]:
                        stops_as_string += stop[0] + ','
                else:
                    stops_as_string += '"' + stop[1] + '",'
            stops_as_string = stops_as_string[1:-1]
            notfound=True
            for sequence in distinctroutes.keys():
                notfound=True
                if len(stops_as_string)<len(sequence) and stops_as_string in sequence: notfound=False; break
                if len(sequence)<len(stops_as_string) and sequence in stops_as_string:
                    del distinctroutes[sequence]
                    break
            if notfound: distinctroutes[stops_as_string] = [row['fromstop'],row['tostop'],row['type'],row['bustram']]
        #print(distinctrouteslist)
        osmroutesfile.write("<?xml version='1.0' encoding='UTF-8'?>\r")
        osmroutesfile.write("<osm version='0.6' upload='true' generator='Python'>\r")
        i=1; routeslist = []
        for stopssequence in distinctroutes:
            fromstop,tostop,type,bustram = distinctroutes[stopssequence]
            madeUpId = str(random.randint(100000, 900000))
            routeslist.append(madeUpId)
            osmroutesfile.write("<relation id='-" + madeUpId + "' timestamp='2013-02-13T03:23:07Z' visible='true' version='1'>\r")
            print('\n' + str(i) + "  " + id + " " + fromstop + " - " + tostop)
            for osmstopID in stopssequence.split(','):
                #print ('osmstopID: ' + osmstopID)
                if osmstopID[0] == '"':
                    osmroutesfile.write("  <member type='node' ref='" + stopssequence.split(',')[0] + "' role='" + osmstopID + "'/>\r")
                    print('                                 ' + osmstopID + ' MISSING!!!!!!!!!!!!!')
                else:
                    osmroutesfile.write("  <member type='node' ref='" + osmstopID + "' role='platform'/>\r")
                    print('  ' + stopnames[osmstopID])
            osmroutesfile.write('''  <tag k="type" v="route" />\r''')
            osmroutesfile.write('''  <tag k="odbl" v="tttttt" />\r''')
            print (bustram)
            if int(bustram)==1:
                osmroutesfile.write('''  <tag k="route" v="tram" />\r''')
            else:
                osmroutesfile.write('''  <tag k="route" v="bus" />\r''')
            #print(fromstop, tostop)
            #print(re.search(removePerronRE,tostop).group(1))
            #print(re.search(removePerronRE,fromstop).group(1))
			
            osmroutesfile.write('''  <tag k="name" v="De Lijn ''' + id + ''' ''' + re.search(removePerronRE,fromstop).group('name') + ''' - ''' + re.search(removePerronRE,tostop).group('name') +'''" />\r''')
            osmroutesfile.write('''  <tag k="ref" v="''' + id + '''" />\r''')
            #osmroutesfile.write('''  <tag k="ref:De_Lijn" v="''' + line + '''" />\r''')
            osmroutesfile.write('''  <tag k="from" v="''' + fromstop + '''" />\r''')
            osmroutesfile.write('''  <tag k="to" v="''' + tostop + '''" />\r''')
            osmroutesfile.write('''  <tag k="operator" v="De Lijn" />\r''')
            servicetypes =  ['regular','express','school','special','special','belbus']
            servicetypesOSM=['',       'express','school','',       '',       'on_demand']
            #servicetype=servicetypes[int(type)]
            if servicetypesOSM[int(type)]:
                osmroutesfile.write('''  <tag k="bus" v="''' + servicetypesOSM[int(type)] + '''" />\r''')
            osmroutesfile.write('''</relation>\r\r''')
            i+=1
        osmroutesfile.write("<relation id='-" + str(random.randint(100000, 900000)) + "' timestamp='2013-02-13T03:23:07Z' visible='true' version='1'>\r")
        osmroutesfile.write('''  <tag k="type" v="route_master" />\r''')
        try:
            if int(bustram)==1:
                osmroutesfile.write('''  <tag k="route_master" v="tram" />\r''')
            else:
                osmroutesfile.write('''  <tag k="route_master" v="bus" />\r''')
        except NameError:
            pass
        osmroutesfile.write('''  <tag k="name" v="''' + row['routedescription'] +'''" />\r''')
        osmroutesfile.write('''  <tag k="ref" v="''' + id + '''" />\r''')
        osmroutesfile.write('''  <tag k="ref:De_Lijn" v="''' + line + '''" />\r''')
        osmroutesfile.write('''  <tag k="operator" v="De Lijn" />\r''')
        try:
            if servicetypesOSM[int(type)]:
                osmroutesfile.write('''  <tag k="bus" v="''' + servicetypesOSM[int(type)] + '''" />\r''')
        except NameError:
            pass
        for routeId in routeslist:
            osmroutesfile.write("  <member type='relation' ref='-" + routeId + "' role=''/>\r")
            
        osmroutesfile.write('''</relation>\r\r''')
        
        osmroutesfile.write("</osm>\r")


if __name__ == "__main__":
    main()

Adding the ways nearest to the stops in above route relations automatically

Having correct sequences of stops is a tremendous help, but having all the ways next to those stops, is even better.

To run the following script, you need to add the scripting plugin to JOSM and install Jython.

#!/bin/jython
'''
FindWaysBelongingToRoutesStartingFromStops.jy
- Given a list of stops, find all ways belonging to the route

This code is released under the GNU General
Public License v2 or later.

The GPL v3 is accessible here:
http://www.gnu.org/licenses/gpl.html

The GPL v2 is accessible here:
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

It comes with no warranty whatsoever.

'''
from javax.swing import JOptionPane
from org.openstreetmap.josm import Main
import org.openstreetmap.josm.command as Command
import org.openstreetmap.josm.data.osm.Node as Node
import org.openstreetmap.josm.data.osm.Way as Way
import org.openstreetmap.josm.data.osm.Relation as Relation
import org.openstreetmap.josm.data.Bounds as Bounds
import org.openstreetmap.josm.data.osm.visitor.BoundingXYVisitor as BoundingXYVisitor
import org.openstreetmap.josm.data.osm.TagCollection as TagCollection
import org.openstreetmap.josm.data.osm.DataSet as DataSet
import org.openstreetmap.josm.data.osm.RelationMember as RelationMember
import org.openstreetmap.josm.gui.dialogs.relation.DownloadRelationMemberTask as DownloadRelationMemberTask
import org.openstreetmap.josm.actions.DownloadReferrersAction as DownloadReferrersAction
import re, time
import codecs

dummyRelation = Relation(); dummyWay = Way()


sideEffects = {
    'addWayToRoute': None,
    'createStopAreaRelations': None,
    }

logVerbosity = 50
'''
10: only report problems that require attention
20: report on collection
30: report on network nodes
40: report on which routes are being checked
50: report everything
'''

def getMapView():
    if Main.main and Main.main.map:
        return Main.main.map.mapView
    else:
        return None

def findConnectingWay(way1,way2):
    if way1.get('junction')=='roundabout':
        endnodesway1=way1.getNodes()
    else:
       endnodesway1 = [way1.getNode(0),way1.getNode(len(way1.getNodes())-1)]
    if way2.get('junction')=='roundabout':
        endnodesway2=way2.getNodes()
    else:
       endnodesway2 = [way2.getNode(0),way2.getNode(len(way2.getNodes())-1)]
    for endnode in endnodesway1:
        #print dir(endnode)
        parentways=endnode.getReferrers()
        for parentway in parentways:
            if parentway.getType()==dummyWay.getType():
	            if parentway.get('junction')=='roundabout':
	                endnodeInParentWays=parentway.getNodes()
	            else:
	               endnodeInParentWays = [parentway.getNode(0),parentway.getNode(len(parentway.getNodes())-1)]
	            for endnodeInParentWay in endnodeInParentWays:
	                if endnodeInParentWay in endnodesway2:
	                    return parentway
    return None
        
            
def checkPTroute(route, aDownloadWasNeeded):
    if aDownloadWasNeeded:
        return None, False, ''

    print
        
    waymemberslist = []
    modified = False

    #print dir(mv)
    for member in route.getMembers():
        """Algorithm:
           Is the node a member of a public_transport=stop_area?
               Grab way from stop_area
           Search near to the node for "highway -highway=bus_stop inview type:way  -closed"
               If one more than one highways are found: Is one of them member of another route=bus relation?
           Also search for "public_transport=stop_position type:node"
               If found: use the parent way
        """
        if member.isNode():
            #print dir(mv)
            node = member.getNode()
            print node.get('name')
            found = False
            for parentRelationOfNode in node.getReferrers():
                if found: break
                if parentRelationOfNode.getType() == dummyRelation.getType():
                    if parentRelationOfNode.get('type') and parentRelationOfNode.get('type') in ('public_transport'):
                        if parentRelationOfNode.get('public_transport') in ('stop_area','stop_position'):
                            for member in parentRelationOfNode.getMembers(): # now we are sure it's the correct kind of relation, drill down to find parent way of stop_position node
                                if found: break
                                if member.isNode():
                                    stopPositionNodeCandidate=member.getNode()
                                    if stopPositionNodeCandidate.get('public_transport') in ['stop_position']:
                                        for parentWayCandidate in stopPositionNodeCandidate.getReferrers():
                                            if parentWayCandidate.getType() == dummyWay.getType():
                                                print 'connected through stop_area: '
                                                print parentWayCandidate.getKeys()
                                                waymemberslist.append(parentWayCandidate); found = True; break
            if not(found):
                # We couldn't determine the way by means of the stop_area relation
                bboxCalculator = BoundingXYVisitor()
                bboxCalculator.computeBoundingBox([node])
                bboxCalculator.enlargeBoundingBox()
                if bboxCalculator.getBounds():
                   mv.recalculateCenterScale(bboxCalculator)
                #mv.zoomTo(node.getEastNorth())
                ignorelist = [node]
                stopPosition = Node()
                for i in range(1,20):
                    candidates = mv.getAllNearest(mv.getPoint(node),ignorelist,Way().wayPredicate)
                    if candidates:
                        print len(candidates)
                        #print candidates
                        nodecandidates = mv.getAllNearest(mv.getPoint(node),[],Node().nodePredicate)

                        for candidate in nodecandidates:
                            # is there a stop_position node in the candidates?
                            if candidate.get('public_transport') in ['stop_position']:
                                stopPosition = candidate
                                ignorelist.append(candidate)
                                break
                        for candidate in candidates:
                            if candidate.get('highway') in ['primary', 'secondary', 'tertiary', 'unclassified', 'residential', 'service', 'living_street', 'trunk']:
                                #print candidate
                                #print candidate.getNode(0)
                                #print stopPosition
                                if not(member==route.getMember(0)) and candidate.getNode(0)==stopPosition:
                                    continue # there is probably a better candidate which has this way as its end node, instead of as the starting node
                                else:
                                    waymemberslist.append(candidate)
                                    print 'using '
                                    print candidate.getKeys()
                                    found = True; break
                            else:
                                ignorelist.append(candidate)
                                print 'ignoring '
                                print candidate.getKeys()
                    if found: break
                    bboxCalculator.enlargeBoundingBox() # zoom out a bit and try again
                    if bboxCalculator.getBounds():
                        mv.recalculateCenterScale(bboxCalculator)
            if not(found):
                print 'Found no suitable candidate way for this stop'
            else:
                # We found a way and added it to the relation, but is this way connected to the previous way we found?
                if len(waymemberslist) > 2:
                    notConnected=True
                    if waymemberslist[-1].get('junction')=='roundabout':
                        endnodeslatest=waymemberslist[-1].getNodes()
                    else:
                       endnodeslatest = [waymemberslist[-1].getNode(0),waymemberslist[-1].getNode(len(waymemberslist[-1].getNodes())-1)]
                    if waymemberslist[-2].get('junction')=='roundabout':
                        endnodesprevious=waymemberslist[-2].getNodes()
                    else:
                       endnodesprevious = [waymemberslist[-2].getNode(len(waymemberslist[-2].getNodes())-1),waymemberslist[-2].getNode(0)]
                    for endnodelatest in endnodeslatest:
                        if endnodelatest in endnodesprevious:
                            notConnected=False; break
                    if notConnected:
                        connectingWay=findConnectingWay(waymemberslist[-2],waymemberslist[-1])
                        if connectingWay:
                            waymemberslist.insert(-1,connectingWay)
                        elif False:
                            # Let's look for a relation containing both ways in the proper order
                            for parentrelation in waymemberslist[-2].getReferrers():
                                notThereYet = True
                                #print parentrelation
                                if parentrelation.getType() == dummyRelation.getType():
                                    memberwaysOfParentRelation=[]
                                    for member in parentrelation.getMembers():
                                        if member.isWay(): memberwaysOfParentRelation.append(member.getWay())
                                    #print membersOfParentRelation
                                    #print waymemberslist[-1] in memberwaysOfParentRelation
                                    if waymemberslist[-1] in memberwaysOfParentRelation and memberwaysOfParentRelation.index(waymemberslist[-1])>memberwaysOfParentRelation.index(waymemberslist[-2]):
                                        notThereYet = True
                                        for way in memberwaysOfParentRelation:
                                            #print way
                                            #print waymemberslist[-1]
                                            #print waymemberslist[-2]
                                            if notThereYet:
                                                if way == waymemberslist[-2]:
                                                    notThereYet=False; continue
                                                else:
                                                    if way == waymemberslist[-1]:
                                                        break
                                                    else:
                                                        waymemberslist.insert(-2,way)
                                        
                                if not(notThereYet): break 
                    else:
                        print 'ALREADY CONNECTED TO PREVIOUS WAY                    !!!!!!!!!!'
                #mv.zoomPrevious()
                #mv.repaint()
                #time.sleep(1)
            #print
            #print 'node:', node
            #print 'candidates:', candidates
        #print 'waymemberslist:', waymemberslist
        #waymemberslist.extend(nodememberslist)
        i=0; newRelation = Relation(route); commandsList = []; previousway=None
        for way in waymemberslist:
            newMember = RelationMember(str(i+1),way)
            if not way==previousway: #not(newMember in newRelation.getMembers()):
                newRelation.addMember(i, newMember)
                i+=1; previousway=way
        modified = True
        #print dir(node)
        #bboxCalculator = BoundingXYVisitor()
        #bboxCalculator.computeBoundingBox([node])
        #print bboxCalculator
        #bboxCalculator.enlargeBoundingBox()
        #if bboxCalculator.getBounds():
        #    mv.recalculateCenterScale(bboxCalculator)
        #mv.zoomTo(node.getEastNorth())
        #candidates = mv.getNearestNodes(mv.getPoint(node),node.nodePredicate)
        

    if modified:
        commandsList.append(Command.ChangeCommand(route, newRelation))
        Main.main.undoRedo.add(Command.SequenceCommand("Adding ways directly adjacent to stop nodes", commandsList))
        commandsList = []
    modified = False


aDownloadWasNeeded = False
'''
Since Downloading referrers or missing members happens asynchronously in a separate worker thread
the script can run in three modes

1. No downloads allowed/offline run; output mentions that data was incomplete in its reports.
2. Download run; When incomplete items are encountered, they are scheduled to be downloaded. From then on, no more quality checks are performed on the data.
   All hierarchies are still checked, looking for more incomplete data for which more downloads need to be scheduled.
3. Normal run; All data is available and proper reporting can be performed.
'''

dummy_way = Way()
dummy_relation = Relation()

mv = getMapView()
if mv and mv.editLayer and mv.editLayer.data:
    selectedRelations = mv.editLayer.data.getSelectedRelations()

    if not(selectedRelations):
        JOptionPane.showMessageDialog(Main.parent, "Please select a route relation")
    else:
        for relation in selectedRelations:
            if logVerbosity> 49: print relation
            if relation.hasIncompleteMembers():
                if 'downloadIncompleteMembers' in sideEffects:
                    aDownloadWasNeeded = True
                    print 'Downloading referrers for ', str(relation.get('name')), ' ', str(relation.get('note'))
                    DownloadRelationMemberTask.run(DownloadRelationMemberTask(relation, relation.getIncompleteMembers(), mv.editLayer ))
                    continue
                else:
                    JOptionPane.showMessageDialog(Main.parent, 'Please download all incomplete member of the relations first')
                    exit()
            relationType = relation.get('type')
            if relationType == 'route':
                checkPTroute(relation, aDownloadWasNeeded)
        if aDownloadWasNeeded:
            JOptionPane.showMessageDialog(Main.parent, 'There was incomplete data and downloading mode was initiated,\nNo further quality checks were performed.\nPlease run the script again when all downloads have completed')