User:Travelling salesman/gpx merge

From OpenStreetMap Wiki
Jump to navigation Jump to search

This is a python script that merges two gpx tracks and creates an average of the two. Both tracks have to be ordered in the same direction, but they can have completely different speeds and timings. That's what make the task non-trivial. The script finds an optimum solution by picking successive point pairs that are closest. For typical tracks which are close to each other, the runtime is linear with the number of trackpoints. If the tracks however don't match well, the runtime becomes quadratic and then the execution can take up to minutes in the worst case.

The script must be saved to a file (gpx_merge.py) and the executable flag should be set. Then it can be executed from the command line. For more documentation, type

./gpx_merge.py --help


Requirements

On a debian based linux distribution (such as Ubuntu) you can install these with:

sudo apt-get install python python-scipy python-lxml python-matplotlib

Algorithm

gpx_merge.py iterates through pairs of points, consisting of one point from the first track and one point of the second track. A successive pair can have one or both points an index further than the predecessor, but no index can decrease even if that would create a closer pair. Like this a forward advancing track is created, that interpolates the two input tracks between each of their points and moving smoothly in the same direction. Among the huge number of possible successor combinations, the best one is determined with the Dijkstra algorithm. Finally the mean positions of every pair constitute the merged track.

If you have two gpx tracks from going the same way twice, and they have poor accuracy in some places, you can use gpx_merge to create one better track and only upload that one to openstreetmap. If the tracks point in different directions, because they were created going the same way forth and back, then the second track will be automatically reversed.

Code

#!/usr/bin/env python
# -*- coding: utf8 -*-

'''
Merges two tracks by finding an optimized average between them.
The tracks can have arbitrary different timing.
'''


from math import *
import sys
import datetime
from lxml import etree
import random
from optparse import OptionParser
import numpy as np
import numpy.linalg as la
import copy
import time


timeformat = '%Y-%m-%dT%H:%M:%S'


parser = OptionParser('usage: %prog [options] input-main.gpx input-second.gpx')
parser.add_option('-o', '--out', action='store', type='string',
    dest='ofname', default=None, help='Output file name')
parser.add_option('-r', '--replace', action='store_true',
    dest='replace', default=False, help='replace original file?')
parser.add_option('-d', '--distance-limit', action='store', type='float', dest='dlim',
    default=30.0, help='Maximum distance between points to be paired.')
parser.add_option('-s', '--reverse', action='store', type='str', dest='reverse',
    default='auto', help='reverse direction of second track: true/false/auto')
parser.add_option("-v", action="store_true", dest="verbose", default=False)

(options, args) = parser.parse_args()

# use the WGS-84 ellipsoid
rE = 6356752.314245 # earth's radius
a = 6378137.0
b = 6356752.314245179


if len(args) < 2:
    parser.print_usage()
    exit(2)


def orth_to(v, vref):
    return v - vref * vref.dot(v) / vref.dot(vref)


def latlonele_to_xyz(lat, lon, ele):
    s = sin(radians(lat))
    c = cos(radians(lat))
    r = ele + a * b / la.norm([s*a, c*b])
    lon = radians(lon)
    return np.array((r * c * sin(lon), r * c * (-cos(lon)), r * s))


def hdistr(r1, r2):
    h = orth_to(r2 - r1, (r1 + r2) / 2.0)
    return la.norm(h)


def hdist(lle1, lle2):
    r1 = latlonele_to_xyz(*lle1)
    r2 = latlonele_to_xyz(*lle2)
    h = orth_to(r2 - r1, (r1 + r2) / 2.0)
    return la.norm(h)


def xyz_to_latlonele(xyz):
    x, y, z = tuple(xyz)
    r = la.norm(xyz)
    if (r == 0):
        return 0.0, 0.0, 0.0
    lat = degrees(atan2(z, la.norm([x, y])))
    lon = degrees(atan2(x, -y))
    ele = r * (1.0 - a * b / la.norm([a*z, b*x, b*y]))
    return lat, lon, ele


def lle(trkpt, nsmap):
    lat = float(trkpt.get('lat'))
    lon = float(trkpt.get('lon'))
    ele = float(trkpt.find(nsmap + 'ele').text)
    return lat, lon, ele


def merge(pts1, pts2):
    # pts1, pts2: lists of points of (lat, lat, ele)
    # returns list of index pairs of points to be matched
    n1 = len(pts1)
    n2 = len(pts2)
    if options.verbose: print 'total points:', n1, n2
    pairs = []
    if n1 < 1 or n2 < 1:
        return pairs
    
    keep_n_pairs = 5
    
    # initialisations for progress printing
    progress_printed = False
    progress = None
    tprint = time.time()
    total = 0
    
    # Algorithm:
    # The cost of a pair is the sum of point distances to get to the pair.
    # Possible paths cointain previous pairs, where each precursor pair
    # can be one index less in one point or in both points.
    # We progress along pairs similar to Dijkstra, but, owing to the dense grid
    # of possible successor pairs, we progress in such an order that no cost of
    # a point must be updated twice, and discard pairs heuristically.
    costs = {(0, 0):{'prev':None, 'd':hdist(pts1[0], pts2[0]),
                     'dsum':hdist(pts1[0], pts2[0])/2.}}
    r1 = [latlonele_to_xyz(*p) for p in pts1]
    r2 = [latlonele_to_xyz(*p) for p in pts2]
    
    i1min = i2min = 0
    i1max = i2max = 0
    mindi1max = mindi2max = costs[(0, 0)]['d']
    minsumi1max = minsumi2max = costs[(0, 0)]['dsum']
    
    while i1max < n1 - 1 or i2max < n2 - 1:
        if (i1max < n1 - 1) and (i2max == n2 - 1 or minsumi1max <= minsumi2max):
            # advance i1max
            for i2 in range(i2min, i2max - keep_n_pairs):
                # cut away pairs that are unlikely to be on the optimum track
                if (hdistr(r1[i1max], r2[i2]) - costs[(i1max, i2max)]['d']
                        <= options.dlim):
                    break
                elif (costs[(i1max, i2)]['dsum']
                        > minsumi1max + 5 * (mindi1max + options.dlim)):
                    i2min = i2 + 1
                else:
                    continue
            
            # advance possible tracks to i1max + 1
            minsumi1max = float('inf')
            for i2 in range(i2min, i2max + 1):
                newpair = (i1max + 1, i2)
                d = hdistr(r1[newpair[0]], r2[newpair[1]])
                prevs = [p for p in ((i1max, i2 - 1), (i1max, i2), (i1max + 1, i2 - 1)) if p[1] >= i2min]
                dsums = [costs[prev]['dsum'] + (costs[prev]['d'] + d) / 2.
                         * (sum(newpair) - sum(prev)) for prev in prevs]
                i = np.argmin(dsums)
                dsummin = dsums[i]
                costs[newpair] = {'prev':prevs[i], 'd':d, 'dsum':dsummin}
                if (dsummin < minsumi1max and i2 < i2max) or i2 == i2min:
                    minsumi1max = dsummin
                    mindi1max = d
                total += 1
            i1max += 1
        else:
            # advance i2max
            for i1 in range(i1min, i1max - keep_n_pairs):
                # cut away pairs that are unlikely to be on the optimum track
                if (hdistr(r1[i1], r2[i2max]) - costs[(i1max, i2max)]['d']
                        <= options.dlim):
                    break
                elif (costs[(i1, i2max)]['dsum']
                        > minsumi2max + 5 * (mindi2max + options.dlim)):
                    i1min = i1 + 1
                else:
                    continue
            
            # advance possible tracks to i2max + 1
            minsumi2max = float('inf')
            for i1 in range(i1min, i1max + 1):
                newpair = (i1, i2max + 1)
                d = hdistr(r1[newpair[0]], r2[newpair[1]])
                prevs = [p for p in ((i1 - 1, i2max), (i1, i2max), (i1 - 1, i2max + 1)) if p[0] >= i1min]
                dsums = [costs[prev]['dsum'] + (costs[prev]['d'] + d) / 2.
                         * (sum(newpair) - sum(prev)) for prev in prevs]
                i = np.argmin(dsums)
                dsummin = dsums[i]
                costs[newpair] = {'prev':prevs[i], 'd':d, 'dsum':dsummin}
                if (dsummin < minsumi2max and i1 < i1max) or i1 == i1min:
                    minsumi2max = dsummin
                    mindi2max = d
                total += 1
            i2max += 1
        
        # print progess
        progress_new = 100 * (i1max + i2max) / (n1 + n2 - 2)
        if options.verbose and progress_new > progress and time.time() >= tprint + 1:
            tprint = time.time()
            progress = progress_new
            print '\r', progress, '%',
            sys.stdout.flush()
            progress_printed = True
    
    if progress_printed:
        print '\r',
    if options.verbose: print 'computed nodes:', total
    
    # now collect the optimal chain
    pairs = [(n1-1, n2-1)]
    p = pairs[0]
    while p != (0, 0):
        p = costs[p]['prev']
        pairs = [p] + pairs
    if options.verbose: print 'mean distance', costs[pairs[-1]]['dsum'] / len(pairs)
    return pairs


def main(f1, f2):
    # import xml data from files
    if options.verbose: print 'opening files', f1, f2
    
    infile1 = open(f1)
    tree1 = etree.parse(infile1)
    infile1.close()
    gpx1 = tree1.getroot()
    if gpx1.nsmap.has_key(None):
        nsmap1 = '{' + gpx1.nsmap[None] + '}'
    else:
        nsmap1 = ''
    
    infile2 = open(f2)
    tree2 = etree.parse(infile2)
    infile2.close()
    gpx2 = tree2.getroot()
    if gpx2.nsmap.has_key(None):
        nsmap2 = '{' + gpx2.nsmap[None] + '}'
    else:
        nsmap2 = ''
    
    # go through coordinates of first track
    trkpts1 = gpx1.findall('.//' + nsmap1 + 'trkpt')
    trkpts2 = gpx2.findall('.//' + nsmap2 + 'trkpt')
    pts1 = [lle(trkpt, nsmap1) for trkpt in trkpts1]
    pts2 = [lle(trkpt, nsmap2) for trkpt in trkpts2]
    
    # reverse the second track if necessary
    if options.reverse == 'true' or (options.reverse == 'auto'
        and hdist(pts1[0], pts2[0]) + hdist(pts1[-1], pts2[-1])
           > hdist(pts1[0], pts2[-1]) + hdist(pts1[-1], pts2[0])):
        pairs = merge(pts1, [i for i in reversed(pts2)])
        pairs = [(p[0], len(pts2)-1 - p[1]) for p in pairs]
    else:
        # execute the merging algorithm
        pairs = merge(pts1, pts2)
    
    # filter out bad pairs where the distance is too large
    istart, iend = 0, len(pairs) - 1
    found_start = False
    for i, p in enumerate(pairs):
        if hdist(lle(trkpts1[p[0]], nsmap1),
                 lle(trkpts2[p[1]], nsmap2)) <= options.dlim:
            iend = i
            if not found_start:
                istart = i
                found_start = True
    pairs = pairs[istart:iend + 1]
    
    # create a new gpx track as a copy of the first one, to preserve properties
    tree = copy.deepcopy(tree1)
    gpx = tree.getroot()
    
    # remove old points
    for trkseg in gpx.findall('.//' + nsmap1 + 'trkseg'):
        for trkpt in trkseg.findall(nsmap1 + 'trkpt'):
            trkseg.remove(trkpt)

    # fill new points
    for i1, i2 in pairs:
        trkpt1 = trkpts1[i1]
        trkpt2 = trkpts2[i2]
        
        trkpt = etree.SubElement(trkseg, 'trkpt')
        trkpt.tail = '\n'
        
        if trkpt1.find(nsmap1+'hdop') != None and trkpt2.find(nsmap2+'hdop') != None:
            hdop1 = float(trkpt1.find(nsmap1 + 'hdop').text)
            hdop2 = float(trkpt2.find(nsmap2 + 'hdop').text)
            weight1 = hdop1**-1 / (hdop1**-1 + hdop2**-1)
            hdop = etree.SubElement(trkpt, 'hdop')
            hdop.text = '{:.1f}'.format((hdop1**-2 + hdop2**-2)**-.5)
        else:
            weight1 = 0.5
        
        lle1 = lle(trkpt1, nsmap1)
        lle2 = lle(trkpt2, nsmap2)
        r1 = latlonele_to_xyz(*lle1)
        r2 = latlonele_to_xyz(*lle2)
        r = weight1 * r1 + (1.0 - weight1) * r2 
        lat, lon, ele = xyz_to_latlonele(r)
        trkpt.set('lat', '{:.7f}'.format(lat))
        trkpt.set('lon', '{:.7f}'.format(lon))
        eletag = etree.SubElement(trkpt, 'ele')
        eletag.text = '{:.1f}'.format(ele)
        timetag = etree.SubElement(trkpt, 'time')
        timetag.text = trkpt1.find(nsmap1 + 'time').text
        
        # adopt other elements from track1
        for el in trkpt1.xpath('./*[not(' +
                ' or '.join(['name()="{}"'.format(i)
                             for i in ('time', 'ele', 'hdop')]) + ')]'):
            trkpt.append(copy.deepcopy(el))
    
    # export data to file
    if options.replace:
        ofname = f1
    elif options.ofname != None:
        ofname = options.ofname
    else:
        ofname = f1[:-4] + '_' + f2[:-4] + '_merged' + '.gpx'
    outfile = open(ofname, 'w')
    outfile.write(etree.tostring(tree, xml_declaration=True,
        pretty_print=True, encoding='utf-8'))
    outfile.close()
    if options.verbose: print 'modified copy written to', ofname

main(args[0], args[1])