Planet.osm to text, source code
From OpenStreetMap Wiki
This article or section may contain out-of-date information: the information is no longer correct or no longer relevant. If you know about the current state of affairs, please help keep everyone informed by updating this information. (Discuss)
Source code for Planet.osm to text
use XML::Parser;
# Input: path of the XML file to convert, taken from the first command-line
# argument.
my $Filename = shift();
# Fail early with a usage hint instead of handing an undefined or empty
# filename to the parser.
die "Usage: $0 <planet.osm>\n" unless defined($Filename) and length($Filename);

# Temporary data: state of the element currently being parsed
#   %MainAttr    - XML attributes of the enclosing node/segment/way
#   $Type        - one-letter type of that element ("n", "s" or "w")
#   %Tags        - k/v pairs collected from nested <tag> elements
#   @WaySegments - ids collected from nested <seg> elements
my (%MainAttr,$Type,%Tags, @WaySegments);

# Stats: number of occurrences of each tag key
my %AllTags;

# Stored data: parsed records indexed by their numeric id, plus named counters
my (@Nodes, @Segments, @Ways, %Stats);

# Processing stage
#----------------------------------------------
# The three handlers fill the structures above while the file is parsed.
my $P = XML::Parser->new(Handlers => {Start => \&DoStart, End => \&DoEnd, Char => \&DoChar});
$P->parsefile($Filename);

print STDERR "Creating output files\n";
# Combine way data into segments
#----------------------------------------------
# Every segment listed by a way inherits that way's tags. A log of what was
# copied is written to ways.txt; failure to open or close that file is
# reported on STDERR instead of being silently ignored.
if(open(my $WaysOut, '>', "ways.txt")){
    # Structural fields: never copied from a way onto a segment, otherwise a
    # way tag called "from" or "to" would overwrite the segment's endpoints.
    my %Reserved = map { $_ => 1 } ("segments", "from", "to");

    foreach my $Way (@Ways){
        # @Ways is indexed by way id, so ids that never occurred in the input
        # are undefined holes. Skip them: they are not ways, must not be
        # counted as "empty ways", and must not be logged.
        next unless defined($Way);

        #printf {$WaysOut} "Way: %s,%s\n", $Way->{"segments"}, $Way->{"name"};
        my @SubSegments = split(/,/,$Way->{"segments"});
        $Stats{"empty ways"}++ if(scalar(@SubSegments) < 1);

        # Only the way's own tags are inherited
        my @CopyKeys = grep { !$Reserved{$_} } keys(%$Way);
        printf {$WaysOut} "Copying keys: %s to segments %s\n",
            join(",",@CopyKeys),
            join(",",@SubSegments);

        # Each segment in a way inherits the way's attributes
        foreach my $Segment(@SubSegments){
            foreach my $Key(@CopyKeys){
                $Segments[$Segment]{$Key} = $Way->{$Key};
            }
        }
    }
    close($WaysOut) or warn "Error closing ways.txt: $!\n";
}
else{
    warn "Cannot write ways.txt: $!\n";
}
# Main output (segments)
#----------------------------------------------
# Writes one line per stored segment to osm.txt:
#   from-lat,from-lon,to-lat,to-lon,class,name,highway
# Failure to open or close the file is reported on STDERR.
if(open(my $OsmOut, '>', "osm.txt")){
    foreach my $Segment(@Segments){
        # @Segments is indexed by segment id; ids that were never seen are
        # undefined holes and are neither segments nor worth an output line.
        next unless defined($Segment);

        my $From = $Segment->{"from"};
        my $To = $Segment->{"to"};
        $Stats{"segments without endpoints"}++ if($From == 0 or $To == 0);

        # Fetch the endpoint records without autovivifying placeholder
        # entries in @Nodes for ids that do not exist.
        my $FromNode = $Nodes[$From] || {};
        my $ToNode = $Nodes[$To] || {};

        printf {$OsmOut} "%f,%f,%f,%f,%s,%s,%s\n",
            $FromNode->{"lat"},
            $FromNode->{"lon"},
            $ToNode->{"lat"},
            $ToNode->{"lon"},
            $Segment->{"class"},
            $Segment->{"name"},
            $Segment->{"highway"};
    }
    close($OsmOut) or warn "Error closing osm.txt: $!\n";
}
else{
    warn "Cannot write osm.txt: $!\n";
}
# Secondary output (named points)
#----------------------------------------------
# Writes every node that has a name, amenity or class tag to points.txt as
#   lat,lon,name,amenity,class
# Failure to open or close the file is reported on STDERR.
if(open(my $PointsOut, '>', "points.txt")){
    foreach my $Node(@Nodes){
        # @Nodes is indexed by node id; ids that were never seen are undefined
        # holes and must not be counted as nodes located at 0,0.
        next unless defined($Node);

        $Stats{"Nodes with zero lat/long"}++ if($Node->{"lat"} == 0 and $Node->{"lon"} == 0);

        # Untagged nodes are only geometry and are not listed
        next unless($Node->{"name"} || $Node->{"amenity"} || $Node->{"class"});

        printf {$PointsOut} "%f,%f,%s,%s,%s\n",
            $Node->{"lat"},
            $Node->{"lon"},
            $Node->{"name"},
            $Node->{"amenity"},
            $Node->{"class"};
    }
    close($PointsOut) or warn "Error closing points.txt: $!\n";
}
else{
    warn "Cannot write points.txt: $!\n";
}
# Statistics output
#----------------------------------------------
# Writes stats.txt: first every tag key with its usage count (most used
# first), then the named counters collected in %Stats.
# Failure to open or close the file is reported on STDERR.
if(open(my $StatsOut, '>', "stats.txt")){
    # Ties are broken by key name so that repeated runs give identical output
    foreach my $Tag (sort {$AllTags{$b} <=> $AllTags{$a} or $a cmp $b} keys(%AllTags)){
        printf {$StatsOut} "* %d %s\n", $AllTags{$Tag}, $Tag;
    }
    print {$StatsOut} "\n\nStats:\n";
    # Sorted for the same reason: hash order differs between runs
    foreach my $Counter (sort(keys(%Stats))){
        printf {$StatsOut} "* %d %s\n", $Stats{$Counter}, $Counter;
    }
    # Closing explicitly surfaces buffered write errors
    close($StatsOut) or warn "Error closing stats.txt: $!\n";
}
else{
    warn "Cannot write stats.txt: $!\n";
}
print STDERR "Done\n";
# End of the main program
exit;
# Function is called whenever an XML tag is started
#----------------------------------------------
# Arguments (in the order the parser passes them to a Start handler):
#   $Expat - the parser object (unused)
#   $Name  - name of the element being opened
#   %Attr  - the element's attributes as key/value pairs
# Declared without a prototype: the handler takes arguments, so an empty "()"
# prototype would reject any direct call.
sub DoStart
{
    my ($Expat, $Name, %Attr) = @_;

    if($Name eq "node" or $Name eq "segment" or $Name eq "way"){
        # A new object begins: drop what was collected for the previous one
        # and remember this element's own attributes.
        %Tags = ();
        @WaySegments = () if($Name eq "way");
        %MainAttr = %Attr;
        # One-letter type code: "n", "s" or "w"
        $Type = substr($Name, 0, 1);
    }
    elsif($Name eq "tag"){
        # NOTE: keys are stored verbatim; a key that equals a structural field
        # name (id, from, to, lat, lon, ...) is not filtered out here.
        $Tags{$Attr{"k"}} = $Attr{"v"};
        $AllTags{$Attr{"k"}}++;
        $Stats{"tags"}++;
    }
    elsif($Name eq "seg"){
        # Collect the ids of the <seg> children of the current way
        push(@WaySegments, $Attr{"id"});
    }
    return;
}
# Function is called whenever an XML tag is ended
#----------------------------------------------
# Arguments (in the order the parser passes them to an End handler):
#   $Expat   - the parser object (unused)
#   $Element - name of the element being closed
# Stores the finished node/segment/way, indexed by its "id" attribute, and
# updates the counters in %Stats. Closing any other element does nothing.
# In every branch the tags are copied first and the structural fields are
# written last, so a tag that happens to be called "lat", "lon", "from", "to"
# or "segments" cannot overwrite the real data.
# Declared without a prototype: the handler takes arguments.
sub DoEnd{
    my ($Expat, $Element) = @_;
    my $ID = $MainAttr{"id"};

    if($Element eq "node"){
        foreach my $Key (keys(%Tags)){
            $Nodes[$ID]{$Key} = $Tags{$Key};
        }
        $Nodes[$ID]{"lat"} = $MainAttr{"lat"};
        $Nodes[$ID]{"lon"} = $MainAttr{"lon"};
        $Stats{"named nodes"}++ if($Nodes[$ID]{"name"});
        # NOTE(review): this tests a "tags" XML attribute on the element, not
        # the presence of <tag> children - confirm that is what is intended.
        $Stats{"tagged nodes"}++ if($MainAttr{"tags"});
        $Stats{"nodes"}++;
        #print "Node:".join(",",keys(%Tags))."\n" if(scalar(keys(%Tags))>0);
    }
    elsif($Element eq "segment"){
        foreach my $Key (keys(%Tags)){
            $Segments[$ID]{$Key} = $Tags{$Key};
        }
        $Segments[$ID]{"from"} = $MainAttr{"from"};
        $Segments[$ID]{"to"} = $MainAttr{"to"};
        # NOTE(review): same "tags" attribute test as for nodes - confirm.
        $Stats{"tagged segments"}++ if($MainAttr{"tags"});
        $Stats{"segments"}++;
    }
    elsif($Element eq "way"){
        foreach my $Key (keys(%Tags)){
            $Ways[$ID]{$Key} = $Tags{$Key};
        }
        # The member segment ids are kept as one comma-separated string
        $Ways[$ID]{"segments"} = join(",",@WaySegments);
        $Stats{"Ways"}++;
    }
    return;
}
# Function is called whenever text is encountered in the XML file
#----------------------------------------------
# Arguments (in the order the parser passes them to a Char handler): the
# parser object and the text string. Both are ignored: this handler
# deliberately does nothing with character data.
# Declared without a prototype: the handler is passed arguments, so an empty
# "()" prototype would reject any direct call.
sub DoChar{
    return;
}