Planet.osm to text, source code

From OpenStreetMap Wiki
Jump to: navigation, search


Source code for Planet.osm to text

use XML::Parser;
# Path of the planet.osm XML file to convert (first command-line argument)
my $Filename = shift();
die "Usage: $0 <planet.osm>\n" unless defined $Filename;

# Temporary data: state of the element currently being parsed
my (%MainAttr,      # attributes of the current <node>, <segment> or <way>
    $Type,          # "n", "s" or "w", set when that element starts
    %Tags,          # k => v pairs collected from its <tag> children
    @WaySegments);  # id attributes of the <seg> children of the current <way>
# Stats: how many times each tag key was seen
my %AllTags;
# Stored data: the three arrays are indexed by the element's id attribute
my (@Nodes, @Segments, @Ways, %Stats);

# Processing stage
#----------------------------------------------
# Class-method call instead of the indirect-object form "new XML::Parser(...)",
# whose parsing depends on what names happen to be in scope.
my $P = XML::Parser->new(Handlers => {Start => \&DoStart, End => \&DoEnd, Char => \&DoChar});
$P->parsefile($Filename);
print STDERR "Creating output files\n";


# Combine way data into segments
#----------------------------------------------
# Every key stored on a way (its tags and its "segments" list) is copied onto
# each segment the way references.  One log line per way is written to
# ways.txt; the log is optional, so the copying does not depend on the log
# file being writable.
{
  my $HaveLog = open(my $WaysLog, ">", "ways.txt");
  warn "Could not open ways.txt for writing: $!\n" unless $HaveLog;

  foreach my $Way (@Ways){
    # @Ways is indexed by way id, so ids that never occurred are undef holes.
    # They are not ways: do not log them or count them as empty.
    next unless defined $Way;

    my @SubSegments = split(/,/,$Way->{"segments"});
    $Stats{"empty ways"}++ if(scalar(@SubSegments) < 1);

    if($HaveLog){
      printf {$WaysLog} "Copying keys: %s to segments %s\n",
        join(",",keys(%$Way)),
        join(",",@SubSegments);
    }

    # Each segment in a way inherits the way's attributes
    foreach my $Segment(@SubSegments){
      foreach my $Key(keys(%$Way)){
        $Segments[$Segment]{$Key} = $Way->{$Key};
      }
    }
  }

  if($HaveLog){
    # Buffered write errors only surface when the handle is closed
    close($WaysLog) or warn "Could not finish writing ways.txt: $!\n";
  }
}

# Main output (segments)
#----------------------------------------------
# One line per segment in osm.txt:
#   from-lat,from-lon,to-lat,to-lon,class,name,highway
if(open(my $OsmFile, ">", "osm.txt")){
  foreach my $Segment(@Segments){
    # @Segments is indexed by segment id; ids that never occurred are undef
    # holes, not segments, so they produce no output line and no statistic.
    next unless defined $Segment;

    my $From = $Segment->{"from"};
    my $To = $Segment->{"to"};
    $Stats{"segments without endpoints"}++ if($From == 0 or $To == 0);

    # Fetch the endpoint records first: a plain element read does not create
    # an entry in @Nodes when the referenced node was never parsed.
    my $FromNode = $Nodes[$From] || {};
    my $ToNode = $Nodes[$To] || {};

    printf {$OsmFile} "%f,%f,%f,%f,%s,%s,%s\n",
      $FromNode->{"lat"},
      $FromNode->{"lon"},
      $ToNode->{"lat"},
      $ToNode->{"lon"},
      $Segment->{"class"},
      $Segment->{"name"},
      $Segment->{"highway"};
  }
  # Buffered write errors only surface when the handle is closed
  close($OsmFile) or warn "Could not finish writing osm.txt: $!\n";
}
else{
  warn "Could not open osm.txt for writing: $!\n";
}

# Secondary output (named points)
#----------------------------------------------
# One line in points.txt for every node that has a name, amenity or class:
#   lat,lon,name,amenity,class
if(open(my $PointsFile, ">", "points.txt")){
  foreach my $Node(@Nodes){
    # @Nodes is indexed by node id.  Skip undef holes (ids that never
    # occurred) and empty placeholder records: a parsed node always carries
    # at least its lat/lon keys, so an empty hash is not a real node.
    next unless($Node && %$Node);

    $Stats{"Nodes with zero lat/long"}++ if($Node->{"lat"} == 0 and $Node->{"lon"} == 0);

    if($Node->{"name"} || $Node->{"amenity"} || $Node->{"class"}){
      printf {$PointsFile} "%f,%f,%s,%s,%s\n",
        $Node->{"lat"},
        $Node->{"lon"},
        $Node->{"name"},
        $Node->{"amenity"},
        $Node->{"class"};
    }
  }
  # Buffered write errors only surface when the handle is closed
  close($PointsFile) or warn "Could not finish writing points.txt: $!\n";
}
else{
  warn "Could not open points.txt for writing: $!\n";
}

# Statistics output
#----------------------------------------------
# stats.txt lists every tag key with its usage count (most used first),
# followed by the general counters collected in %Stats.
if(open(my $StatsFile, ">", "stats.txt")){
  # Equal counts are ordered by key name so repeated runs give identical files
  foreach my $TagKey (sort {$AllTags{$b} <=> $AllTags{$a} or $a cmp $b} keys(%AllTags)){
    printf {$StatsFile} "* %d %s\n", $AllTags{$TagKey}, $TagKey;
  }
  printf {$StatsFile} "\n\nStats:\n";
  # Sorted for the same reason: hash order differs from run to run
  foreach my $StatName (sort(keys(%Stats))){
    printf {$StatsFile} "* %d %s\n", $Stats{$StatName}, $StatName;
  }
  # Buffered write errors only surface when the handle is closed
  close($StatsFile) or warn "Could not finish writing stats.txt: $!\n";
}
else{
  warn "Could not open stats.txt for writing: $!\n";
}
print STDERR "Done\n";
exit;

# Function is called whenever an XML tag is started
#----------------------------------------------
# Arguments: the Expat parser object (unused), the element name, then the
# element's attributes as a flat name => value list.
#
# <node>, <segment> and <way> begin a new record: the collected tags are
# cleared, the attributes are remembered in %MainAttr and $Type is set to
# "n", "s" or "w".  <tag> adds its k/v pair to the current record and updates
# the tag statistics.  <seg> appends its id to the current way's segment
# list.  Any other element is ignored.
#
# Declared without a prototype: the previous empty prototype "()" described a
# sub taking no arguments, which made any direct call with arguments a
# compile-time error.
sub DoStart
{
  my ($Expat, $Name, %Attr) = @_;

  if($Name eq "node"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "n";
  }
  elsif($Name eq "segment"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "s";
  }
  elsif($Name eq "way"){
    undef %Tags;
    undef @WaySegments;   # a way also starts a fresh list of <seg> ids
    %MainAttr = %Attr;
    $Type = "w";
  }
  elsif($Name eq "tag"){
    # TODO: protect against id,from,to,lat,long,etc. being used as tags
    $Tags{$Attr{"k"}} = $Attr{"v"};
    $AllTags{$Attr{"k"}}++;
    $Stats{"tags"}++;
  }
  elsif($Name eq "seg"){
    push(@WaySegments, $Attr{"id"});
  }
  return;
}

# Function is called whenever an XML tag is ended
#----------------------------------------------
# Arguments: the Expat parser object (unused) and the name of the element
# that just closed.
#
# When a <node>, <segment> or <way> closes, the attributes remembered in
# %MainAttr and the tags collected in %Tags are stored in @Nodes, @Segments
# or @Ways under the element's id, and the matching counters in %Stats are
# incremented.  Other elements are ignored.
#
# Tags are copied first and the structural fields (lat/lon, from/to,
# segments) are written last, so a tag that happens to use one of those keys
# cannot overwrite the real value.
#
# Declared without a prototype: the previous empty prototype "()" described a
# sub taking no arguments.
sub DoEnd{
  my ($Expat, $Element) = @_;

  if($Element eq "node"){
    my $ID = $MainAttr{"id"};
    foreach my $Key (keys(%Tags)){
      $Nodes[$ID]{$Key} = $Tags{$Key};
    }
    $Nodes[$ID]{"lat"} = $MainAttr{"lat"};
    $Nodes[$ID]{"lon"} = $MainAttr{"lon"};
    $Stats{"named nodes"}++ if($Nodes[$ID]{"name"});
    # NOTE(review): this tests a "tags" attribute on the element itself, not
    # the <tag> children collected in %Tags - confirm that is intended.
    $Stats{"tagged nodes"}++ if($MainAttr{"tags"});
    $Stats{"nodes"}++;
    #print "Node:".join(",",keys(%Tags))."\n" if(scalar(keys(%Tags))>0);
  }
  elsif($Element eq "segment"){
    my $ID = $MainAttr{"id"};
    foreach my $Key (keys(%Tags)){
      $Segments[$ID]{$Key} = $Tags{$Key};
    }
    $Segments[$ID]{"from"} = $MainAttr{"from"};
    $Segments[$ID]{"to"} = $MainAttr{"to"};
    # NOTE(review): same "tags" attribute check as for nodes - confirm.
    $Stats{"tagged segments"}++ if($MainAttr{"tags"});
    $Stats{"segments"}++;
  }
  elsif($Element eq "way"){
    my $ID = $MainAttr{"id"};
    foreach my $Key (keys(%Tags)){
      $Ways[$ID]{$Key} = $Tags{$Key};
    }
    # Comma-separated list of the ids collected from the way's <seg> children
    $Ways[$ID]{"segments"} = join(",",@WaySegments);
    $Stats{"Ways"}++;
  }
  return;
}

# Function is called whenever text is encountered in the XML file
#----------------------------------------------
# Arguments: the Expat parser object and the text that was read.  Both are
# ignored: everything this script stores comes from element attributes, so
# the handler deliberately does nothing.
#
# Declared without a prototype: the previous empty prototype "()" described a
# sub taking no arguments.
sub DoChar{
  return;
}
Personal tools
Namespaces
Variants
Actions
site
Toolbox