PBF Perl Parser

From OpenStreetMap Wiki
Jump to: navigation, search

To parse PBF files in Perl, you will need two modules from CPAN:

  • Google::ProtocolBuffers::Dynamic [1]
  • Compress::Zlib from the IO::Compress suite [2]

Then you need to load the two Protocol Buffers definition files (fileformat.proto and osmformat.proto) from an OSM source, for instance:

https://github.com/scrosby/OSM-binary/tree/master/src

Finally, this skeleton script should help you get started:

use strict; use warnings;
use Google::ProtocolBuffers::Dynamic;
use Compress::Zlib 'uncompress';

# Schema setup: load fileformat.proto and osmformat.proto (both available from
# https://github.com/scrosby/OSM-binary/tree/master/src) and map their
# messages to Perl classes named OSMPBF::*.
my $dynamic = Google::ProtocolBuffers::Dynamic->new;
for my $proto ('fileformat.proto', 'osmformat.proto') {
    $dynamic->load_string($proto, read_file($proto));
}
$dynamic->map({
    package => 'OSMPBF',
    prefix  => 'OSMPBF',
    # Fields are then read through a single method per field, e.g. $msg->type.
    options => { accessor_style => 'single_accessor' },
});

# Walk through the PBF file named by the first command-line argument.
# The file is a sequence of records, each made of:
#   - a 4-byte big-endian length,
#   - a BlobHeader message of that length,
#   - a Blob message of BlobHeader.datasize bytes.
# Dies on a missing argument, an unreadable or truncated file, or a blob
# that is neither raw nor zlib-compressed.
my $file = $ARGV[0];
die "usage: $0 <file.osm.pbf>\n" unless defined $file;
open my $fh, '<', $file or die "cannot open $file: $!";
binmode $fh;
my $file_sz = (stat $fh)[7];
my $readed = 0;    # bytes consumed so far; only used for the progress figure
while (1) {
    # read() returns undef on error and 0 at end of file; tell them apart
    # so that an I/O error or a truncated prefix is not taken for a clean EOF.
    my $got = read($fh, my $prefix, 4);
    die "cannot read $file: $!" unless defined $got;
    last if $got == 0;
    die "truncated length prefix ($got of 4 bytes)" unless $got == 4;
    $readed += 4;

    my $sz = unpack "N", $prefix;
    $got = read($fh, my $data, $sz);
    die "cannot read $sz bytes" unless defined $got && $got == $sz;
    $readed += $sz;
    my $blobheader = OSMPBF::BlobHeader->decode($data);

    $sz = $blobheader->datasize;
    $got = read($fh, $data, $sz);
    die "cannot read $sz bytes" unless defined $got && $got == $sz;
    $readed += $sz;

    # The reported size is 0 for pipes and other special files; avoid
    # dividing by zero there and just report 0%.
    my $ratio = $file_sz ? $readed / $file_sz * 100 : 0;

    my $blob = OSMPBF::Blob->decode($data);
    if ($blob->has_raw) {
        $data = $blob->raw;
    } elsif ($blob->has_zlib_data) {
        die "cannot uncompress block" unless defined($data = uncompress($blob->zlib_data));
    } else {
        die "unknown compression type";
    }
    printf "buffer is %d bytes - header announced %d bytes, processed: %.3f%%\n", length($data), $blob->raw_size, $ratio;

    # Skeleton: the decoded messages are where your own processing goes.
    if ($blobheader->type eq 'OSMData') {
        my $primitive = OSMPBF::PrimitiveBlock->decode($data);
    } elsif ($blobheader->type eq 'OSMHeader') {
        my $headerblock = OSMPBF::HeaderBlock->decode($data);
    }
}
close $fh;

# Return the whole content of the file named by the first argument as a
# single string. Dies, naming the file and the system error, if the file
# cannot be opened.
sub read_file {
    my ($path) = @_;
    open my $fh, '<', $path or die "cannot open $path: $!";
    local $/;    # no input record separator: one read returns the whole file
    my $content = <$fh>;
    close $fh;
    return $content;
}

See also