User FixKarlsruheSchema:Xybot

From OpenStreetMap Wiki
Jump to: navigation, search

This is the FixKarlsruheSchema ruleset of the Xybot script.

<source lang=perl> sub begin {
    # Start hook of the FixKarlsruheSchema ruleset.
    #
    # Hands the ruleset name and a description of the edits to
    # Data::Primitive via its setCreator/setComment class methods.
    # NOTE(review): presumably these end up as the creator/comment of the
    # uploaded changes -- confirm against Data::Primitive.
    #
    # Arguments: the invocant (unused here).
    # Returns:   whatever setComment returns (last statement evaluated).
    my ($me) = @_;

    Data::Primitive->setCreator("FixKarlsruheSchema");
    Data::Primitive->setComment('Fixing typo errors in keys of the Karlsruhe Schema');
}

# Lookup table: misspelled tag key => canonical Karlsruhe Schema key.
#
# process() looks keys up with lc($key), so every spelling listed here must be
# lower case. A canonical key that also appears in its own spelling list
# (e.g. "addr:city") is there on purpose: it lets differently-cased input such
# as "Addr:City" be rewritten to the canonical lower-case form.
my $typo_mapping_keys = do {
    # One row per canonical key: the canonical key first, then every
    # spelling that is rewritten to it.
    my @canonical_then_spellings = (
        [ "addr:city",
            "add:city", "addr4:city", "addr city", "addr_city", "addr:city",
            "addr.city", "áddr:city", "addr:cyti", "addr:town", "adr:city",
        ],
        [ "address",
            "addres", "adress",
        ],
        [ "addr:full",
            "add:full", "addr:ful", "addr full", "addr_full", "addr:full",
            "addr.full", "adr:full",
        ],
        [ "addr:housename",
            "addr:building_name",
        ],
        [ "addr:housenumber",
            "-addr:housenumber", "addr:housenumbers", "add:housenumber",
            "addr:addr:housenumber", "addr:hausenumber", "addr:hausnummer",
            "addr:houesenumber", "addr:hous", "addr:house",
            "addr:housennumber", "addr:houseno", "addr housenumber",
            "addr_housenumber", "addr:housenumber:", "addr:housenumber",
            "addr: housenumber", "addr.housenumber", "addr:housenumberg",
            "addr:housenumberkey", "addr:housenumer", "addr:housenummer",
            "addr:housenumner", "addr:houseumber", "addr:housnumber",
            "addr:husenumber", "addr:number", "adr:hausnummer",
            "adr:housenumber", "adr:housnumber", "ddr:housenumber",
            "keyaddr:housenumber", "addr:housenuber", "dr:housenumber",
            "addr:housember",
        ],
        [ "addr:interpolation",
            "add:interpolation", "addr:interpolate", "addr interpolation",
            "addr_interpolation", "addr:interpolation", "addr.interpolation",
            "adr:interpolation", "addr:inter", "addr:interpolated",
        ],
        [ "addr:postcode",
            "-addr_postcode", "add:postcode", "addr:plz", "addr:postal_code",
            "addr:postalcode", "addr:postcod", "addr postcode",
            "addr_postcode", "addr:postcode", "addr.postcode", "addrpostcode",
            "addr:postcodw", "addr:zip", "adr:postal_code", "adr:postalcode",
            "adr:postcode", "adddr:postcode", "addr:postode",
        ],
        [ "addr:street",
            "-addr_street", "addr:steet", "addr street", "addr_street",
            "addr:street", "addr.street", "addr:streetname", "addr:stret",
            "add:street", "adress_street", "adrr:street", "adr:street",
            "addr:cstreet", "addrr:stete", "addr:avenue",
        ],
        [ "addr:country",
            "-addr:country", "addr:country", "addr.country", "addr_country",
            "addr country", "addr:counrty", "add:country", "adr:country",
            "aaddr:country",
        ],
        [ "addr:fax",
            "addr:facsimile",
        ],
        [ "addr:phone",
            "addr:telephon", "addr:telephone", "addr:tel",
        ],
        [ "addr:website",
            "addr:web", "addr:www", "addr:internet",
        ],
    );

    # Flatten the rows into the spelling => canonical hash that is looked up.
    my %canonical_for;
    for my $row (@canonical_then_spellings) {
        my ($canonical, @spellings) = @{$row};
        $canonical_for{$_} = $canonical for @spellings;
    }
    \%canonical_for;
};

# Lookup table: "key|value" => corrected "key|value" for addr:country values
# that are not two-letter country codes (country names, abbreviations and a
# few German state names).
#
# process() looks entries up with lc($key."|".$val) and splits the result on
# '#', so a value may carry several '#'-separated "key|value" pairs; none of
# the entries below currently does.
# NOTE(review): a few names start with an upper-case non-ASCII letter although
# the lookup string is passed through lc(); whether those entries can match
# depends on how the script's strings are encoded -- confirm.
my $typo_mapping_pairs = do {
    # One row per target country code: the code first, then every
    # addr:country value that is rewritten to it.
    my @code_then_names = (
        [ "AT", "a", "austria", "austria, wien", "österreich" ],
        [ "AU", "australia" ],
        [ "CA", "canada" ],
        [ "BE", "belgium" ],
        [ "CH", "switzerland", "schweiz" ],
        [ "CL", "chile" ],
        [ "CO", "colombia" ],
        [ "CZ", "Čr", "czech republic" ],
        [ "BG", "България" ],
        [ "DE", "d", "deutschland", "germany", "nrw", "niedersachsen", "brandenburg" ],
        [ "EE", "est" ],
        [ "ES", "españa" ],
        [ "FR", "f", "france" ],
        [ "GB", "scotland", "uk" ],
        [ "LT", "lithuania" ],
        [ "LU", "lëtzebuerg" ],
        [ "NL", "niederlande", "the netherlands" ],
        [ "PL", "polska" ],
        [ "RO", "romania" ],
        [ "RU", "Россия", "russia" ],
        [ "SE", "sweden" ],
        [ "SK", "slovakia" ],
        [ "UA", "УкраЇна", "Україна" ],
    );

    # Expand each row into "addr:country|<name>" => "addr:country|<CODE>".
    my %fixed_pair_for;
    for my $row (@code_then_names) {
        my ($code, @names) = @{$row};
        $fixed_pair_for{ "addr:country|" . $_ } = "addr:country|" . $code for @names;
    }
    \%fixed_pair_for;
};

sub process {

 my ($me, $obj) = @_;
 my $resultstr = "";
 my $msg = "";
 my $work = $obj;
 my $clone = undef;
 my $k;
 my $v;
 while (($k,$v) = each(%{$work->{"tags"}})) {
   my $key = $k;
   my $val = $v;
   $key =~ s/^\s*(.*?)\s*$/$1/;	# remove surrounding space in keys
   $val =~ s/^\s*(.*?)\s*$/$1/;	# remove surrounding space in values
   
   $key = $typo_mapping_keys->{lc($key)} if (defined($typo_mapping_keys->{lc($key)}) && $typo_mapping_keys->{lc($key)} ne $k);
   my @newpairs = split(/#/,$typo_mapping_pairs->{lc($key."|".$val)});
   if ($newpairs[0] =~ /(.+)\|(.*)/) {
     $key = $1;
     $val = $2;
     shift @newpairs;
   }
   # delete keys with empty values
   if ($val eq "") {
     $clone=$work->clone() unless defined($clone);
     delete $clone->{"tags"}->{$k};
     $resultstr .= sprintf " - deleting tag '%s=%s'", $k, $v,
     $work=$clone;
   } elsif ($k ne $key && $v ne $val) {
     $clone=$work->clone() unless defined($clone);
     delete $clone->{"tags"}->{$k};
     $clone->{"tags"}->{$key} = $val;
     $resultstr .= sprintf " - modifying tag '%s=%s' to '%s=%s'", $k, $v, $key, $val;
     $work=$clone;
   } elsif ($k ne $key) {
     $clone=$work->clone() unless defined($clone);
     delete $clone->{"tags"}->{$k};
     $clone->{"tags"}->{$key} = $v;
     $resultstr .= sprintf " - modifying key '%s' to '%s' value '%s'", $k, $key, $v;
     $work=$clone;
   } elsif ($v ne $val) {
     $clone=$work->clone() unless defined($clone);
     $clone->{"tags"}->{$k} = $val;
     $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $v, $val;
     $work=$clone;
   }
   while (@newpairs) {
     if ($newpairs[0] =~ /(.+)\|(.+)/) {
       $key = $1;
       $val = $2;
       shift @newpairs;
       $h = $work->{"tags"}->{$key};
       if (defined($h) && $h ne $val) {
         $clone=$work->clone() unless defined($clone);
         $clone->{"tags"}->{$key} = $val;
         $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $h, $val;
         $work=$clone;
       } elsif (!defined($h)) {
         $clone=$work->clone() unless defined($clone);
         $clone->{"tags"}->{$key} = $val;
         $resultstr .= sprintf " - adding tag '%s=%s'", $key, $val;
         $work=$clone;
       }
     }
   }
 }
   
 my $h=$work->{"tags"}->{"addr:country"};
 if (defined($h)) {
   if ($h=~/^(AD|AE|AF|AG|AI|AL|AM|AN|AO|AQ|AR|AS|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BJ|BL|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EE|EG|EH|ER|ES|ET|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|IO|IQ|IR|IS|IT|JE|JM|JO|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MF|MG|MH|MK|ML|MM|MN|MO|MP|MQ|MR|MS|MT|MU|MV|MW|MX|MY|MZ|NA|NC|NE|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SV|SY|SZ|TC|TD|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TR|TT|TV|TW|TZ|UA|UG|UM|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|YT|ZA|ZM|ZW)$/i) {
     if ($h ne uc($h)) {
       $clone=$work->clone() unless defined($clone);
       $clone->{"tags"}->{"addr:country"} = uc($h);
       $resultstr .= sprintf " - key 'addr:country' modifying value '%s' to '%s'", $h, uc($h);
       $work=$clone;
     }
   } else {
     $resultstr .= sprintf " - key 'addr:country' has illegal value '%s'", $h;
   }
 }