User FixKarlsruheSchema:Xybot

From OpenStreetMap Wiki
Jump to: navigation, search

This is the FixKarlsruheSchema ruleset of the Xybot script.

# begin($me)
#
# Registers this ruleset's identity on Data::Primitive: the creator name
# and the comment describing the edits. The invocant is accepted but not
# used. Returns whatever Data::Primitive->setComment returns.
# NOTE(review): presumably called once by Xybot before any object is
# processed - confirm against the caller.
sub begin {
  my ($self) = @_;

  my $creator = "FixKarlsruheSchema";
  my $comment = 'Fixing typo errors in keys of the Karlsruhe Schema';

  Data::Primitive->setCreator($creator);
  Data::Primitive->setComment($comment);
}

# Lookup table: misspelled tag key => canonical Karlsruhe Schema key.
#
# process() looks a tag key up by its lower-cased form (lc), so every
# left-hand side here must be written in lower case. Entries that map a
# key to itself are intentional: they let upper/mixed-case spellings of
# an otherwise correct key (e.g. "Addr:City") be rewritten to the
# canonical lower-case form.
#
# Each left-hand side appears exactly once; a repeated key in a hash
# literal is silently overwritten by the later entry.
my $typo_mapping_keys = {
  # --- addr:city ---
  'add:city'              => 'addr:city',
  'addr4:city'            => 'addr:city',
  'addr city'             => 'addr:city',
  'addr_city'             => 'addr:city',
  'addr:city'             => 'addr:city',
  'addr.city'             => 'addr:city',
  # NOTE(review): whether this non-ASCII key matches depends on the
  # encoding of the incoming tag keys - confirm.
  'áddr:city'             => 'addr:city',
  'addr:cyti'             => 'addr:city',
  'addr:town'             => 'addr:city',
  'adr:city'              => 'addr:city',

  # --- address ---
  'addres'                => 'address',
  'adress'                => 'address',

  # --- addr:full ---
  'add:full'              => 'addr:full',
  'addr:ful'              => 'addr:full',
  'addr full'             => 'addr:full',
  'addr_full'             => 'addr:full',
  'addr:full'             => 'addr:full',
  'addr.full'             => 'addr:full',
  'adr:full'              => 'addr:full',

  # --- addr:housename ---
  'addr:building_name'    => 'addr:housename',

  # --- addr:housenumber ---
  '-addr:housenumber'     => 'addr:housenumber',
  'addr:housenumbers'     => 'addr:housenumber',
  'add:housenumber'       => 'addr:housenumber',
  'addr:addr:housenumber' => 'addr:housenumber',
  'addr:hausenumber'      => 'addr:housenumber',
  'addr:hausnummer'       => 'addr:housenumber',
  'addr:houesenumber'     => 'addr:housenumber',
  'addr:hous'             => 'addr:housenumber',
  'addr:house'            => 'addr:housenumber',
  'addr:housennumber'     => 'addr:housenumber',
  'addr:houseno'          => 'addr:housenumber',
  'addr housenumber'      => 'addr:housenumber',
  'addr_housenumber'      => 'addr:housenumber',
  'addr:housenumber:'     => 'addr:housenumber',
  'addr:housenumber'      => 'addr:housenumber',
  'addr: housenumber'     => 'addr:housenumber',
  'addr.housenumber'      => 'addr:housenumber',
  'addr:housenumberg'     => 'addr:housenumber',
  'addr:housenumberkey'   => 'addr:housenumber',
  'addr:housenumer'       => 'addr:housenumber',
  'addr:housenummer'      => 'addr:housenumber',
  'addr:housenumner'      => 'addr:housenumber',
  'addr:houseumber'       => 'addr:housenumber',
  'addr:housnumber'       => 'addr:housenumber',
  'addr:husenumber'       => 'addr:housenumber',
  'addr:number'           => 'addr:housenumber',
  'adr:hausnummer'        => 'addr:housenumber',
  'adr:housenumber'       => 'addr:housenumber',
  'adr:housnumber'        => 'addr:housenumber',
  'ddr:housenumber'       => 'addr:housenumber',
  'keyaddr:housenumber'   => 'addr:housenumber',
  'addr:housenuber'       => 'addr:housenumber',
  'dr:housenumber'        => 'addr:housenumber',
  'addr:housember'        => 'addr:housenumber',

  # --- addr:interpolation ---
  'add:interpolation'     => 'addr:interpolation',
  'addr:interpolate'      => 'addr:interpolation',
  'addr interpolation'    => 'addr:interpolation',
  'addr_interpolation'    => 'addr:interpolation',
  'addr:interpolation'    => 'addr:interpolation',
  'addr.interpolation'    => 'addr:interpolation',
  'adr:interpolation'     => 'addr:interpolation',
  'addr:inter'            => 'addr:interpolation',
  'addr:interpolated'     => 'addr:interpolation',

  # --- addr:postcode ---
  '-addr_postcode'        => 'addr:postcode',
  'add:postcode'          => 'addr:postcode',
  'addr:plz'              => 'addr:postcode',
  'addr:postal_code'      => 'addr:postcode',
  'addr:postalcode'       => 'addr:postcode',
  'addr:postcod'          => 'addr:postcode',
  'addr postcode'         => 'addr:postcode',
  'addr_postcode'         => 'addr:postcode',
  'addr:postcode'         => 'addr:postcode',
  'addr.postcode'         => 'addr:postcode',
  'addrpostcode'          => 'addr:postcode',
  'addr:postcodw'         => 'addr:postcode',
  'addr:zip'              => 'addr:postcode',
  'adr:postal_code'       => 'addr:postcode',
  'adr:postalcode'        => 'addr:postcode',
  'adr:postcode'          => 'addr:postcode',
  'adddr:postcode'        => 'addr:postcode',
  'addr:postode'          => 'addr:postcode',

  # --- addr:street ---
  '-addr_street'          => 'addr:street',
  'addr:steet'            => 'addr:street',
  'addr street'           => 'addr:street',
  'addr_street'           => 'addr:street',
  'addr:street'           => 'addr:street',
  'addr.street'           => 'addr:street',
  'addr:streetname'       => 'addr:street',
  'addr:stret'            => 'addr:street',
  'add:street'            => 'addr:street',
  'adress_street'         => 'addr:street',
  'adrr:street'           => 'addr:street',
  'adr:street'            => 'addr:street',
  'addr:cstreet'          => 'addr:street',
  'addrr:stete'           => 'addr:street',
  'addr:avenue'           => 'addr:street',

  # --- addr:country ---
  '-addr:country'         => 'addr:country',
  'addr:country'          => 'addr:country',
  'addr.country'          => 'addr:country',
  'addr_country'          => 'addr:country',
  'addr country'          => 'addr:country',
  'addr:counrty'          => 'addr:country',
  'add:country'           => 'addr:country',
  'adr:country'           => 'addr:country',
  'aaddr:country'         => 'addr:country',

  # --- contact details filed under addr: ---
  'addr:facsimile'        => 'addr:fax',
  'addr:telephon'         => 'addr:phone',
  'addr:telephone'        => 'addr:phone',
  'addr:tel'              => 'addr:phone',
  'addr:web'              => 'addr:website',
  'addr:www'              => 'addr:website',
  'addr:internet'         => 'addr:website',
};

# Lookup table: "key|value" of a tag => replacement "key|value".
#
# process() builds the lookup string as lc(key . "|" . value), so the
# left-hand sides are written the way that lower-cased string looks.
# The right-hand side is split on '#', which allows one entry to expand
# into several "key|value" pairs; the entries below each yield one pair.
#
# NOTE(review): the left-hand sides with non-ASCII capital letters are
# kept exactly as written; whether they match presumably depends on how
# lc() treats the encoding of the incoming tag values - confirm before
# changing their case.
my $typo_mapping_pairs = {
  # Austria
  'addr:country|a'               => 'addr:country|AT',
  'addr:country|austria'         => 'addr:country|AT',
  'addr:country|austria, wien'   => 'addr:country|AT',
  'addr:country|österreich'      => 'addr:country|AT',

  # Australia, Canada, Belgium
  'addr:country|australia'       => 'addr:country|AU',
  'addr:country|canada'          => 'addr:country|CA',
  'addr:country|belgium'         => 'addr:country|BE',

  # Switzerland
  'addr:country|switzerland'     => 'addr:country|CH',
  'addr:country|schweiz'         => 'addr:country|CH',

  # Chile, Colombia
  'addr:country|chile'           => 'addr:country|CL',
  'addr:country|colombia'        => 'addr:country|CO',

  # Czech Republic, Bulgaria
  'addr:country|Čr'              => 'addr:country|CZ',
  'addr:country|czech republic'  => 'addr:country|CZ',
  'addr:country|България'        => 'addr:country|BG',

  # Germany (including federal states entered as the country)
  'addr:country|d'               => 'addr:country|DE',
  'addr:country|deutschland'     => 'addr:country|DE',
  'addr:country|germany'         => 'addr:country|DE',
  'addr:country|nrw'             => 'addr:country|DE',
  'addr:country|niedersachsen'   => 'addr:country|DE',
  'addr:country|brandenburg'     => 'addr:country|DE',

  # Estonia, Spain
  'addr:country|est'             => 'addr:country|EE',
  'addr:country|españa'          => 'addr:country|ES',

  # France
  'addr:country|f'               => 'addr:country|FR',
  'addr:country|france'          => 'addr:country|FR',

  # United Kingdom
  'addr:country|scotland'        => 'addr:country|GB',
  'addr:country|uk'              => 'addr:country|GB',

  # Lithuania, Luxembourg
  'addr:country|lithuania'       => 'addr:country|LT',
  'addr:country|lëtzebuerg'      => 'addr:country|LU',

  # Netherlands
  'addr:country|niederlande'     => 'addr:country|NL',
  'addr:country|the netherlands' => 'addr:country|NL',

  # Poland, Romania
  'addr:country|polska'          => 'addr:country|PL',
  'addr:country|romania'         => 'addr:country|RO',

  # Russia
  'addr:country|Россия'          => 'addr:country|RU',
  'addr:country|russia'          => 'addr:country|RU',

  # Sweden, Slovakia
  'addr:country|sweden'          => 'addr:country|SE',
  'addr:country|slovakia'        => 'addr:country|SK',

  # Ukraine (two spellings differing in the case of one letter)
  'addr:country|УкраЇна'         => 'addr:country|UA',
  'addr:country|Україна'         => 'addr:country|UA',
};

sub process {
  my ($me, $obj) = @_;

  my $resultstr = "";
  my $msg = "";
  my $work = $obj;
  my $clone = undef;
  my $k;
  my $v;

  while (($k,$v) = each(%{$work->{"tags"}})) {
    my $key = $k;
    my $val = $v;
    $key =~ s/^\s*(.*?)\s*$/$1/;	# remove surrounding space in keys
    $val =~ s/^\s*(.*?)\s*$/$1/;	# remove surrounding space in values
    
    $key = $typo_mapping_keys->{lc($key)} if (defined($typo_mapping_keys->{lc($key)}) && $typo_mapping_keys->{lc($key)} ne $k);

    my @newpairs = split(/#/,$typo_mapping_pairs->{lc($key."|".$val)});
    if ($newpairs[0] =~ /(.+)\|(.*)/) {
      $key = $1;
      $val = $2;
      shift @newpairs;
    }

    # delete keys with empty values
    if ($val eq "") {
      $clone=$work->clone() unless defined($clone);
      delete $clone->{"tags"}->{$k};
      $resultstr .= sprintf " - deleting tag '%s=%s'", $k, $v,
      $work=$clone;
    } elsif ($k ne $key && $v ne $val) {
      $clone=$work->clone() unless defined($clone);
      delete $clone->{"tags"}->{$k};
      $clone->{"tags"}->{$key} = $val;
      $resultstr .= sprintf " - modifying tag '%s=%s' to '%s=%s'", $k, $v, $key, $val;
      $work=$clone;
    } elsif ($k ne $key) {
      $clone=$work->clone() unless defined($clone);
      delete $clone->{"tags"}->{$k};
      $clone->{"tags"}->{$key} = $v;
      $resultstr .= sprintf " - modifying key '%s' to '%s' value '%s'", $k, $key, $v;
      $work=$clone;
    } elsif ($v ne $val) {
      $clone=$work->clone() unless defined($clone);
      $clone->{"tags"}->{$k} = $val;
      $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $v, $val;
      $work=$clone;
    }

    while (@newpairs) {
      if ($newpairs[0] =~ /(.+)\|(.+)/) {
        $key = $1;
        $val = $2;
        shift @newpairs;
        $h = $work->{"tags"}->{$key};
        if (defined($h) && $h ne $val) {
          $clone=$work->clone() unless defined($clone);
          $clone->{"tags"}->{$key} = $val;
          $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $h, $val;
          $work=$clone;
        } elsif (!defined($h)) {
          $clone=$work->clone() unless defined($clone);
          $clone->{"tags"}->{$key} = $val;
          $resultstr .= sprintf " - adding tag '%s=%s'", $key, $val;
          $work=$clone;
        }
      }
    }
  }
    
  my $h=$work->{"tags"}->{"addr:country"};
  if (defined($h)) {
    if ($h=~/^(AD|AE|AF|AG|AI|AL|AM|AN|AO|AQ|AR|AS|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BJ|BL|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EE|EG|EH|ER|ES|ET|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|IO|IQ|IR|IS|IT|JE|JM|JO|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MF|MG|MH|MK|ML|MM|MN|MO|MP|MQ|MR|MS|MT|MU|MV|MW|MX|MY|MZ|NA|NC|NE|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SV|SY|SZ|TC|TD|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TR|TT|TV|TW|TZ|UA|UG|UM|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|YT|ZA|ZM|ZW)$/i) {
      if ($h ne uc($h)) {
        $clone=$work->clone() unless defined($clone);
        $clone->{"tags"}->{"addr:country"} = uc($h);
        $resultstr .= sprintf " - key 'addr:country' modifying value '%s' to '%s'", $h, uc($h);
        $work=$clone;
      }
    } else {
      $resultstr .= sprintf " - key 'addr:country' has illegal value '%s'", $h;
    }
  }