User FixKarlsruheSchema:Xybot
This is the FixKarlsruheSchema ruleset of the Xybot script.
<source lang=perl> sub begin {
    # Registers the creator name and the comment text of this ruleset on
    # Data::Primitive. The invocant is accepted but not used.
    # Returns whatever Data::Primitive->setComment returns.
    my ($me) = @_;

    my $creator = "FixKarlsruheSchema";
    my $comment = 'Fixing typo errors in keys of the Karlsruhe Schema';

    Data::Primitive->setCreator($creator);
    Data::Primitive->setComment($comment);
}
# Hash reference mapping a misspelled tag key to its canonical key.
# sub process looks keys up in lower case, so several canonical keys are
# also listed as their own variant: that lets a key which differs only in
# letter case be rewritten to the canonical spelling.
my $typo_mapping_keys = do {
    # One entry per canonical key: [ canonical key, [ variants ... ] ].
    my @variant_groups = (
        [ "addr:city", [
            "add:city", "addr4:city", "addr city", "addr_city", "addr:city",
            "addr.city", "áddr:city", "addr:cyti", "addr:town", "adr:city",
        ] ],
        [ "address", [
            "addres", "adress",
        ] ],
        [ "addr:full", [
            "add:full", "addr:ful", "addr full", "addr_full", "addr:full",
            "addr.full", "adr:full",
        ] ],
        [ "addr:housename", [
            "addr:building_name",
        ] ],
        [ "addr:housenumber", [
            "-addr:housenumber", "addr:housenumbers", "add:housenumber",
            "addr:addr:housenumber", "addr:hausenumber", "addr:hausnummer",
            "addr:houesenumber", "addr:hous", "addr:house",
            "addr:housennumber", "addr:houseno", "addr housenumber",
            "addr_housenumber", "addr:housenumber:", "addr:housenumber",
            "addr: housenumber", "addr.housenumber", "addr:housenumberg",
            "addr:housenumberkey", "addr:housenumer", "addr:housenummer",
            "addr:housenumner", "addr:houseumber", "addr:housnumber",
            "addr:husenumber", "addr:number", "adr:hausnummer",
            "adr:housenumber", "adr:housnumber", "ddr:housenumber",
            "keyaddr:housenumber", "addr:housenuber", "dr:housenumber",
            "addr:housember",
        ] ],
        [ "addr:interpolation", [
            "add:interpolation", "addr:interpolate", "addr interpolation",
            "addr_interpolation", "addr:interpolation", "addr.interpolation",
            "adr:interpolation", "addr:inter", "addr:interpolated",
        ] ],
        [ "addr:postcode", [
            "-addr_postcode", "add:postcode", "addr:plz", "addr:postal_code",
            "addr:postalcode", "addr:postcod", "addr postcode",
            "addr_postcode", "addr:postcode", "addr.postcode", "addrpostcode",
            "addr:postcodw", "addr:zip", "adr:postal_code", "adr:postalcode",
            "adr:postcode", "adddr:postcode", "addr:postode",
        ] ],
        [ "addr:street", [
            "-addr_street", "addr:steet", "addr street", "addr_street",
            "addr:street", "addr.street", "addr:streetname", "addr:stret",
            "add:street", "adress_street", "adrr:street", "adr:street",
            "addr:cstreet", "addrr:stete", "addr:avenue",
        ] ],
        [ "addr:country", [
            "-addr:country", "addr:country", "addr.country", "addr_country",
            "addr country", "addr:counrty", "add:country", "adr:country",
            "aaddr:country",
        ] ],
        [ "addr:fax", [
            "addr:facsimile",
        ] ],
        [ "addr:phone", [
            "addr:telephon", "addr:telephone", "addr:tel",
        ] ],
        [ "addr:website", [
            "addr:web", "addr:www", "addr:internet",
        ] ],
    );

    # Flatten the groups into the variant => canonical lookup table.
    my %canonical_for;
    for my $group (@variant_groups) {
        my ($canonical, $variants) = @$group;
        for my $variant (@$variants) {
            $canonical_for{$variant} = $canonical;
        }
    }
    \%canonical_for;
};
# Hash reference mapping a misspelled "key|value" pair to its replacement
# "key|value" pair. sub process builds the lookup string as
# lc($key."|".$val) and splits the replacement on '#'; every replacement
# listed here consists of a single pair.
my $typo_mapping_pairs = do {
    # Flat list of (replacement pair, [ misspelled pairs ... ]) couples.
    my @replacement_groups = (
        "addr:country|AT" => [
            "addr:country|a",
            "addr:country|austria",
            "addr:country|austria, wien",
            "addr:country|österreich",
        ],
        "addr:country|AU" => [
            "addr:country|australia",
        ],
        "addr:country|CA" => [
            "addr:country|canada",
        ],
        "addr:country|BE" => [
            "addr:country|belgium",
        ],
        "addr:country|CH" => [
            "addr:country|switzerland",
            "addr:country|schweiz",
        ],
        "addr:country|CL" => [
            "addr:country|chile",
        ],
        "addr:country|CO" => [
            "addr:country|colombia",
        ],
        "addr:country|CZ" => [
            "addr:country|Čr",
            "addr:country|czech republic",
        ],
        "addr:country|BG" => [
            "addr:country|България",
        ],
        "addr:country|DE" => [
            "addr:country|d",
            "addr:country|deutschland",
            "addr:country|germany",
            "addr:country|nrw",
            "addr:country|niedersachsen",
            "addr:country|brandenburg",
        ],
        "addr:country|EE" => [
            "addr:country|est",
        ],
        "addr:country|ES" => [
            "addr:country|españa",
        ],
        "addr:country|FR" => [
            "addr:country|f",
            "addr:country|france",
        ],
        "addr:country|GB" => [
            "addr:country|scotland",
            "addr:country|uk",
        ],
        "addr:country|LT" => [
            "addr:country|lithuania",
        ],
        "addr:country|LU" => [
            "addr:country|lëtzebuerg",
        ],
        "addr:country|NL" => [
            "addr:country|niederlande",
            "addr:country|the netherlands",
        ],
        "addr:country|PL" => [
            "addr:country|polska",
        ],
        "addr:country|RO" => [
            "addr:country|romania",
        ],
        "addr:country|RU" => [
            "addr:country|Россия",
            "addr:country|russia",
        ],
        "addr:country|SE" => [
            "addr:country|sweden",
        ],
        "addr:country|SK" => [
            "addr:country|slovakia",
        ],
        "addr:country|UA" => [
            "addr:country|УкраЇна",
            "addr:country|Україна",
        ],
    );

    # Consume the list two items at a time and point every misspelled pair
    # of a group at that group's replacement via a hash slice.
    my %replacement_for;
    while (my ($replacement, $misspellings) = splice(@replacement_groups, 0, 2)) {
        @replacement_for{@$misspellings} = ($replacement) x @$misspellings;
    }
    \%replacement_for;
};
sub process {
my ($me, $obj) = @_;
my $resultstr = ""; my $msg = ""; my $work = $obj; my $clone = undef; my $k; my $v;
while (($k,$v) = each(%{$work->{"tags"}})) { my $key = $k; my $val = $v; $key =~ s/^\s*(.*?)\s*$/$1/; # remove surrounding space in keys $val =~ s/^\s*(.*?)\s*$/$1/; # remove surrounding space in values $key = $typo_mapping_keys->{lc($key)} if (defined($typo_mapping_keys->{lc($key)}) && $typo_mapping_keys->{lc($key)} ne $k);
my @newpairs = split(/#/,$typo_mapping_pairs->{lc($key."|".$val)}); if ($newpairs[0] =~ /(.+)\|(.*)/) { $key = $1; $val = $2; shift @newpairs; }
# delete keys with empty values if ($val eq "") { $clone=$work->clone() unless defined($clone); delete $clone->{"tags"}->{$k}; $resultstr .= sprintf " - deleting tag '%s=%s'", $k, $v, $work=$clone; } elsif ($k ne $key && $v ne $val) { $clone=$work->clone() unless defined($clone); delete $clone->{"tags"}->{$k}; $clone->{"tags"}->{$key} = $val; $resultstr .= sprintf " - modifying tag '%s=%s' to '%s=%s'", $k, $v, $key, $val; $work=$clone; } elsif ($k ne $key) { $clone=$work->clone() unless defined($clone); delete $clone->{"tags"}->{$k}; $clone->{"tags"}->{$key} = $v; $resultstr .= sprintf " - modifying key '%s' to '%s' value '%s'", $k, $key, $v; $work=$clone; } elsif ($v ne $val) { $clone=$work->clone() unless defined($clone); $clone->{"tags"}->{$k} = $val; $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $v, $val; $work=$clone; }
while (@newpairs) { if ($newpairs[0] =~ /(.+)\|(.+)/) { $key = $1; $val = $2; shift @newpairs; $h = $work->{"tags"}->{$key}; if (defined($h) && $h ne $val) { $clone=$work->clone() unless defined($clone); $clone->{"tags"}->{$key} = $val; $resultstr .= sprintf " - key '%s' modifying value '%s' to '%s'", $k, $h, $val; $work=$clone; } elsif (!defined($h)) { $clone=$work->clone() unless defined($clone); $clone->{"tags"}->{$key} = $val; $resultstr .= sprintf " - adding tag '%s=%s'", $key, $val; $work=$clone; } } } } my $h=$work->{"tags"}->{"addr:country"}; if (defined($h)) { if ($h=~/^(AD|AE|AF|AG|AI|AL|AM|AN|AO|AQ|AR|AS|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BJ|BL|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EE|EG|EH|ER|ES|ET|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|IO|IQ|IR|IS|IT|JE|JM|JO|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MF|MG|MH|MK|ML|MM|MN|MO|MP|MQ|MR|MS|MT|MU|MV|MW|MX|MY|MZ|NA|NC|NE|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SV|SY|SZ|TC|TD|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TR|TT|TV|TW|TZ|UA|UG|UM|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|YE|YT|ZA|ZM|ZW)$/i) { if ($h ne uc($h)) { $clone=$work->clone() unless defined($clone); $clone->{"tags"}->{"addr:country"} = uc($h); $resultstr .= sprintf " - key 'addr:country' modifying value '%s' to '%s'", $h, uc($h); $work=$clone; } } else { $resultstr .= sprintf " - key 'addr:country' has illegal value '%s'", $h; } }