ad04145791b59754c6a78f4dac94efdf92ede89d galt Wed Jun 19 15:29:48 2019 -0700 Updated geoIp mapping for our 3 official mirrors - RR, euro, asia with much better quality data from MaxMind. diff --git src/hg/geoIp/README.v3 src/hg/geoIp/README.v3 new file mode 100644 index 0000000..fffd81a --- /dev/null +++ src/hg/geoIp/README.v3 @@ -0,0 +1,190 @@ + +Steps to upgrade geoIp to use MaxMind free db. 2019-06-18 + +MaxMind has a free country to IP database. + +It is more complete than the software77.net free country-to-IP db that we used previously. + +These are the steps to upgrade the IPV4 geoIp system on RR and official mirrors. +One of the problems earlier was that the software77 was not very up-to-date +and did not have a bunch of new IPs that were allocated for china and other places in asia. +This upgrade should be fairly easy. + +Following a mixure of steps from the previous READMEs. + +cd $HOME/kent/src/hg/geoIp + +mkdir update2019 +cd update2019 + +wget 'https://geolite.maxmind.com/download/geoip/database/GeoLite2-Country-CSV.zip' +unzip GeoLite2-Country-CSV.zip +cd GeoLite2-Country-CSV_20190611/ + +geoIpToCountryMaxMind GeoLite2-Country-Blocks-IPv4.csv > geoIpCountry.tab +cp geoIpCountry.tab ../../ +cd ../.. + +# keep backup comparison copy in geoIpOld database +./save-old-for-compare + +# update .sql and .c just in case +./make-sql + +# reload the 8 tables +# tweaked script to drop existing table first +./load-tables + +# output says these 3 are not found. +'continentToNode.tab' not found +'countryToNode.tab' not found +'geoIpNode.tab' not found + +They should be created by other queries. + +hgsql hgFixed + +forgot to strip off the 1st row of the csv file. +Cleanup left-over header row. +delete from countryToContinent where continentId = '"continent code"'; + +The European Commission and many other organisations + are using 'XK' as a temporary country code for Kosovo till ISO officially assigns a code. +GeoNames will switch to the official ISO code as soon as it has been released. +insert into country values ('XK', 'Kosovo'); +insert into countryToContinent values ('XK', 'EU'); + +mysql> select * from countryToContinent where countryId not in (select id from country); ++-----------+-------------+ +| countryId | continentId | ++-----------+-------------+ +| AN | NA | +| FX | EU | ++-----------+-------------+ + +AN Netherlands Antilles + +insert into country values ('AN', 'Netherlands Antilles'); + +insert into country values ('FX', 'France, Metropolitan'); + +mysql> select * from countryToContinent where continentId not in (select id from continent); ++-----------+-------------+ +| countryId | continentId | ++-----------+-------------+ +| A1 | -- | +| A2 | -- | +| O1 | -- | ++-----------+-------------+ + +insert into continent values ('--','not applicable'); + +mysql> select count(*) from geoIpCountry where countryId not in (select id from country); ++----------+ +| count(*) | ++----------+ +| 0 | ++----------+ + + + +-- + +start off with crude continent-level assignments: + +make everything 1 by default + +insert into continentToNode select id, 1 from continent; + +Europe +update continentToNode set node=2 where continentId = "EU"; + +Asia +update continentToNode set node=3 where continentId = "AS"; + +Now we just can make a join + +insert into countryToNode (select countryId, node from countryToContinent cc, continentToNode cn where cc.continentId = cn.continentId); + +# fix ones requested by Bob to be included in euro instead of defaulting to america. +# these are mostly around the middle east. +update countryToNode set node=2 where countryId in ('AE','AM','BH','CY','GE','IL','IQ','JO','KW','LB','OM','PS','QA','SA','SY','YE'); + +I compared the before and after updating with public_html/geoMap/test.csh at + http://hgwdev.soe.ucsc.edu/~galt/geoMap/countryToNodeMap.html +And it looked identical. So we did not lose anything. + + +mysql> select distinct countryId from geoIpCountry where countryId not in (select distinct countryId from countryToNode); ++-----------+ +| countryId | ++-----------+ +| SS | +| CW | +| SX | +| BQ | ++-----------+ + +mysql> select * from country where id in ('SS','CW','SX','BQ'); ++----+--------------+ +| id | name | ++----+--------------+ +| BQ | Bonaire | +| CW | Curacao | +| SS | South Sudan | +| SX | Sint Maarten | ++----+--------------+ + + +So, what happened to these countries, and why are they not in countryToNode? +Shall we add them? + +insert into countryToContinent values ('SS', 'AF'); +insert into countryToContinent values ('SX', 'NA'); +insert into countryToContinent values ('CW', 'NA'); +insert into countryToContinent values ('BQ', 'NA'); + +insert into countryToNode values ('SS', '1'); +insert into countryToNode values ('SX', '1'); +insert into countryToNode values ('CW', '1'); +insert into countryToNode values ('BQ', '1'); + + + +mysql> select distinct countryId from geoIpCountry where countryId not in (select distinct countryId from countryToNode); +Empty set (0.13 sec) + +Re-ran test.csh and checked again: +http://hgwdev.soe.ucsc.edu/~galt/geoMap/countryToNodeMap.html + +This means we should have full integrity. + +insert into geoIpNode (select ipStart, ipEnd, node from geoIpCountry gic, countryToNode ctn where gic.countryId=ctn.countryId); +Query OK, 326637 rows affected (1.49 sec) +Records: 326637 Duplicates: 0 Warnings: 0 + +Yay! + +TODO at the end: +rename tables to real: +gbNodeReal +geoIpNodeReal + + +Make an extra backup of just the git untracked files just in case: + +mkdir /hive/data/outside/geoIp/untracked/2019-06-18 +gitu | xargs -I X cp X /hive/data/outside/geoIp/untracked/2019-06-18/ + + +Make a backup of the data too: + +mkdir /hive/data/outside/geoIp/geoIpTableDumps/2019-06-18 +# sadly we need to temporarily make it write-able by all so mysqld can write dump there. +chmod 777 /hive/data/outside/geoIp/geoIpTableDumps/2019-06-18 +pushd /hive/data/outside/geoIp/geoIpTableDumps +hgsqldump hgFixed -T 2019-06-18 continent continentToNode country countryToContinent countryToNode gbNode geoIpCountry geoIpNode +popd +chmod 775 /hive/data/outside/geoIp/geoIpTableDumps/2019-06-18 + +