bb2d391712fb0208347ffc0b88abe146df14dc0a galt Fri Jun 21 18:21:50 2019 -0700 Adding Support for CIDR specification of subnets, e.g. 192.168.1.255/31. It still supports the older subnet format too, e.g. 192.168 diff --git src/hg/geoIp/geoIpToCountryMaxMind.c src/hg/geoIp/geoIpToCountryMaxMind.c index 43fb6d7..175ca44 100644 --- src/hg/geoIp/geoIpToCountryMaxMind.c +++ src/hg/geoIp/geoIpToCountryMaxMind.c @@ -1,27 +1,30 @@ /* program geoIpToCountryMaxMind * by Galt Barber 2011-04-15 - * Read csv input geoip data and output format for use with genome-browser cgis + * Read csv input geoip data (free from MaxMind) and + * output geoIpCountry.tab to load into hgFixed for testing, + * and hgcentral for use with genome-browser cgis * to map user IP addresses to country-code. */ #include "common.h" #include "linefile.h" #include "options.h" #include "sqlNum.h" #include "hash.h" #include "obscure.h" #include "csv.h" +#include "net.h" #include "internet.h" #define MAXWORDS 1024 /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"-help" , OPTION_BOOLEAN}, {NULL, 0} }; struct hash *locHash = NULL; void usage(char *p) /* display correct usage/syntax */ { @@ -81,87 +84,41 @@ else if (sameString(continentCode, "EU")) // asia { countryCode = "DE"; // fake some other country in that continent, germany } else errAbort("unexpected country code is empty string in line #%d", lineCount); } hashAdd(locHash, geoname_id, countryCode); } lineFileClose(&lf); } -bits32 internetPackIp(unsigned char unpacked[4]) -/* Convert from 4-byte format with most significant - * byte first to native 32-bit format. */ -{ -int i; -bits32 packed = 0; -for (i=0; i<=3; ++i) - { - packed <<= 8; - packed |= unpacked[i]; - } -return packed; -} - void parseCIDR(char *cidr, bits32 *pStartIp, bits32 *pEndIp) /* parse input CIDR format IP range (or subnet) */ { -char *s = cloneString(cidr); -char *c = strchr(s, '/'); -if (!c) - errAbort("expected slash char '/' in input cidr %s\n", cidr); -*c++ = 0; -char *ip = s; -unsigned int bits = sqlUnsigned(c); -//printf("ip=%s, bits=%d \n", ip, bits); // DEBUG REMOVE -unsigned char quadIp[4]; -internetParseDottedQuad(ip, quadIp); -//int i; -//for(i=0;i<4;++i) -// printf("ip[%d]=%d\n", i, quadIp[i]); // DEBUG REMOVE -bits32 packedIp = 0; -packedIp = internetPackIp(quadIp); // TODO should this go in the library internet.c? -//printf("packed32 bits=%u %08x\n", packedIp, packedIp); // DEBUG REMOVE -int r = 32 - bits; -bits32 start = packedIp & (((unsigned int) 0xFFFFFFFF) << r); -bits32 end; -// on this platform shr or shl 32 of a 32-bit value actually does nothing at all rather than turning it to 0s. -if (bits == 32) - end = packedIp; -else - end = packedIp | (((unsigned int) 0xFFFFFFFF) >> bits); -//printf("start=%u %08x\n", start, start); // DEBUG REMOVE -//printf("end =%u %08x\n", end, end ); // DEBUG REMOVE +struct cidr *subnet = internetParseSubnetCidr(cidr); -char startIpS[17]; -char endIpS[17]; -internetIpToDottedQuad(start, startIpS); -internetIpToDottedQuad(end, endIpS); - -//printf("dottedQuad start %s end %s\n", startIpS, endIpS); - -*pStartIp = start; -*pEndIp = end; +internetCidrRange(subnet, pStartIp, pEndIp); } + void geoIpToCountry(char *fileName) /* List each field in tab-separated file on a new line, dashed-lines separate records */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); int lineSize; char *line; char *words[MAXWORDS]; int wordCount; int lineCount = 0; while (lineFileNext(lf, &line, &lineSize)) { // input format // network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider @@ -170,53 +127,58 @@ // cannot use chopByString since the input has empty strings and they get skipped. wordCount = chopByChar(line, ',', words, MAXWORDS); if (wordCount != 6) errAbort("Invalid row found, wordCount = %d != 6", wordCount); if (lineCount++ == 0) // 1st line is a comment continue; // get network info char *network = words[0]; bits32 startIp, endIp; parseCIDR(network, &startIp, &endIp); + // Handy for debugging + //char startIpS[17]; + //char endIpS[17]; + //internetIpToDottedQuad(startIp, startIpS); + //internetIpToDottedQuad(endIp, endIpS); + //printf("dottedQuad start %s end %s\n", startIpS, endIpS); + // get country info char *geoname_id = words[1]; char *registered_country_geoname_id = words[2]; //printf("network %s ", network); //printf(" geoname_id %s\n", geoname_id); struct hashEl *el = hashLookup(locHash, geoname_id); if (!el) { el = hashLookup(locHash, registered_country_geoname_id); if (!el) { warn("%s missing %s and %s in location lookup", network, geoname_id, registered_country_geoname_id); continue; } } char *countryCode = el->val; - //if (!sameString(countryCode, "ZZ")) // Filter out Reserved Ip ranges printf("%u\t%u\t%s\n", startIp, endIp, countryCode); - //printf("----------------------------------------\n"); } lineFileClose(&lf); } int main (int argc, char *argv[]) { char *fileName="stdin"; optionInit(&argc, argv, optionSpecs); if ((argc != 2) || optionExists("-help")) usage(argv[0]); fileName=argv[1]; readLocations();