7c23ae36017c35ef4f20b3725454c71100860c46 max Thu Apr 24 07:04:20 2025 -0700 making sure that robo1RangeEnd includes today, so the cleaner will touch rows from today, refs #35554 diff --git src/hg/hgcentralTidy/hgcentralTidy.c src/hg/hgcentralTidy/hgcentralTidy.c index c1520103a0d..5532ec3a659 100644 --- src/hg/hgcentralTidy/hgcentralTidy.c +++ src/hg/hgcentralTidy/hgcentralTidy.c @@ -25,31 +25,30 @@ struct sqlConnection *conn = NULL; int chunkSize = 1000; int chunkWait = 0; int squealSize = 20; /* complain if table data_length is bigger than squealSize GB */ // was 14 until 2018-06-02 int purgeStart = -1; /* manual specify purge range in days ago */ int purgeEnd = -1; char *purgeTable = NULL; /* optionally specify one table to purge */ char *sessionDbTableName = "sessionDb"; char *userDbTableName = "userDb"; - void usage() /* Explain usage and exit. */ { errAbort( "hgcentralTidy - Clean out old carts in hgcentral without blocking cart use\n" "usage:\n" " hgcentralTidy config\n" "options:\n" " -chunkSize=N - number of rows to examine in one chunk, default %d\n" " -chunkWait=N - sleep interval between chunks to allow other processing, default %d'\n" " -squealSize=N - email warning to cluster-admin when this size in GB is exceeded, default %d'\n" " -purgeStart=N - purge range starts N days ago'\n" " -purgeEnd=N - purge range end N days ago'\n" " -purgeTable=tableName - optional purge table must be userDb or sessionDb. If not specified, both tables are purged.'\n" " -dryRun - option that causes it to skip the call to cleanTableSection.'\n" @@ -416,48 +415,49 @@ sqlSafef(query,sizeof(query), "select dayofweek(now())"); int day = sqlQuickNum(conn, query); // These old records take a long time to go through, 5k sessionDb to 55k userDb old recs to look at, // and typically produce only a few hundred deletions. // they are growing slowly and expire rarely, so we don't need to scan them // frequently and aggressively. So ONLY scan them once per week by doing 1/7 per day. // Also don't need to worry much about the // borders of the split-over-7-days divisions shifting much because the set is so nearly static. YAWN. int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge); int oldRangeSize = (firstUseIndex - 0) / 7; int oldRangeStart = oldRangeSize * (day-1); int oldRangeEnd = oldRangeStart + oldRangeSize; + verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%u\n", firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]); //int oldRangeStart = 0; //int oldRangeEnd = firstUseIndex; //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%u\n", //firstUseAge, firstUseIndex, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[firstUseIndex]); // newly old can be expected to have some delete action // these records have newly crossed the threshold into being old enough to have possibly expired. int newOldRangeStart = firstUseIndex; int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1); verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%u\n", firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]); // this is the main delete action of cleaning out new robots (20k to 50k or more) int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2); - int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, 1); + int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, 0); verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%u\n", robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]); int robo2RangeStart = -1; int robo2RangeEnd = -1; if (sameString(table, userDbTableName)) { // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7); robo2RangeEnd = binaryIdSearch(ids, totalRows, table, 6); verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%u\n", robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]); } /* cannot clean until we have all the ranges determined since deleting messes up binSearch */ if (!optionExists("dryRun")) @@ -540,55 +540,51 @@ { boolean squealed = FALSE; /* get connection info */ database = getCfgOption(config, "db" ); host = getCfgOption(config, "host" ); user = getCfgOption(config, "user" ); password = getCfgOption(config, "password"); conn = sqlConnectRemote(host, user, password, database); verbose(1, "Cleaning database %s.%s\n", host, database); verbose(1, "chunkWait=%d chunkSize=%d\n", chunkWait, chunkSize); -//sessionDbTableName = "sessionDbGalt"; - -//userDbTableName = "userDbGalt"; - if (!purgeTable || sameString(purgeTable,sessionDbTableName)) { if (cleanTable(sessionDbTableName)) squealed = TRUE; } if (!purgeTable || sameString(purgeTable,userDbTableName)) { if (cleanTable(userDbTableName)) squealed = TRUE; } sqlDisconnect(&conn); return squealed; } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); chunkSize = optionInt("chunkSize", chunkSize); chunkWait = optionInt("chunkWait", chunkWait); squealSize = optionInt("squealSize", squealSize); if (optionExists("purgeTable")) { purgeTable = optionVal("purgeTable", NULL); - if (!sameString(purgeTable,"sessionDb") && !sameString(purgeTable,"userDb")) + if (!sameString(purgeTable,userDbTableName) && !sameString(purgeTable,sessionDbTableName)) errAbort("Invalid value for purgeTable option, must be userDb or sessionDb or leave option off for both."); } if (argc != 2) usage(); return hgcentralTidy(argv[1]); }