823555550e19e556ca7f3ac9ebcc9793f0c39668 galt Fri Apr 25 19:56:46 2025 -0700 Since it was confusing not to have the end included in the range, fixed binary search to take a flag specifying whether you want the beginning or the end of the range. This allows the natural expression of day 0 instead of -1. diff --git src/hg/hgcentralTidy/hgcentralTidy.c src/hg/hgcentralTidy/hgcentralTidy.c index 6cfa1f782bb..f2416499406 100644 --- src/hg/hgcentralTidy/hgcentralTidy.c +++ src/hg/hgcentralTidy/hgcentralTidy.c @@ -290,57 +290,69 @@ verbose(3, "sleeping %d seconds\n", chunkWait);fflush(stderr); sleep(chunkWait); verbose(3, "awake\n");fflush(stderr); } verbose(1, "old recs %s deleted %d, robot recs %s deleted %d\n", optionExists("skipDel")?"would have been":"", oldRecCount, optionExists("skipDel")?"would have been":"", delRobotCount);fflush(stderr); time_t cleanEnd = time(NULL); int minutes = difftime(cleanEnd, cleanSectionStart) / 60; verbose(1, "%s\n", ctime(&cleanEnd)); verbose(1, "%d minutes\n\n", minutes); } -int binaryIdSearch(unsigned int *ids, int numIds, char *table, int daysAgo) +int binaryIdSearch(unsigned int *ids, int numIds, char *table, int daysAgo, boolean endSearch) /* Find the array index in ids which holds the id that contains * the oldest record satisfying the daysAgo criterion. * If not found, return -1 */ { char query[256]; int a = 0; int b = numIds - 1; int m = 0; while (TRUE) { if (a > b) - return a; // is this right? 
+ { + if (endSearch) + return b; + else + return a; + } m = (b + a) / 2; //verbose(1,"bin a=%d, b=%d, m=%d\n", a, b, m); while (TRUE) { sqlSafef(query, sizeof(query), "select firstUse from %s where id=%u", table, ids[m]); char *firstUse = sqlQuickString(conn,query); if (firstUse) { int daysAgoFirstUse = toDaysAgo(firstUse, ids[m]); //verbose(1, "DEBUG: %d %d %s %d\n", m, ids[m], firstUse, daysAgoFirstUse); // DEBUG REMOVE if (daysAgoFirstUse > daysAgo) { a = m + 1; } + else if (daysAgoFirstUse == daysAgo) + { + if (endSearch) + a = m + 1; + else + b = m - 1; + } else { b = m - 1; } break; } else // rare event: record not found, was it deleted? { errAbort("hgcentralTidy: unexpected error in binaryIdSearch() id %u not found in table %s", ids[m], table); } } } } @@ -385,91 +397,84 @@ sqlFreeResult(&sr); totalRows = i; // in case they differed. int purgeRangeStart = -1; int purgeRangeEnd = -1; if (optionExists("purgeStart")) // manual purge range specified { purgeStart = optionInt("purgeStart", -1); purgeEnd = optionInt("purgeEnd", -1); if (purgeStart < 1 || purgeStart > 720) errAbort("Invalid purgeStart"); if (purgeEnd < 0) purgeEnd = 0; if (purgeStart < purgeEnd) errAbort("purgeStart should be greater than purgeEnd (in days ago)"); - purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart); - purgeRangeEnd = binaryIdSearch(ids, totalRows, table, purgeEnd); + purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart, FALSE); + purgeRangeEnd = binaryIdSearch(ids, totalRows, table, purgeEnd, TRUE); verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%u\n", purgeStart, purgeEnd, purgeRangeStart, purgeRangeEnd, purgeRangeEnd-purgeRangeStart, ids[purgeRangeStart]); if (!optionExists("dryRun")) cleanTableSection(table, ids[purgeRangeStart], ids[purgeRangeEnd]); } else // figure out purge-ranges automatically { int firstUseAge = 0; if (sameString(table, sessionDbTableName)) firstUseAge = 14; if 
(sameString(table, userDbTableName)) firstUseAge = 365; sqlSafef(query,sizeof(query), "select dayofweek(now())"); int day = sqlQuickNum(conn, query); // These old records take a long time to go through, 5k sessionDb to 55k userDb old recs to look at, // and typically produce only a few hundred deletions. // they are growing slowly and expire rarely, so we don't need to scan them // frequently and aggressively. So ONLY scan them once per week by doing 1/7 per day. // Also don't need to worry much about the // borders of the split-over-7-days divisions shifting much because the set is so nearly static. YAWN. - int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge); + int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge, FALSE); int oldRangeSize = (firstUseIndex - 0) / 7; int oldRangeStart = oldRangeSize * (day-1); int oldRangeEnd = oldRangeStart + oldRangeSize; verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%u\n", firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]); - //int oldRangeStart = 0; - //int oldRangeEnd = firstUseIndex; - //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%u\n", - //firstUseAge, firstUseIndex, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[firstUseIndex]); // newly old can be expected to have some delete action // these records have newly crossed the threshold into being old enough to have possibly expired. 
int newOldRangeStart = firstUseIndex; - int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1); + int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1, TRUE); verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%u\n", firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]); // this is the main delete action of cleaning out new robots (20k to 50k or more) - int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2); - int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, -1); - // Because the end is not actually included in the cleaning range, so to get all rows, need -1 so 0 is fully included. - if (robo1RangeEnd > totalRows - 1) // do not go off the end of the array. - robo1RangeEnd = totalRows - 1; - verbose(1, "robot cleaner1: twoDayIndex = %d minusOneDayIndex %d rangeSize=%d ids[rs]=%u\n", + int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2, FALSE); + int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, 0, TRUE); + verbose(1, "robot cleaner1: twoDayIndex = %d zeroDayIndex %d rangeSize=%d ids[rs]=%u\n", robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]); int robo2RangeStart = -1; int robo2RangeEnd = -1; if (sameString(table, userDbTableName)) { // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions - robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7); - robo2RangeEnd = binaryIdSearch(ids, totalRows, table, 6); + robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7, FALSE); + robo2RangeEnd = binaryIdSearch(ids, totalRows, table, 6, TRUE); verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%u\n", robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]); } /* cannot clean until we have all the ranges determined since deleting messes up binSearch */ if 
(!optionExists("dryRun")) { verbose(1, "old cleaner:\n"); cleanTableSection(table, ids[oldRangeStart], ids[oldRangeEnd]); } if (!optionExists("dryRun")) { verbose(1, "newOld cleaner:\n"); cleanTableSection(table, ids[newOldRangeStart], ids[newOldRangeEnd]); @@ -481,39 +486,39 @@ cleanTableSection(table, ids[robo1RangeStart], ids[robo1RangeEnd]); } if (sameString(table, userDbTableName)) { if (!optionExists("dryRun")) { verbose(1, "robot cleaner2:\n"); cleanTableSection(table, ids[robo2RangeStart], ids[robo2RangeEnd]); } } } /* -int found = binaryIdSearch(ids, totalRows, table, 1); +int found = binaryIdSearch(ids, totalRows, table, 1, FALSE); if ((found >= 0) && (found < totalRows)) verbose(1, "1 days ago found = %d, id == ids[found] = %u \n", found, ids[found]); -found = binaryIdSearch(ids, totalRows, table, 2); +found = binaryIdSearch(ids, totalRows, table, 2, FALSE); if ((found >= 0) && (found < totalRows)) verbose(1, "2 days ago found = %d, id == ids[found] = %u \n", found, ids[found]); -found = binaryIdSearch(ids, totalRows, table, 30); +found = binaryIdSearch(ids, totalRows, table, 30, FALSE); if ((found >= 0) && (found < totalRows)) verbose(1, "30 days ago found = %d, id == ids[found] = %u \n", found, ids[found]); */ /* if (daysAgoFirstUse < 14) { hitEnd = TRUE; break; } */ /*