bb8470a90643b057df7bbf5d0140a5b2bbf9dddd braney Fri Jun 12 08:44:37 2026 -0700 quickLift: load genePreds by column name, not a fixed 15-col loader Lifting a genePred track to another assembly used genePredExtLoad15, a positional loader that assumes the extended genePred columns. Classic knownGene-format tables instead carry proteinID and alignID after the ten core columns, so the loader read proteinID as the integer score field and aborted with "invalid signed integer" (e.g. "O54946", or "" when empty). This showed up when lifting from an assembly whose knownGene is the legacy format, such as mm10 to mm39 or rheMac10. Add quickLiftGenePreds(), which loads through genePredReader so the actual set of columns in the table is honored by name (proteinID maps to name2, score defaults), matching how the tracks load when not lifted. The three call sites that hardcoded genePredExtLoad15 (hgTracks gene loading and two hgc detail handlers) now use it. The chain-walking shared with quickLiftSql is factored into quickLiftLoadChains() and quickLiftChainQueryRange(). refs #37535 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> diff --git src/hg/lib/quickLift.c src/hg/lib/quickLift.c index e7045c9add4..b91c370206d 100644 --- src/hg/lib/quickLift.c +++ src/hg/lib/quickLift.c @@ -7,30 +7,31 @@ #include "limits.h" #include "float.h" #include "asParse.h" #include "chain.h" #include "binRange.h" #include "basicBed.h" #include "liftOver.h" #include "hash.h" #include "bigBed.h" #include "bbiFile.h" #include "chainNetDbLoad.h" #include "hdb.h" #include "jksql.h" #include "hgConfig.h" #include "quickLift.h" +#include "genePredReader.h" #include "bigChain.h" #include "bigLink.h" #include "chromAlias.h" #include "customTrack.h" struct bigBedInterval *quickLiftGetIntervals(char *quickLiftFile, struct bbiFile *bbi, char *chrom, int start, int end, struct hash **pChainHash) /* Return intervals from "other" species that will map to the current window. 
* These intervals are NOT YET MAPPED to the current assembly. */ { char *linkFileName = bigChainGetLinkFile(quickLiftFile); int maxGapBefore = 0; int maxGapAfter = 0; struct chain *chain, *chainList = chainLoadIdRangeHub(NULL, quickLiftFile, linkFileName, chrom, start, end, -1); struct lm *lm = lmInit(0); @@ -181,96 +182,145 @@ unsigned ret = 0; struct sqlConnection *conn = hConnectCentral(); char query[2048]; // this needs to use the hg.conf setting sqlSafef(query, sizeof(query), "select q.id from quickLiftChain q where q.fromDb='%s' and q.toDb='%s'", fromDb, toDb); char *geneId = sqlQuickString(conn, query); hDisconnectCentral(&conn); if (geneId) ret = atoi(geneId); return ret; } -struct slList *quickLiftSql(struct sqlConnection *conn, char *quickLiftFile, char *table, char *chrom, int start, int end, char *query, char *extraWhere, ItemLoader2 loader, int numFields,struct hash *chainHash) -// retrieve items for which we have a loader from a SQL database for which we have a set quickLift chains. -// Save the chains we used to map the item back to the current reference. +static struct chain *quickLiftLoadChains(char *quickLiftFile, char *chrom, int start, int end) +/* Load the chains from quickLiftFile that overlap a padded window around the + * destination range. 
*/ { // need to add some padding to these coordinates int padStart = start - 100000; if (padStart < 0) padStart = 0; char *linkFileName = bigChainGetLinkFile(quickLiftFile); -struct chain *chain, *chainList = chainLoadIdRangeHub(NULL, quickLiftFile, linkFileName, chrom, padStart, end+100000, -1); - -struct slList *item, *itemList = NULL; -int rowOffset = 0; -struct sqlResult *sr = NULL; -char **row = NULL; +return chainLoadIdRangeHub(NULL, quickLiftFile, linkFileName, chrom, padStart, end+100000, -1); +} -for(chain = chainList; chain; chain = chain->next) +static void quickLiftChainQueryRange(struct chain *chain, int *retQStart, int *retQEnd) +/* Return the query-side ("other" species) coordinate range spanned by the + * aligned blocks of chain, corrected for query strand. chain->blockList must + * not be NULL. */ { - struct cBlock *cb; - cb = chain->blockList; - - if (cb == NULL) - continue; - +struct cBlock *cb = chain->blockList; int qStart = cb->qStart; int qEnd = cb->qEnd; - // get the range for the links on the "other" species for(; cb; cb = cb->next) { if (cb->qStart < qStart) qStart = cb->qStart; if (cb->qEnd > qEnd) qEnd = cb->qEnd; } // correct for strand if (chain->qStrand == '-') { int saveStart = qStart; qStart = chain->qSize - qEnd; qEnd = chain->qSize - saveStart; } +*retQStart = qStart; +*retQEnd = qEnd; +} + +struct slList *quickLiftSql(struct sqlConnection *conn, char *quickLiftFile, char *table, char *chrom, int start, int end, char *query, char *extraWhere, ItemLoader2 loader, int numFields,struct hash *chainHash) +// retrieve items for which we have a loader from a SQL database for which we have a set of quickLift chains. +// Save the chains we used to map the item back to the current reference. 
+{ +struct chain *chain, *chainList = quickLiftLoadChains(quickLiftFile, chrom, start, end); + +struct slList *item, *itemList = NULL; +int rowOffset = 0; +struct sqlResult *sr = NULL; +char **row = NULL; + +for(chain = chainList; chain; chain = chain->next) + { + if (chain->blockList == NULL) + continue; + + int qStart, qEnd; + quickLiftChainQueryRange(chain, &qStart, &qEnd); + // now grab the items if (query == NULL) sr = hRangeQuery(conn, table, chain->qName, qStart, qEnd, extraWhere, &rowOffset); else sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { item = loader(row + rowOffset, numFields); slAddHead(&itemList, item); } // now squirrel the swapped chains we used to use to make the retrieved items back to us chainSwap(chain); liftOverAddChainHash(chainHash, chain); } return itemList; } +struct genePred *quickLiftGenePreds(struct sqlConnection *conn, char *quickLiftFile, char *table, char *chrom, int start, int end, char *extraWhere, struct hash *chainHash) +// Like quickLiftSql, but load genePreds with a genePredReader so the actual set +// of (extended) genePred columns in the table is honored. A fixed 15-column +// loader misreads classic knownGene-style tables, whose trailing proteinID and +// alignID columns are not extended genePred fields. 
+{ +struct chain *chain, *chainList = quickLiftLoadChains(quickLiftFile, chrom, start, end); + +struct genePred *gpList = NULL; + +for(chain = chainList; chain; chain = chain->next) + { + if (chain->blockList == NULL) + continue; + + int qStart, qEnd; + quickLiftChainQueryRange(chain, &qStart, &qEnd); + + struct genePredReader *gpr = genePredReaderRangeQuery(conn, table, chain->qName, + qStart, qEnd, extraWhere); + struct genePred *gp; + while ((gp = genePredReaderNext(gpr)) != NULL) + slAddHead(&gpList, gp); + genePredReaderFree(&gpr); + + // now squirrel away the swapped chains we use to map the retrieved items back to us + chainSwap(chain); + liftOverAddChainHash(chainHash, chain); + } + +return gpList; +} + struct bed *quickLiftBeds(struct bed *bedList, struct hash *chainHash, boolean blocked) // Map a list of bedd in query coordinates to our current reference { struct bed *liftedBedList = NULL; struct bed *nextBed; struct bed *bed; for(bed = bedList; bed; bed = nextBed) { // remapBlockedBed may want to add new beds after this bed if the region maps to more than one location nextBed = bed->next; bed->next = NULL; char *error; if (!blocked) {