e0fa0d154d7c8a3068bdfcd0668ca09b2f57a464 hiram Fri Aug 22 18:33:24 2025 -0700 initial prototype for a liftOver end point - need to get some of the hgConvert code into a library - refs #35976 diff --git src/hg/hubApi/liftOver.c src/hg/hubApi/liftOver.c new file mode 100644 index 00000000000..4d8c54fd774 --- /dev/null +++ src/hg/hubApi/liftOver.c @@ -0,0 +1,257 @@ +/* liftOver functions */ + +#include "dataApi.h" +#include "hgFind.h" +#include "cartTrackDb.h" +#include "cartJson.h" +#include "genark.h" +#include "asmAlias.h" +#include "assemblyList.h" +#include "liftOver.h" +#include "net.h" + +/**** SHOULD BE IN LIBRARY - code from hgConvert.c ******/ +static long chainTotalBlockSize(struct chain *chain) +/* Return sum of sizes of all blocks in chain */ +{ +struct cBlock *block; +long total = 0; +for (block = chain->blockList; block != NULL; block = block->next) + total += block->tEnd - block->tStart; +return total; +} +/**** SHOULD BE IN LIBRARY - code from hgConvert.c ******/ + +static void chainListOut(char *fromDb, char* toDb, int origSize, char *fromPos, struct chain *chainList) +/* given the list of chains, output the list in JSON */ +{ +struct chain *chain = NULL; +char position[4096]; +char coverage[4096]; +struct jsonWrite *jw = apiStartOutput(); +jsonWriteListStart(jw, "from"); +jsonWriteListStart(jw, NULL); +jsonWriteString(jw, NULL, fromDb); +jsonWriteString(jw, NULL, fromPos); +jsonWriteListEnd(jw); +jsonWriteListEnd(jw); +jsonWriteListStart(jw, "to"); +for (chain = chainList; chain != NULL; chain = chain->next) + { + int blockSize; + int qStart, qEnd; + if (chain->qStrand == '-') + { + qStart = chain->qSize - chain->qEnd; + qEnd = chain->qSize - chain->qStart; + } + else + { + qStart = chain->qStart; + qEnd = chain->qEnd; + } + blockSize = chainTotalBlockSize(chain); + safef(position, sizeof(position), "%s:%d-%d", chain->qName, qStart+1, qEnd); + safef(coverage, sizeof(coverage), "%3.1f%% of bases, %3.1f%% of span", + 100.0 * blockSize / origSize, + 100.0 * (chain->tEnd - chain->tStart) / origSize); + jsonWriteListStart(jw, NULL); + jsonWriteString(jw, NULL, toDb); + jsonWriteString(jw, NULL, position); + jsonWriteString(jw, NULL, coverage); + jsonWriteListEnd(jw); + } +jsonWriteListEnd(jw); +apiFinishOutput(0, NULL, jw); +} + +static void listExisting() +/* output the fromDb,toDb from liftOverChain.hgcentral SQL table */ +{ +long long itemCount = 0; +char *filter = cgiOptionalString(argFilter); + +struct sqlConnection *conn = hConnectCentral(); +char *tableName = cloneString("liftOverChain"); +char query[1024]; +sqlSafef(query, sizeof(query), "SELECT count(*) FROM %s", tableName); +long long totalRows = sqlQuickLongLong(conn, query); + +if (isEmpty(filter)) + { + sqlSafef(query, sizeof(query), "SELECT fromDb,toDb FROM %s LIMIT %d", tableName, maxItemsOutput); + } +else + { + sqlSafef(query, sizeof(query), "SELECT fromDb,toDb FROM %s WHERE LOWER(fromDb) = LOWER('%s') OR LOWER(toDb) = LOWER('%s') LIMIT %d;", tableName, filter, filter, maxItemsOutput); + } + +char *dataTime = sqlTableUpdate(conn, tableName); +time_t dataTimeStamp = sqlDateToUnixTime(dataTime); +replaceChar(dataTime, ' ', 'T'); /* ISO 8601 */ +struct jsonWrite *jw = apiStartOutput(); +jsonWriteString(jw, "dataTime", dataTime); +jsonWriteNumber(jw, "dataTimeStamp", (long long)dataTimeStamp); + +jsonWriteListStart(jw, "existingLiftOvers"); +struct sqlResult *sr = sqlGetResult(conn, query); +char **row; +while ((row = sqlNextRow(sr)) != NULL) + { + ++itemCount; + jsonWriteListStart(jw, NULL); + jsonWriteString(jw, NULL, row[0]); + jsonWriteString(jw, NULL, row[1]); + jsonWriteListEnd(jw); + } +jsonWriteListEnd(jw); +jsonWriteNumber(jw, "totalLiftOvers", totalRows); +jsonWriteNumber(jw, "itemsReturned", itemCount); + +apiFinishOutput(0, NULL, jw); +hDisconnectCentral(&conn); +} + +/**** SHOULD BE IN LIBRARY - code from hgConvert.c ******/ +static char *skipWord(char *fw) +/* skips over current word to start of next. + * Error for this not to exist. */ +{ +char *s; +s = skipToSpaces(fw); +if (s == NULL) + errAbort("Expecting two words in .ra file line %s\n", fw); +s = skipLeadingSpaces(s); +if (s == NULL) + errAbort("Expecting two words in .ra file line %s\n", fw); +return s; +} + +static struct chain *chainLoadIntersecting(char *fileName, + char *chrom, int start, int end) +/* Load the chains that intersect given region. */ +{ +struct lineFile *lf = netLineFileOpen(fileName); +char *line; +int chromNameSize = strlen(chrom); +struct chain *chainList = NULL, *chain; +#ifdef SOON /* Put in if we index. */ +boolean gotChrom = FALSE; +#endif /* SOON */ +int chainCount = 0; + +while (lineFileNextReal(lf, &line)) + { + if (startsWith("chain", line) && isspace(line[5])) + { + ++chainCount; + line = skipWord(line); /* Skip over 'chain' */ + line = skipWord(line); /* Skip over chain score */ + if (startsWith(chrom, line) && isspace(line[chromNameSize])) + { +#ifdef SOON /* Put in if we index. */ + gotChrom = TRUE; +#endif /* SOON */ + lineFileReuse(lf); + chain = chainReadChainLine(lf); + if (rangeIntersection(chain->tStart, chain->tEnd, start, end) > 0) + { + chainReadBlocks(lf, chain); + slAddHead(&chainList, chain); + } + else + chainFree(&chain); + } +#ifdef SOON /* Put in if we index. */ + else if (gotChrom) + break; /* We assume file is sorted by chromosome, so we're done. */ +#endif /* SOON */ + } + } +lineFileClose(&lf); +slReverse(&chainList); +return chainList; +} + +static struct chain *chainLoadAndTrimIntersecting(char *fileName, + char *chrom, int start, int end) +/* Load the chains that intersect given region, and trim them + * to fit region. */ +{ +struct chain *rawList, *chainList = NULL, *chain, *next; +rawList = chainLoadIntersecting(fileName, chrom, start, end); +for (chain = rawList; chain != NULL; chain = next) + { + struct chain *subChain, *chainToFree; + next = chain->next; + chainSubsetOnT(chain, start, end, &subChain, &chainToFree); + if (subChain != NULL) + slAddHead(&chainList, subChain); + if (chainToFree != NULL) + chainFree(&chain); + } +slSort(&chainList, chainCmpScore); +return chainList; +} +/**** SHOULD BE IN LIBRARY - code from hgConvert.c ******/ + +void apiLiftOver(char *words[MAX_PATH_INFO]) +/* 'liftOver' function words[1] is the subCommand */ +{ +char *extraArgs = verifyLegalArgs(argLiftOver); +if (extraArgs) + apiErrAbort(err400, err400Msg, "extraneous arguments found for function /liftOver'%s'", extraArgs); + +if (sameWordOk("listExisting", words[1])) + { + listExisting(); + return; + } + +char *fromGenome = cgiOptionalString(argFromGenome); +char *toGenome = cgiOptionalString(argToGenome); +char *chrom = cgiOptionalString(argChrom); +char *start = cgiOptionalString(argStart); +char *end = cgiOptionalString(argEnd); + +if (isEmpty(fromGenome) || isEmpty(toGenome) || isEmpty(chrom) || isEmpty(start) || isEmpty(end)) + apiErrAbort(err400, err400Msg, "must have all arguments: %s, %s, %s, %s, %s for endpoint '/liftOver", argFromGenome, argToGenome, argChrom, argStart, argEnd); + +unsigned uStart = 0; +unsigned uEnd = 0; +uStart = sqlUnsigned(start); +uEnd = sqlUnsigned(end); +if (uEnd < uStart) + apiErrAbort(err400, err400Msg, "given start coordinate %u is greater than given end coordinate", uStart, uEnd); + +struct dbDb *fromDb = hDbDb(fromGenome); +if (fromDb == NULL) + { + fromDb = genarkLiftOverDbs(fromGenome); + } +if (fromDb == NULL) + apiErrAbort(err400, err400Msg, "can not find 'fromGenome=%s' for endpoint '/liftOver", fromGenome); +struct dbDb *toDb = hDbDb(toGenome); +if (toDb == NULL) + { + toDb = genarkLiftOverDbs(toGenome); + } +if (toDb == NULL) + apiErrAbort(err400, err400Msg, "can not find 'toGenome=%s' for endpoint '/liftOver", toGenome); + +char *fileName = liftOverChainFile(fromDb->name, toDb->name); +if (isEmpty(fileName)) + apiErrAbort(err400, err400Msg, "Unable to find a chain file from %s to %s - please contact support", fromGenome, toGenome); +fileName = hReplaceGbdbMustDownload(fileName); +char fromPos[4096]; +safef(fromPos, sizeof(fromPos), "%s:%u-%u", chrom, uStart, uEnd); +char *nChrom; +int nStart, nEnd; +if (!hgParseChromRange(fromDb->name, fromPos, &nChrom, &nStart, &nEnd)) + apiErrAbort(err400, err400Msg, "position %s is not in chrom:start-end format", fromPos); +int origSize = nEnd - nStart; +struct chain *chainList = chainLoadAndTrimIntersecting(fileName, nChrom, nStart, nEnd); +if (chainList == NULL) + apiErrAbort(err400, err400Msg, "Sorry, this position %s is not found in the %s assembly", fromPos, toGenome); +chainListOut(fromGenome, toGenome, origSize, fromPos, chainList); +}