ee5f1773270f1662bbea9a28017cf88080e40793 hiram Fri Jun 14 13:28:36 2024 -0700 add new option to getSeq -revComp to reverse complement sequence output refs #26691 diff --git src/hg/hubApi/getData.c src/hg/hubApi/getData.c index 9b1a325..41177f2 100644 --- src/hg/hubApi/getData.c +++ src/hg/hubApi/getData.c @@ -784,30 +784,37 @@ else tableDataOutput(db, thisTrack, conn, jw, track, chrom, uStart, uEnd); } apiFinishOutput(0, NULL, jw); hFreeConn(&conn); } /* static void getTrackData() */ static void getSequenceData(char *db, char *hubUrl) /* return DNA sequence, given at least a genome=name and chrom=chr, optionally start and end, might be a track hub for UCSC database */ { char *chrom = chrOrAlias(db, hubUrl); char *start = cgiOptionalString("start"); char *end = cgiOptionalString("end"); +boolean revComp = FALSE; +char *revCompStr = cgiOptionalString("revComp"); +if (isNotEmpty(revCompStr)) + { + if (SETTING_IS_ON(revCompStr)) + revComp = TRUE; + } long timeStart = clock1000(); if (isEmpty(chrom)) apiErrAbort(err400, err400Msg, "missing URL chrom= for endpoint '/getData/sequence?genome=%s'", db); if (chromSeqFileExists(db, chrom)) { struct chromInfo *ci = hGetChromInfo(db, chrom); unsigned chromSize = ci->size; struct dnaSeq *seq = NULL; if (isEmpty(start) || isEmpty(end)) if (chromSize > MAX_DNA_LENGTH) apiErrAbort(err400, err400Msg, "DNA sequence request %u (size of %s) too large, limit: %u for endpoint '/getData/sequence?genome=%s;chrom=%s'", chromSize, chrom, MAX_DNA_LENGTH, db, chrom); else @@ -833,49 +840,61 @@ if (measureTiming) jsonWriteNumber(jw, "dnaFetchTimeMs", et); jsonWriteString(jw, "genome", db); jsonWriteString(jw, "chrom", chrom); if (isEmpty(start) || isEmpty(end)) { jsonWriteNumber(jw, "start", (long long)0); jsonWriteNumber(jw, "end", (long long)ci->size); } else { jsonWriteNumber(jw, "start", (long long)sqlSigned(start)); jsonWriteNumber(jw, "end", (long long)sqlSigned(end)); } timeStart = clock1000(); + if (revComp) + { + reverseComplement(seq->dna, seq->size); + jsonWriteBoolean(jw, "revComp", revComp); + } jsonWriteString(jw, "dna", seq->dna); endTime = clock1000(); et = endTime - timeStart; if (measureTiming) jsonWriteNumber(jw, "dnaJsonWriteTimeMs", et); apiFinishOutput(0, NULL, jw); freeDnaSeq(&seq); } else apiErrAbort(err400, err400Msg, "can not find specified chrom=%s in sequence for endpoint '/getData/sequence?genome=%s;chrom=%s", chrom, db, chrom); } /* static void getSequenceData(char *db, char *hubUrl) */ static void getHubSequenceData(char *hubUrl) /* return DNA sequence, given at least a genome=name and chrom=chr, optionally start and end */ { char *genome = cgiOptionalString("genome"); char *start = cgiOptionalString("start"); char *end = cgiOptionalString("end"); +boolean revComp = FALSE; +char *revCompStr = cgiOptionalString("revComp"); +if (isNotEmpty(revCompStr)) + { + if (SETTING_IS_ON(revCompStr)) + revComp = TRUE; + } if (isEmpty(genome)) apiErrAbort(err400, err400Msg, "missing genome= for endpoint '/getData/sequence' given hubUrl='%s'", hubUrl); struct trackHub *hub = errCatchTrackHubOpen(hubUrl); struct trackHubGenome *hubGenome = NULL; for (hubGenome = hub->genomeList; hubGenome; hubGenome = hubGenome->next) { if (sameString(genome, hubGenome->name)) break; } if (NULL == hubGenome) apiErrAbort(err400, err400Msg, "failed to find specified genome=%s for endpoint '/getData/sequence' given hubUrl '%s'", genome, hubUrl); hubAliasSetup(hubGenome); @@ -918,30 +937,35 @@ else { if (ci->size > MAX_DNA_LENGTH) apiErrAbort(err400, err400Msg, "DNA sequence request %d too large, limit: %u for endpoint '/getData/sequence?genome=%s;chrom=%s' given hubUrl='%s'", ci->size, MAX_DNA_LENGTH, genome, chrom, hubUrl); jsonWriteNumber(jw, "start", (long long)0); jsonWriteNumber(jw, "end", (long long)ci->size); } struct dnaSeq *seq = twoBitReadSeqFrag(hubGenome->tbf, chrom, fragStart, fragEnd); if (NULL == seq) { if (fragEnd > fragStart) apiErrAbort(err400, err400Msg, "can not find sequence for chrom=%s;start=%s;end=%s for endpoint '/getData/sequence?genome=%s;chrom=%s;start=%s;end=%s' give hubUrl='%s'", chrom, start, end, genome, chrom, start, end, hubUrl); else apiErrAbort(err400, err400Msg, "can not find sequence for chrom=%s for endpoint '/getData/sequence?genome=%s;chrom=%s' give hubUrl='%s'", chrom, genome, chrom, hubUrl); } +if (revComp) + { + reverseComplement(seq->dna, seq->size); + jsonWriteBoolean(jw, "revComp", revComp); + } jsonWriteString(jw, "dna", seq->dna); apiFinishOutput(0, NULL, jw); } void apiGetData(char *words[MAX_PATH_INFO]) /* 'getData' function, words[1] is the subCommand */ { char *hubUrl = cgiOptionalString("hubUrl"); if (sameWord("track", words[1])) { char *extraArgs = verifyLegalArgs(argGetDataTrack); if (extraArgs) apiErrAbort(err400, err400Msg, "extraneous arguments found for function /getData/track '%s'", extraArgs); if (isNotEmpty(hubUrl))