1bfad89c514f15aef219d6bfdcca6f9f754c3ac8 hiram Fri Mar 29 15:13:56 2019 -0700 now returning DNA sequence from assembly or track hub refs #18869 diff --git src/hg/hubApi/getData.c src/hg/hubApi/getData.c index 2dd587e..1c90e7e 100644 --- src/hg/hubApi/getData.c +++ src/hg/hubApi/getData.c @@ -199,58 +199,58 @@ wigDataOutput(jw, bwf, chrom, start, end); } static void getHubTrackData(char *hubUrl) /* return data from a hub track, optionally just one chrom data, * optionally just one section of that chrom data */ { char *genome = cgiOptionalString("genome"); char *track = cgiOptionalString("track"); char *chrom = cgiOptionalString("chrom"); char *start = cgiOptionalString("start"); char *end = cgiOptionalString("end"); if (isEmpty(genome)) - apiErrAbort("missing genome= for endpoint '/getdata/track' given hubUrl='%s'", hubUrl); + apiErrAbort("missing genome= for endpoint '/getData/track' given hubUrl='%s'", hubUrl); if (isEmpty(track)) - apiErrAbort("missing track= for endpoint '/getdata/track' given hubUrl='%s'", hubUrl); + apiErrAbort("missing track= for endpoint '/getData/track' given hubUrl='%s'", hubUrl); struct trackHub *hub = errCatchTrackHubOpen(hubUrl); struct trackHubGenome *hubGenome = NULL; for (hubGenome = hub->genomeList; hubGenome; hubGenome = hubGenome->next) { if (sameString(genome, hubGenome->name)) break; } if (NULL == hubGenome) - apiErrAbort("failed to find specified genome=%s for endpoint '/getdata/track' given hubUrl '%s'", genome, hubUrl); + apiErrAbort("failed to find specified genome=%s for endpoint '/getData/track' given hubUrl '%s'", genome, hubUrl); struct trackDb *tdb = obtainTdb(hubGenome, NULL); if (NULL == tdb) - apiErrAbort("failed to find a track hub definition in genome=%s for endpoint '/getdata/track' given hubUrl='%s'", genome, hubUrl); + apiErrAbort("failed to find a track hub definition in genome=%s for endpoint '/getData/track' given hubUrl='%s'", genome, hubUrl); struct trackDb *thisTrack = findTrackDb(track, tdb); if (NULL == thisTrack) - apiErrAbort("failed to find specified track=%s in genome=%s for endpoint '/getdata/track' given hubUrl='%s'", track, genome, hubUrl); + apiErrAbort("failed to find specified track=%s in genome=%s for endpoint '/getData/track' given hubUrl='%s'", track, genome, hubUrl); char *bigDataUrl = trackDbSetting(thisTrack, "bigDataUrl"); struct bbiFile *bbi = bigFileOpen(thisTrack->type, bigDataUrl); if (NULL == bbi) - apiErrAbort("track type %s management not implemented yet TBD track=%s in genome=%s for endpoint '/getdata/track' given hubUrl='%s'", track, genome, hubUrl); + apiErrAbort("track type %s management not implemented yet TBD track=%s in genome=%s for endpoint '/getData/track' given hubUrl='%s'", track, genome, hubUrl); struct jsonWrite *jw = apiStartOutput(); jsonWriteString(jw, "hubUrl", hubUrl); jsonWriteString(jw, "genome", genome); // jsonWriteString(jw, "track", track); unsigned chromSize = 0; struct bbiChromInfo *chromList = NULL; if (isNotEmpty(chrom)) { // jsonWriteString(jw, "chrom", chrom); chromSize = bbiChromSize(bbi, chrom); if (0 == chromSize) apiErrAbort("can not find specified chrom=%s in bigBed file URL %s", chrom, bigDataUrl); jsonWriteNumber(jw, "chromSize", (long long)chromSize); } @@ -375,31 +375,31 @@ if (startsWith("big", thisTrack->type)) { if (bigDataUrl) bbi = bigFileOpen(thisTrack->type, bigDataUrl); else { char quickReturn[2048]; sqlSafef(query, sizeof(query), "select fileName from %s", sqlTable); if (sqlQuickQuery(conn, query, quickReturn, sizeof(quickReturn))) { bigDataUrl = cloneString(quickReturn); bbi = bigFileOpen(thisTrack->type, bigDataUrl); } } if (NULL == bbi) - apiErrAbort("failed to find bigDataUrl=%s for track=%s in database=%s for endpoint '/getdata/track'", bigDataUrl, track, db); + apiErrAbort("failed to find bigDataUrl=%s for track=%s in database=%s for endpoint '/getData/track'", bigDataUrl, track, db); if (isNotEmpty(chrom)) { jsonWriteString(jw, "chrom", chrom); chromSize = bbiChromSize(bbi, chrom); if (0 == chromSize) apiErrAbort("can not find specified chrom=%s in bigWig file URL %s", chrom, bigDataUrl); if (uEnd < 1) uEnd = chromSize; jsonWriteNumber(jw, "chromSize", (long long)chromSize); } else { chromList = bbiChromList(bbi); jsonWriteNumber(jw, "chromCount", (long long)slCount(chromList)); } @@ -433,76 +433,152 @@ else if (startsWith("bigWig", thisTrack->type)) { jsonWriteObjectStart(jw, track); wigData(jw, bbi, chrom, uStart, uEnd); jsonWriteObjectEnd(jw); bbiFileClose(&bbi); } else tableDataOutput(db, thisTrack, conn, jw, track, chrom, uStart, uEnd); jsonWriteObjectEnd(jw); /* closing the overall global object */ fputs(jw->dy->string,stdout); hFreeConn(&conn); } -static void getSequenceData() +static void getSequenceData(char *db, char *hubUrl) /* return DNA sequence, given at least a db=name and chrom=chr, - optionally start and end */ + optionally start and end, might be a track hub for UCSC database */ { -char *db = cgiOptionalString("db"); char *chrom = cgiOptionalString("chrom"); char *start = cgiOptionalString("start"); char *end = cgiOptionalString("end"); -if (isEmpty(db)) - apiErrAbort("missing URL db= name for endpoint '/getData/sequence"); if (isEmpty(chrom)) - apiErrAbort("missing URL chrom= for endpoint '/getData/sequence?db=%s", db); + apiErrAbort("missing URL chrom= for endpoint '/getData/sequence?db=%s'", db); if (chromSeqFileExists(db, chrom)) { struct chromInfo *ci = hGetChromInfo(db, chrom); struct dnaSeq *seq = NULL; if (isEmpty(start) || isEmpty(end)) seq = hChromSeqMixed(db, chrom, 0, 0); else seq = hChromSeqMixed(db, chrom, sqlSigned(start), sqlSigned(end)); if (NULL == seq) - apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?db=%s&chrom=%s", chrom, db, chrom); + apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?db=%s&chrom=%s'", chrom, db, chrom); struct jsonWrite *jw = apiStartOutput(); + if (isNotEmpty(hubUrl)) + jsonWriteString(jw, "hubUrl", hubUrl); jsonWriteString(jw, "db", db); jsonWriteString(jw, "chrom", chrom); if (isEmpty(start) || isEmpty(end)) { jsonWriteNumber(jw, "start", (long long)0); jsonWriteNumber(jw, "end", (long long)ci->size); } else { jsonWriteNumber(jw, "start", (long long)sqlSigned(start)); jsonWriteNumber(jw, "end", (long long)sqlSigned(end)); } jsonWriteString(jw, "dna", seq->dna); jsonWriteObjectEnd(jw); fputs(jw->dy->string,stdout); freeDnaSeq(&seq); } else apiErrAbort("can not find specified chrom=%s in sequence for endpoint '/getData/sequence?db=%s&chrom=%s", chrom, db, chrom); +} /* static void getSequenceData(char *db, char *hubUrl) */ + +static void getHubSequenceData(char *hubUrl) +/* return DNA sequence, given at least a genome=name and chrom=chr, + optionally start and end */ +{ +char *genome = cgiOptionalString("genome"); +char *chrom = cgiOptionalString("chrom"); +char *start = cgiOptionalString("start"); +char *end = cgiOptionalString("end"); + +if (isEmpty(genome)) + apiErrAbort("missing genome= for endpoint '/getData/sequence' given hubUrl='%s'", hubUrl); +if (isEmpty(chrom)) + apiErrAbort("missing chrom= for endpoint '/getData/sequence?genome=%s' given hubUrl='%s'", genome, hubUrl); + +struct trackHub *hub = errCatchTrackHubOpen(hubUrl); +struct trackHubGenome *hubGenome = NULL; +for (hubGenome = hub->genomeList; hubGenome; hubGenome = hubGenome->next) + { + if (sameString(genome, hubGenome->name)) + break; + } +if (NULL == hubGenome) + apiErrAbort("failed to find specified genome=%s for endpoint '/getData/sequence' given hubUrl '%s'", genome, hubUrl); + +/* might be a UCSC database track hub, where hubGenome=name is the database */ +if (isEmpty(hubGenome->twoBitPath)) + { + getSequenceData(hubGenome->name, hubUrl); + return; + } + +/* this MaybeChromInfo will open the twoBit file, if not already done */ +struct chromInfo *ci = trackHubMaybeChromInfo(hubGenome->name, chrom); +if (NULL == ci) + apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?genome=%s&chrom=%s' given hubUrl='%s'", chrom, genome, chrom, hubUrl); + +struct jsonWrite *jw = apiStartOutput(); +jsonWriteString(jw, "hubUrl", hubUrl); +jsonWriteString(jw, "genome", genome); +jsonWriteString(jw, "chrom", chrom); +int fragStart = 0; +int fragEnd = 0; +if (isNotEmpty(start) && isNotEmpty(end)) + { + fragStart = sqlSigned(start); + fragEnd = sqlSigned(end); + jsonWriteNumber(jw, "start", (long long)fragStart); + jsonWriteNumber(jw, "end", (long long)fragEnd); + } +else + { + jsonWriteNumber(jw, "start", (long long)0); + jsonWriteNumber(jw, "end", (long long)ci->size); + } +struct dnaSeq *seq = twoBitReadSeqFrag(hubGenome->tbf, chrom, fragStart, fragEnd); +if (NULL == seq) + { + if (fragEnd > fragStart) + apiErrAbort("can not find sequence for chrom=%s;start=%s;end=%s for endpoint '/getData/sequence?genome=%s&chrom=%s;start=%s;end=%s' give hubUrl='%s'", chrom, start, end, genome, chrom, start, end, hubUrl); + else + apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?genome=%s&chrom=%s' give hubUrl='%s'", chrom, genome, chrom, hubUrl); + } +jsonWriteString(jw, "dna", seq->dna); +jsonWriteObjectEnd(jw); /* closing the overall global object */ +fputs(jw->dy->string,stdout); } void apiGetData(char *words[MAX_PATH_INFO]) /* 'getData' function, words[1] is the subCommand */ { +char *hubUrl = cgiOptionalString("hubUrl"); if (sameWord("track", words[1])) { - char *hubUrl = cgiOptionalString("hubUrl"); if (isNotEmpty(hubUrl)) getHubTrackData(hubUrl); else getTrackData(); } else if (sameWord("sequence", words[1])) - getSequenceData(); + { + if (isNotEmpty(hubUrl)) + getHubSequenceData(hubUrl); + else + { + char *db = cgiOptionalString("db"); + if (isEmpty(db)) + apiErrAbort("missing URL db= name for endpoint '/getData/sequence"); + getSequenceData(db, NULL); + } + } else apiErrAbort("do not recognize endpoint function: '/%s/%s'", words[0], words[1]); }