1bfad89c514f15aef219d6bfdcca6f9f754c3ac8
hiram
  Fri Mar 29 15:13:56 2019 -0700
now returning DNA sequence from assembly or track hub refs #18869

diff --git src/hg/hubApi/getData.c src/hg/hubApi/getData.c
index 2dd587e..1c90e7e 100644
--- src/hg/hubApi/getData.c
+++ src/hg/hubApi/getData.c
@@ -199,58 +199,58 @@
 	wigDataOutput(jw, bwf, chrom, start, end);
 }
 
 static void getHubTrackData(char *hubUrl)
 /* return data from a hub track, optionally just one chrom data,
  *  optionally just one section of that chrom data
  */
 {
 char *genome = cgiOptionalString("genome");
 char *track = cgiOptionalString("track");
 char *chrom = cgiOptionalString("chrom");
 char *start = cgiOptionalString("start");
 char *end = cgiOptionalString("end");
 
 if (isEmpty(genome))
-    apiErrAbort("missing genome=<name> for endpoint '/getdata/track'  given hubUrl='%s'", hubUrl);
+    apiErrAbort("missing genome=<name> for endpoint '/getData/track'  given hubUrl='%s'", hubUrl);
 if (isEmpty(track))
-    apiErrAbort("missing track=<name> for endpoint '/getdata/track'  given hubUrl='%s'", hubUrl);
+    apiErrAbort("missing track=<name> for endpoint '/getData/track'  given hubUrl='%s'", hubUrl);
 
 struct trackHub *hub = errCatchTrackHubOpen(hubUrl);
 struct trackHubGenome *hubGenome = NULL;
 for (hubGenome = hub->genomeList; hubGenome; hubGenome = hubGenome->next)
     {
     if (sameString(genome, hubGenome->name))
 	break;
     }
 if (NULL == hubGenome)
-    apiErrAbort("failed to find specified genome=%s for endpoint '/getdata/track'  given hubUrl '%s'", genome, hubUrl);
+    apiErrAbort("failed to find specified genome=%s for endpoint '/getData/track'  given hubUrl '%s'", genome, hubUrl);
 
 struct trackDb *tdb = obtainTdb(hubGenome, NULL);
 
 if (NULL == tdb)
-    apiErrAbort("failed to find a track hub definition in genome=%s for endpoint '/getdata/track'  given hubUrl='%s'", genome, hubUrl);
+    apiErrAbort("failed to find a track hub definition in genome=%s for endpoint '/getData/track'  given hubUrl='%s'", genome, hubUrl);
 
 struct trackDb *thisTrack = findTrackDb(track, tdb);
 
 if (NULL == thisTrack)
-    apiErrAbort("failed to find specified track=%s in genome=%s for endpoint '/getdata/track'  given hubUrl='%s'", track, genome, hubUrl);
+    apiErrAbort("failed to find specified track=%s in genome=%s for endpoint '/getData/track'  given hubUrl='%s'", track, genome, hubUrl);
 
 char *bigDataUrl = trackDbSetting(thisTrack, "bigDataUrl");
 struct bbiFile *bbi = bigFileOpen(thisTrack->type, bigDataUrl);
 if (NULL == bbi)
-    apiErrAbort("track type %s management not implemented yet TBD track=%s in genome=%s for endpoint '/getdata/track'  given hubUrl='%s'", track, genome, hubUrl);
+    apiErrAbort("track type %s management not implemented yet TBD track=%s in genome=%s for endpoint '/getData/track'  given hubUrl='%s'", thisTrack->type, track, genome, hubUrl);
 
 struct jsonWrite *jw = apiStartOutput();
 jsonWriteString(jw, "hubUrl", hubUrl);
 jsonWriteString(jw, "genome", genome);
 // jsonWriteString(jw, "track", track);
 unsigned chromSize = 0;
 struct bbiChromInfo *chromList = NULL;
 if (isNotEmpty(chrom))
     {
 //    jsonWriteString(jw, "chrom", chrom);
     chromSize = bbiChromSize(bbi, chrom);
     if (0 == chromSize)
 	apiErrAbort("can not find specified chrom=%s in bigBed file URL %s", chrom, bigDataUrl);
     jsonWriteNumber(jw, "chromSize", (long long)chromSize);
     }
@@ -375,31 +375,31 @@
 if (startsWith("big", thisTrack->type))
     {
     if (bigDataUrl)
 	bbi = bigFileOpen(thisTrack->type, bigDataUrl);
     else
 	{
 	char quickReturn[2048];
         sqlSafef(query, sizeof(query), "select fileName from %s", sqlTable);
         if (sqlQuickQuery(conn, query, quickReturn, sizeof(quickReturn)))
 	    {
 	    bigDataUrl = cloneString(quickReturn);
 	    bbi = bigFileOpen(thisTrack->type, bigDataUrl);
 	    }
 	}
     if (NULL == bbi)
-	apiErrAbort("failed to find bigDataUrl=%s for track=%s in database=%s for endpoint '/getdata/track'", bigDataUrl, track, db);
+	apiErrAbort("failed to find bigDataUrl=%s for track=%s in database=%s for endpoint '/getData/track'", bigDataUrl, track, db);
     if (isNotEmpty(chrom))
 	{
 	jsonWriteString(jw, "chrom", chrom);
 	chromSize = bbiChromSize(bbi, chrom);
 	if (0 == chromSize)
 	    apiErrAbort("can not find specified chrom=%s in bigWig file URL %s", chrom, bigDataUrl);
 	if (uEnd < 1)
 	    uEnd = chromSize;
 	jsonWriteNumber(jw, "chromSize", (long long)chromSize);
 	}
 else
 	{
 	chromList = bbiChromList(bbi);
 	jsonWriteNumber(jw, "chromCount", (long long)slCount(chromList));
 	}
@@ -433,76 +433,152 @@
 else if (startsWith("bigWig", thisTrack->type))
     {
     jsonWriteObjectStart(jw, track);
     wigData(jw, bbi, chrom, uStart, uEnd);
     jsonWriteObjectEnd(jw);
     bbiFileClose(&bbi);
     }
 else
     tableDataOutput(db, thisTrack, conn, jw, track, chrom, uStart, uEnd);
 
 jsonWriteObjectEnd(jw);	/* closing the overall global object */
 fputs(jw->dy->string,stdout);
 hFreeConn(&conn);
 }
 
-static void getSequenceData()
+static void getSequenceData(char *db, char *hubUrl)
 /* return DNA sequence, given at least a db=name and chrom=chr,
-   optionally start and end  */
+   optionally start and end; a non-empty hubUrl (track hub on a UCSC database) is echoed in the output  */
 {
-char *db = cgiOptionalString("db");
 char *chrom = cgiOptionalString("chrom");
 char *start = cgiOptionalString("start");
 char *end = cgiOptionalString("end");
 
-if (isEmpty(db))
-    apiErrAbort("missing URL db=<ucscDb> name for endpoint '/getData/sequence");
 if (isEmpty(chrom))
-    apiErrAbort("missing URL chrom=<name> for endpoint '/getData/sequence?db=%s", db);
+    apiErrAbort("missing URL chrom=<name> for endpoint '/getData/sequence?db=%s'", db);
 if (chromSeqFileExists(db, chrom))
     {
     struct chromInfo *ci = hGetChromInfo(db, chrom);
     struct dnaSeq *seq = NULL;
     if (isEmpty(start) || isEmpty(end))
 	seq = hChromSeqMixed(db, chrom, 0, 0);
     else
 	seq = hChromSeqMixed(db, chrom, sqlSigned(start), sqlSigned(end));
     if (NULL == seq)
-        apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?db=%s&chrom=%s", chrom, db, chrom);
+        apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?db=%s&chrom=%s'", chrom, db, chrom);
     struct jsonWrite *jw = apiStartOutput();
+    if (isNotEmpty(hubUrl))
+	jsonWriteString(jw, "hubUrl", hubUrl);
     jsonWriteString(jw, "db", db);
     jsonWriteString(jw, "chrom", chrom);
     if (isEmpty(start) || isEmpty(end))
 	{
         jsonWriteNumber(jw, "start", (long long)0);
         jsonWriteNumber(jw, "end", (long long)ci->size);
 	}
     else
 	{
         jsonWriteNumber(jw, "start", (long long)sqlSigned(start));
         jsonWriteNumber(jw, "end", (long long)sqlSigned(end));
 	}
     jsonWriteString(jw, "dna", seq->dna);
     jsonWriteObjectEnd(jw);
     fputs(jw->dy->string,stdout);
     freeDnaSeq(&seq);
     }
 else
     apiErrAbort("can not find specified chrom=%s in sequence for endpoint '/getData/sequence?db=%s&chrom=%s", chrom, db, chrom);
+}	/*	static void getSequenceData(char *db, char *hubUrl)	*/
+
+static void getHubSequenceData(char *hubUrl)
+/* return DNA sequence from the track hub at hubUrl, given at least a genome=name
+   and chrom=chr; start and end are honored only when both are supplied  */
+{
+char *genome = cgiOptionalString("genome");
+char *chrom = cgiOptionalString("chrom");
+char *start = cgiOptionalString("start");
+char *end = cgiOptionalString("end");
+
+if (isEmpty(genome))
+    apiErrAbort("missing genome=<name> for endpoint '/getData/sequence'  given hubUrl='%s'", hubUrl);
+if (isEmpty(chrom))
+    apiErrAbort("missing chrom=<name> for endpoint '/getData/sequence?genome=%s' given hubUrl='%s'", genome, hubUrl);
+
+struct trackHub *hub = errCatchTrackHubOpen(hubUrl);
+struct trackHubGenome *hubGenome = NULL;
+for (hubGenome = hub->genomeList; hubGenome; hubGenome = hubGenome->next)
+    {
+    if (sameString(genome, hubGenome->name))
+	break;
+    }
+if (NULL == hubGenome)
+    apiErrAbort("failed to find specified genome=%s for endpoint '/getData/sequence'  given hubUrl '%s'", genome, hubUrl);
+
+/* might be a UCSC database track hub, where hubGenome=name is the database */
+if (isEmpty(hubGenome->twoBitPath))
+    {
+    getSequenceData(hubGenome->name, hubUrl);
+    return;
+    }
+
+/* this MaybeChromInfo will open the twoBit file, if not already done */
+struct chromInfo *ci = trackHubMaybeChromInfo(hubGenome->name, chrom);
+if (NULL == ci)
+    apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?genome=%s&chrom=%s' given hubUrl='%s'", chrom, genome, chrom, hubUrl);
+
+struct jsonWrite *jw = apiStartOutput();
+jsonWriteString(jw, "hubUrl", hubUrl);
+jsonWriteString(jw, "genome", genome);
+jsonWriteString(jw, "chrom", chrom);
+int fragStart = 0, fragEnd = 0;	/* both stay 0 when no start/end range is given */
+if (isNotEmpty(start) && isNotEmpty(end))
+    {
+    fragStart = sqlSigned(start);
+    fragEnd = sqlSigned(end);
+    jsonWriteNumber(jw, "start", (long long)fragStart);
+    jsonWriteNumber(jw, "end", (long long)fragEnd);
+    }
+else
+    {
+    jsonWriteNumber(jw, "start", (long long)0);
+    jsonWriteNumber(jw, "end", (long long)ci->size);
+    }
+struct dnaSeq *seq = twoBitReadSeqFrag(hubGenome->tbf, chrom, fragStart, fragEnd);
+if (NULL == seq)
+    {
+    if (fragEnd > fragStart)
+	apiErrAbort("can not find sequence for chrom=%s;start=%s;end=%s for endpoint '/getData/sequence?genome=%s&chrom=%s;start=%s;end=%s' given hubUrl='%s'", chrom, start, end, genome, chrom, start, end, hubUrl);
+    else
+	apiErrAbort("can not find sequence for chrom=%s for endpoint '/getData/sequence?genome=%s&chrom=%s' given hubUrl='%s'", chrom, genome, chrom, hubUrl);
+    }
+jsonWriteString(jw, "dna", seq->dna);
+jsonWriteObjectEnd(jw);	/* closing the overall global object */
+fputs(jw->dy->string,stdout);
+freeDnaSeq(&seq);
 }
 
 void apiGetData(char *words[MAX_PATH_INFO])
 /* 'getData' function, words[1] is the subCommand */
 {
+char *hubUrl = cgiOptionalString("hubUrl");
 if (sameWord("track", words[1]))
     {
-    char *hubUrl = cgiOptionalString("hubUrl");
     if (isNotEmpty(hubUrl))
 	getHubTrackData(hubUrl);
     else
 	getTrackData();
     }
 else if (sameWord("sequence", words[1]))
-    getSequenceData();
+    {
+    if (isNotEmpty(hubUrl))
+	getHubSequenceData(hubUrl);
+    else
+	{
+	char *db = cgiOptionalString("db");
+	if (isEmpty(db))
+	    apiErrAbort("missing URL db=<ucscDb> name for endpoint '/getData/sequence'");
+	getSequenceData(db, NULL);
+	}
+    }
 else
     apiErrAbort("do not recognize endpoint function: '/%s/%s'", words[0], words[1]);
 }