3a5ceb6cf9b3a4869e57ab621d1143fbd0573a5a braney Wed Nov 23 17:01:52 2016 -0800 implements blat results to custom track using bigPsl. Requires hg.conf variable useBlatBigPsl=on, otherwise old behavior exists diff --git src/hg/hgBlat/hgBlat.c src/hg/hgBlat/hgBlat.c index eea2f90..ff85069 100644 --- src/hg/hgBlat/hgBlat.c +++ src/hg/hgBlat/hgBlat.c @@ -12,30 +12,31 @@ #include "fa.h" #include "psl.h" #include "genoFind.h" #include "cheapcgi.h" #include "htmshell.h" #include "hdb.h" #include "hui.h" #include "cart.h" #include "dbDb.h" #include "blatServers.h" #include "web.h" #include "hash.h" #include "botDelay.h" #include "trashDir.h" #include "trackHub.h" +#include "hgConfig.h" struct cart *cart; /* The user's ui state. */ struct hash *oldVars = NULL; boolean orgChange = FALSE; boolean dbChange = FALSE; struct serverTable /* Information on a server. */ { char *db; /* Database name. */ char *genome; /* Genome name. */ boolean isTrans; /* Is tranlated to protein? */ char *host; /* Name of machine hosting server. */ char *port; /* Port that hosts server. */ @@ -250,31 +251,46 @@ { char url[1024]; safef(url, sizeof(url), "%s?position=%s:%d-%d&db=%s&ss=%s+%s&%s%s", browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, pslName, faName, uiState, unhideTrack); /* htmlStart("Redirecting"); */ /* Odd it appears that we've already printed the Content-Typ:text/html line but I can't figure out where... */ htmStart(stdout, "Redirecting"); printf("<script>location.replace('%s');</script>", url); printf("<noscript>No javascript support:<br>Click <a href='%s'>here</a> for browser.</noscript>", url); htmlEnd(); } -void showAliPlaces(char *pslName, char *faName, char *database, +static void makeBigPsl(char *pslName, char *faName, char *db, char *outputBigBed) +/* Make a bigPsl with the blat results. */ +{ +struct tempName bigPslTn; +trashDirFile(&bigPslTn, "bigPsl", "bp", ".bigPsl"); + +char cmdBuffer[4096]; +safef(cmdBuffer, sizeof(cmdBuffer), "loader/pslToBigPsl %s -fa=%s stdout | sort -k1,1 -k2,2n > %s", pslName, faName, bigPslTn.forCgi); +system(cmdBuffer); +safef(cmdBuffer, sizeof(cmdBuffer), "loader/bedToBigBed -extraIndex=name -tab -as=loader/bigPsl.as -type=bed12+13 %s http://hgdownload.cse.ucsc.edu/goldenPath/%s/bigZips/%s.chrom.sizes %s", + bigPslTn.forCgi, db, db, outputBigBed); +system(cmdBuffer); +unlink(bigPslTn.forCgi); +} + +void showAliPlaces(char *pslName, char *faName, char *customText, char *database, enum gfType qType, enum gfType tType, char *organism, boolean feelingLucky) /* Show all the places that align. */ { struct lineFile *lf = pslFileOpen(pslName); struct psl *pslList = NULL, *psl; char *browserUrl = hgTracksName(); char *hgcUrl = hgcName(); char uiState[64]; char *vis; char unhideTrack[64]; char *sort = cartUsualString(cart, "sort", sortList[0]); char *output = cartUsualString(cart, "output", outputList[0]); boolean pslOut = startsWith("psl", output); boolean isStraightNuc = (qType == gftRna || qType == gftDna); @@ -324,55 +340,60 @@ } else { slSort(&pslList, pslCmpQueryScore); } if(feelingLucky) { /* If we found something jump browser to there. */ if(slCount(pslList) > 0) printLuckyRedirect(browserUrl, pslList, database, pslName, faName, uiState, unhideTrack); /* Otherwise call ourselves again not feeling lucky to print empty results. */ else { cartWebStart(cart, database, "%s BLAT Results", trackHubSkipHubName(organism)); - showAliPlaces(pslName, faName, database, qType, tType, organism, FALSE); + showAliPlaces(pslName, faName, customText, database, qType, tType, organism, FALSE); cartWebEnd(); } } else if (pslOut) { printf("<TT><PRE>"); if (!sameString(output, "psl no header")) pslxWriteHead(stdout, qType, tType); for (psl = pslList; psl != NULL; psl = psl->next) pslTabOut(psl, stdout); printf("</PRE></TT>"); } else { printf("<H2>BLAT Search Results</H2>"); char* posStr = cartOptionalString(cart, "position"); if (posStr != NULL) printf("<P>Go back to <A HREF=\"%s\">%s</A> on the Genome Browser.</P>\n", browserUrl, posStr); printf("<DIV STYLE=\"display:block; float:left\"><TT><PRE>"); printf(" ACTIONS QUERY SCORE START END QSIZE IDENTITY CHRO STRAND START END SPAN\n"); printf("---------------------------------------------------------------------------------------------------\n"); for (psl = pslList; psl != NULL; psl = psl->next) { + if (customText) + printf("<A HREF=\"%s?position=%s:%d-%d&db=%s&hgt.customText=%s&%s%s\">", + browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, + customText, uiState, unhideTrack); + else printf("<A HREF=\"%s?position=%s:%d-%d&db=%s&ss=%s+%s&%s%s\">", browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, pslName, faName, uiState, unhideTrack); printf("browser</A> "); printf("<A HREF=\"%s?o=%d&g=htcUserAli&i=%s+%s+%s&c=%s&l=%d&r=%d&db=%s&%s\">", hgcUrl, psl->tStart, pslName, cgiEncode(faName), psl->qName, psl->tName, psl->tStart, psl->tEnd, database, uiState); printf("details</A> "); printf("%-14s %5d %5d %5d %5d %5.1f%% %4s %2s %9d %9d %6d\n", psl->qName, pslScore(psl), psl->qStart+1, psl->qEnd, psl->qSize, 100.0 - pslCalcMilliBad(psl, TRUE) * 0.1, skipChr(psl->tName), psl->strand, psl->tStart+1, psl->tEnd, psl->tEnd - psl->tStart); } printf("</PRE></TT>\n"); @@ -479,33 +500,63 @@ int i, size = seq->size, count = 0; char *s = seq->dna; for (i=0; i<size; ++i) if (s[i] != unknown) ++count; return count; } void uToT(struct dnaSeq *seqList) /* Convert any u's in sequence to t's. */ { struct dnaSeq *seq; for (seq = seqList; seq != NULL; seq = seq->next) subChar(seq->dna, 'u', 't'); } -void blatSeq(char *userSeq, char *organism) +static char *getUnusedName(char *database, struct cart *cart) +// Find a track name that isn't currently a custom track. +{ +struct slName *browserLines = NULL; +struct customTrack *ctList = customTracksParseCart(database, cart, &browserLines, NULL); +struct customTrack *ct; +int count = 0; +char buffer[4096]; + +for(;;count++) + { + for (ct=ctList; + ct != NULL; + ct=ct->next) + { + safef(buffer, sizeof buffer, "ct_BlatResults%d", count); + if (startsWith(buffer, ct->tdb->track)) + // Found a track with this name. + break; + } + + if (ct == NULL) + break; + } +safef(buffer, sizeof buffer, "Blat Results (%d)", count); + +return cloneString(buffer); +} + +void blatSeq(char *userSeq, char *organism, char *database) /* Blat sequence user pasted in. */ { +boolean useBigPsl = cfgOptionBooleanDefault("useBlatBigPsl", FALSE); FILE *f; struct dnaSeq *seqList = NULL, *seq; struct tempName pslTn, faTn; int maxSingleSize, maxTotalSize, maxSeqCount; int minSingleSize = minMatchShown; char *genome, *db; char *type = cgiString("type"); char *seqLetters = cloneString(userSeq); struct serverTable *serve; int conn; int oneSize, totalSize = 0, seqCount = 0; boolean isTx = FALSE; boolean isTxTx = FALSE; boolean txTxBoth = FALSE; struct gfOutput *gvo; @@ -680,32 +731,65 @@ else { gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo); } } else { gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo); } gfOutputQuery(gvo, f); } carefulClose(&f); -showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, + +if (useBigPsl) + { + // make bigPsl + struct tempName bigBedTn; + trashDirFile(&bigBedTn, "bigPsl", "bp", ".bb"); + makeBigPsl(pslTn.forCgi, faTn.forCgi, serve->db, bigBedTn.forCgi); + struct tempName customTextTn; + trashDirFile(&customTextTn, "bigPsl", "ct", ".txt"); + FILE *fp = fopen(customTextTn.forCgi, "w"); + char* host = getenv("HTTP_HOST"); + char* reqUrl = getenv("REQUEST_URI"); + // remove everything after / in URL + char *e = strrchr(reqUrl, '/'); + if (e) *e = 0; + + char *trackName = getUnusedName(database, cart); + char *customTextTemplate = "track type=bigPsl visibility=pack showAll=on htmlUrl=http://%s/goldenPath/help/hgUserPsl.html %s bigDataUrl=http://%s/%s/%s name=\"%s\"\n"; + char *extraForMismatch = "showDiffBasesAllScales=. baseColorUseSequence=lfExtra baseColorDefault=diffBases"; + + if (qIsProt) + extraForMismatch = ""; + fprintf(fp, customTextTemplate, host, extraForMismatch, host, reqUrl, bigBedTn.forCgi, trackName); + fclose(fp); + + char buffer[4096]; + safef(buffer, sizeof buffer, "http://%s/%s/%s", host, reqUrl, customTextTn.forCgi); + showAliPlaces(pslTn.forCgi, faTn.forCgi, buffer, serve->db, qType, tType, + organism, feelingLucky); + } +else + { + showAliPlaces(pslTn.forCgi, faTn.forCgi, NULL, serve->db, qType, tType, organism, feelingLucky); + } if(!feelingLucky) cartWebEnd(); gfFileCacheFree(&tFileCache); } void askForSeq(char *organism, char *db) /* Put up a little form that asks for sequence. * Call self.... */ { /* ignore struct serverTable* return, but can error out if not found */ findServer(db, FALSE); /* JavaScript to update form when org changes */ char *onChangeText = "onchange=\"" "document.mainForm.changeInfo.value='orgChange';" @@ -854,31 +938,31 @@ { userSeq = cartOptionalString(cart, "seqFile"); } if (isEmpty(userSeq) || orgChange) { cartWebStart(theCart, db, "%s BLAT Search", trackHubSkipHubName(organism)); if (differentString(oldDb, db)) printf("<HR><P><EM><B>Note:</B> BLAT search is not available for %s %s; " "defaulting to %s %s</EM></P><HR>\n", hGenome(oldDb), hFreezeDate(oldDb), organism, hFreezeDate(db)); askForSeq(organism,db); cartWebEnd(); } else { - blatSeq(skipLeadingSpaces(userSeq), organism); + blatSeq(skipLeadingSpaces(userSeq), organism, db); } } /* Null terminated list of CGI Variables we don't want to save * permanently. */ char *excludeVars[] = {"Submit", "submit", "Clear", "Lucky", "type", "userSeq", "seqFile", "showPage", "changeInfo", NULL}; int main(int argc, char *argv[]) /* Process command line. */ { long enteredMainTime = clock1000(); oldVars = hashNew(10); cgiSpoof(&argc, argv); setUdcCacheDir();