88485cfa7f13affa28ec5765fe9b2db90cce42b6 markd Tue Dec 8 21:02:15 2020 -0800 hgPcr working diff --git src/jkOwnLib/gfBlatLib.c src/jkOwnLib/gfBlatLib.c index 2a6010d..546aa9c 100644 --- src/jkOwnLib/gfBlatLib.c +++ src/jkOwnLib/gfBlatLib.c @@ -93,67 +93,67 @@ if (lDiff < 0) diff = -1; else if (lDiff > 0) diff = 1; else diff = 0; #ifdef SOLARIS_WORKAROUND_COMPILER_BUG_BUT_FAILS_IN_64_BIT diff = (unsigned long)(a->t3) - (unsigned long)(b->t3); /* Casts needed for Solaris. Thanks Darren Platt! */ #endif /* SOLARIS_WORKAROUND_COMPILER_BUG_BUT_FAILS_IN_64_BIT */ } if (diff == 0) diff = a->tStart - b->tStart; return diff; } -static void startSeqQuery(struct gfConnection *conn, bioSeq *seq, char *type, char *genome, char *genomeDataDir) +static void startSeqQuery(struct gfConnection *conn, bioSeq *seq, char *type) /* Send a query that involves some sequence. */ { char buf[1024]; // room for error message if we need it. safef(buf, sizeof(buf), "%s%s", gfSignature(), type); -if (genomeDataDir != NULL) - safefcat(buf, sizeof(buf), " %s %s", genome, genomeDataDir); +if (conn->genomeDataDir != NULL) + safefcat(buf, sizeof(buf), " %s %s", conn->genome, conn->genomeDataDir); safefcat(buf, sizeof(buf), " %d", seq->size); mustWriteFd(conn->fd, buf, strlen(buf)); if (read(conn->fd, buf, 1) < 0) errAbort("startSeqQuery: read failed: %s", strerror(errno)); if (buf[0] != 'Y') { // try to get read of message, might be an a useful error int n = read(conn->fd, buf+1, sizeof(buf)-2); if (n >= 0) buf[n+1] = '\0'; errAbort("Expecting 'Y' from server, got %s", buf); } mustWriteFd(conn->fd, seq->dna, seq->size); } static void gfServerWarn(bioSeq *seq, char *warning) /* Write out warning. */ { warn("couldn't process %s: %s", seq->name, warning); } -static struct gfRange *gfQuerySeq(struct gfConnection *conn, struct dnaSeq *seq, char *genome, char *genomeDataDir) +static struct gfRange *gfQuerySeq(struct gfConnection *conn, struct dnaSeq *seq) /* Ask server for places sequence hits. */ { struct gfRange *rangeList = NULL, *range; char buf[256], *row[6]; int rowSize; gfBeginRequest(conn); -startSeqQuery(conn, seq, "query", genome, genomeDataDir); +startSeqQuery(conn, seq, "query"); /* Read results line by line and save in list, and return. */ for (;;) { netRecieveString(conn->fd, buf); if (sameString(buf, "end")) { break; } else if (startsWith("Error:", buf)) { gfServerWarn(seq, buf); break; } else @@ -207,48 +207,47 @@ if ((q = nextWord(&line)) == NULL) break; if ((t = nextWord(&line)) == NULL) internalErr(); lmAllocVar(lm, hit); hit->qStart = sqlUnsigned(q); hit->tStart = sqlUnsigned(t); slAddHead(&hitList, hit); } freez(&s); slReverse(&hitList); return hitList; } static void gfQuerySeqTrans(struct gfConnection *conn, aaSeq *seq, struct gfClump *clumps[2][3], - struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize, - char *genome, char *genomeDataDir) + struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize) /* Query server for clumps where aa sequence hits translated index. */ { int frame, isRc, rowSize; struct gfClump *clump; int tileSize = 0; char *line; char buf[256], *row[12]; struct gfSeqSource *ssList = NULL, *ss; for (isRc = 0; isRc <= 1; ++isRc) for (frame = 0; frame<3; ++frame) clumps[isRc][frame] = NULL; /* Send sequence to server. */ gfBeginRequest(conn); -startSeqQuery(conn, seq, "protQuery", genome, genomeDataDir); +startSeqQuery(conn, seq, "protQuery"); line = netRecieveString(conn->fd, buf); if (!startsWith("Error:", line)) { tileSize = findTileSize(line); /* Read results line by line and save in memory. */ for (;;) { /* Read and parse first line that describes clump overall. */ netRecieveString(conn->fd, buf); if (sameString(buf, "end")) { break; } else if (startsWith("Error:", buf)) @@ -280,50 +279,49 @@ for (isRc = 0; isRc <= 1; ++isRc) for (frame = 0; frame<3; ++frame) slReverse(&clumps[isRc][frame]); } else { gfServerWarn(seq, line); } gfEndRequest(conn); *retSsList = ssList; *retTileSize = tileSize; } static void gfQuerySeqTransTrans(struct gfConnection *conn, struct dnaSeq *seq, struct gfClump *clumps[2][3][3], - struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize, - char *genome, char *genomeDataDir) + struct lm *lm, struct gfSeqSource **retSsList, int *retTileSize) /* Query server for clumps where translated DNA sequence hits translated * index. */ { int qFrame, tFrame, isRc, rowSize; struct gfClump *clump; int tileSize = 0; char *line; char buf[256], *row[12]; struct gfSeqSource *ssList = NULL, *ss; for (isRc = 0; isRc <= 1; ++isRc) for (qFrame = 0; qFrame<3; ++qFrame) for (tFrame = 0; tFrame<3; ++tFrame) clumps[isRc][qFrame][tFrame] = NULL; /* Send sequence to server. */ gfBeginRequest(conn); -startSeqQuery(conn, seq, "transQuery", genome, genomeDataDir); +startSeqQuery(conn, seq, "transQuery"); line = netRecieveString(conn->fd, buf); if (!startsWith("Error:", line)) { tileSize = findTileSize(line); /* Read results line by line and save in memory. */ for (;;) { /* Read and parse first line that describes clump overall. */ netRecieveString(conn->fd, buf); if (sameString(buf, "end")) { break; } else if (startsWith("Error:", buf)) @@ -552,42 +550,41 @@ static void getTargetName(char *tSpec, boolean includeFile, char *targetName) /* Put sequence name, optionally prefixed by file: in targetName. */ { if (includeFile) { char seqName[128]; char fileName[PATH_LEN]; gfiGetSeqName(tSpec, seqName, fileName); safef(targetName, PATH_LEN, "%s:%s", fileName, seqName); } else gfiGetSeqName(tSpec, targetName, NULL); } void gfAlignStrand(struct gfConnection *conn, char *tSeqDir, struct dnaSeq *seq, - boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out, - char *genome, char *genomeDataDir) + boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out) /* Search genome on server with one strand of other sequence to find homology. * Then load homologous bits of genome locally and do detailed alignment. * Call 'outFunction' with each alignment that is found. */ { struct ssBundle *bun; struct gfRange *rangeList = NULL, *range; struct dnaSeq *targetSeq; char targetName[PATH_LEN]; -rangeList = gfQuerySeq(conn, seq, genome, genomeDataDir); +rangeList = gfQuerySeq(conn, seq); close(conn->fd); conn->fd = -1; slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); for (range = rangeList; range != NULL; range = range->next) { getTargetName(range->tName, out->includeTargetFile, targetName); targetSeq = gfiExpandAndLoadCached(range, tFileCache, tSeqDir, seq->size, &range->tTotalSize, FALSE, FALSE, usualExpansion); AllocVar(bun); bun->qSeq = seq; bun->genoSeq = targetSeq; alignComponents(range, bun, ffCdna); ssStitch(bun, ffCdna, minMatch, ssAliCount); saveAlignments(targetName, range->tTotalSize, range->tStart, @@ -1042,51 +1039,50 @@ { slAddTail(&oldT3->next, t3); } else { hashAdd(t3Hash, range->tName, t3); } } *retT3Hash = t3Hash; *retSeqList = tSeqList; *retT3RefList = t3RefList; } void gfAlignTrans(struct gfConnection *conn, char *tSeqDir, aaSeq *seq, int minMatch, - struct hash *tFileCache, struct gfOutput *out, - char *genome, char *genomeDataDir) + struct hash *tFileCache, struct gfOutput *out) /* Search indexed translated genome on server with an amino acid sequence. * Then load homologous bits of genome locally and do detailed alignment. * Call 'outFunction' with each alignment that is found. */ { struct ssBundle *bun; struct gfClump *clumps[2][3], *clump; struct gfRange *rangeList = NULL, *range, *rl; struct dnaSeq *targetSeq, *tSeqList = NULL; char targetName[PATH_LEN]; int tileSize; int frame, isRc = 0; struct hash *t3Hash = NULL; struct slRef *t3RefList = NULL, *ref; struct gfSeqSource *ssList = NULL, *ss; struct trans3 *t3; struct lm *lm = lmInit(0); /* Get clumps from server. */ -gfQuerySeqTrans(conn, seq, clumps, lm, &ssList, &tileSize, genome, genomeDataDir); +gfQuerySeqTrans(conn, seq, clumps, lm, &ssList, &tileSize); for (isRc = 0; isRc <= 1; ++isRc) { /* Figure out which parts of sequence we need to load. */ for (frame = 0; frame < 3; ++frame) { rl = seqClumpToRangeList(clumps[isRc][frame], frame); rangeList = slCat(rangeList, rl); } /* Convert from amino acid to nucleotide coordinates. */ rangeCoorTimes3(rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax); loadHashT3Ranges(rangeList, tSeqDir, tFileCache, seq->size, isRc, &t3Hash, &tSeqList, &t3RefList); @@ -1163,52 +1159,51 @@ for (range = rangeList; range != NULL; range = range->next) { range->qStart = 3*range->qStart + qFrame; range->qEnd = 3*range->qEnd + qFrame; range->tStart = 3*range->tStart + tFrame; range->tEnd = 3*range->tEnd + tFrame; if (t3Hash) t3 = trans3Find(t3Hash, range->tSeq->name, range->tStart + tOffset, range->tEnd + tOffset); range->tSeq = t3->seq; range->t3 = t3; } } void gfAlignTransTrans(struct gfConnection *conn, char *tSeqDir, struct dnaSeq *qSeq, boolean qIsRc, int minMatch, struct hash *tFileCache, - struct gfOutput *out, boolean isRna, - char *genome, char *genomeDataDir) + struct gfOutput *out, boolean isRna) /* Search indexed translated genome on server with an dna sequence. Translate * this sequence in three frames. Load homologous bits of genome locally * and do detailed alignment. Call 'outFunction' with each alignment * that is found. */ { struct gfClump *clumps[2][3][3], *clump; char targetName[PATH_LEN]; int qFrame, tFrame, tIsRc; struct gfSeqSource *ssList = NULL, *ss; struct lm *lm = lmInit(0); int tileSize; struct gfRange *rangeList = NULL, *rl, *range; struct trans3 *qTrans = trans3New(qSeq), *t3; struct slRef *t3RefList = NULL, *t3Ref; struct hash *t3Hash = NULL; struct dnaSeq *tSeqList = NULL; enum ffStringency stringency = (isRna ? ffCdna : ffLoose); /* Query server for clumps. */ -gfQuerySeqTransTrans(conn, qSeq, clumps, lm, &ssList, &tileSize, genome, genomeDataDir); +gfQuerySeqTransTrans(conn, qSeq, clumps, lm, &ssList, &tileSize); for (tIsRc=0; tIsRc <= 1; ++tIsRc) { /* Figure out which ranges need to be loaded and load them. */ for (qFrame = 0; qFrame < 3; ++qFrame) { for (tFrame = 0; tFrame < 3; ++tFrame) { rl = seqClumpToRangeList(clumps[tIsRc][qFrame][tFrame], tFrame); rangeList = slCat(rangeList, rl); } } rangeCoorTimes3(rangeList); slSort(&rangeList, gfRangeCmpTarget); rangeList = gfRangesBundle(rangeList, ffIntronMax);