src/hg/hgc/bamClick.c 1.12

1.12 2009/11/26 00:29:11 angie
Refinements to searching for mates out of window: don't do the extra bamFetch unless pairSearchRange > 0; increase default pairSearchRange default; clip start of window at 0; there's only one item name, so don't need to loop on pairHash name.
Index: src/hg/hgc/bamClick.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/bamClick.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -b -B -U 1000000 -r1.11 -r1.12
--- src/hg/hgc/bamClick.c	8 Oct 2009 06:38:26 -0000	1.11
+++ src/hg/hgc/bamClick.c	26 Nov 2009 00:29:11 -0000	1.12
@@ -1,193 +1,189 @@
 /* bamClick - handler for alignments in BAM format (produced by MAQ,
  * BWA and some other short-read alignment tools). */
 #ifdef USE_BAM
 
 #include "common.h"
 #include "hash.h"
 #include "hdb.h"
 #include "bamFile.h"
 #include "hgc.h"
 
 static char const rcsid[] = "$Id$";
 
 #include "bamFile.h"
 
 struct bamTrackData
     {
     int itemStart;
     char *itemName;
     struct hash *pairHash;
     };
 
 static void singleBamDetails(const bam1_t *bam)
 /* Print out the properties of this alignment. */
 {
 const bam1_core_t *core = &bam->core;
 char *itemName = bam1_qname(bam);
 int tLength = bamGetTargetLength(bam);
 int tStart = core->pos, tEnd = tStart+tLength;
 boolean isRc = bamIsRc(bam);
 printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName);
 printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual);
 printf("<B>CIGAR string: </B><tt>%s</tt> (", bamGetCigar(bam));
 bamShowCigarEnglish(bam);
 printf(")<BR>\n");
 printf("<B>Tags:</B>");
 bamShowTags(bam);
 puts("<BR>");
 printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n &nbsp;&nbsp;", core->flag);
 bamShowFlagsEnglish(bam);
 puts("<BR><BR>");
 char nibName[HDB_MAX_PATH_STRING];
 hNibForChrom(database, seqName, nibName);
 struct dnaSeq *genoSeq = hFetchSeq(nibName, seqName, tStart, tEnd);
 struct ffAli *ffa = bamToFfAli(bam, genoSeq, tStart);
 char *qSeq = ffa->nStart;
 printf("<B>Alignment of %s to %s:%d-%d%s:</B><BR>\n", itemName,
        seqName, tStart+1, tEnd, (isRc ? " (reverse complemented)" : ""));
 ffShowSideBySide(stdout, ffa, qSeq, 0, genoSeq->dna, tStart, tLength, 0, tLength, 8, isRc,
 		 FALSE);
 printf("<B>Sequence quality scores:</B><BR>\n<TT><TABLE><TR>\n");
 UBYTE *quals = bamGetQueryQuals(bam);
 int i;
 for (i = 0;  i < core->l_qseq;  i++)
     {
     if (i > 0 && (i % 24) == 0)
 	printf("</TR>\n<TR>");
     printf("<TD>%c<BR>%d</TD>", qSeq[i], quals[i]);
     }
 printf("</TR></TABLE></TT>\n");
 }
 
 static void showOverlap(const bam1_t *leftBam, const bam1_t *rightBam)
 /* If the two reads overlap, show how. */
 {
 const bam1_core_t *leftCore = &(leftBam->core), *rightCore = &(rightBam->core);
 int leftStart = leftCore->pos, rightStart = rightCore->pos;
 int leftLen = bamGetTargetLength(leftBam), rightLen = bamGetTargetLength(rightBam);
 char *leftSeq = bamGetQuerySequence(leftBam), *rightSeq = bamGetQuerySequence(rightBam);
 if (bamIsRc(leftBam))
     reverseComplement(leftSeq, strlen(leftSeq));
 if (bamIsRc(rightBam))
     reverseComplement(rightSeq, strlen(rightSeq));
 if ((rightStart > leftStart && leftStart + leftLen > rightStart) ||
     (leftStart > rightStart && rightStart+rightLen > leftStart))
     {
     printf("<B>Note: End read alignments overlap:</B><BR>\n<PRE><TT>");
     int i = leftStart - rightStart;
     while (i-- > 0)
 	putc(' ', stdout);
     puts(leftSeq);
     i = rightStart - leftStart;
     while (i-- > 0)
 	putc(' ', stdout);
     puts(rightSeq);
     puts("</TT></PRE>");
     }
 }
 
 static void bamPairDetails(const bam1_t *leftBam, const bam1_t *rightBam)
 /* Print out details for paired-end reads. */
 {
 showOverlap(leftBam, rightBam);
 printf("<TABLE><TR><TD><H4>Left end read</H4>\n");
 singleBamDetails(leftBam);
 printf("</TD><TD><H4>Right end read</H4>\n");
 singleBamDetails(rightBam);
 printf("</TD></TR></TABLE>\n");
 }
 
 static int oneBam(const bam1_t *bam, void *data)
 /* This is called on each record retrieved from a .bam file. */
 {
 const bam1_core_t *core = &bam->core;
 if (core->flag & BAM_FUNMAP)
     return 0;
 struct bamTrackData *btd = (struct bamTrackData *)data;
 if (sameString(bam1_qname(bam), btd->itemName))
     {
     if (btd->pairHash == NULL || (core->flag & BAM_FPAIRED) == 0)
 	{
 	if (core->pos == btd->itemStart)
 	    singleBamDetails(bam);
 	}
     else
 	{
 	bam1_t *firstBam = (bam1_t *)hashFindVal(btd->pairHash, btd->itemName);
 	if (firstBam == NULL)
 	    hashAdd(btd->pairHash, btd->itemName, bamClone(bam));
 	else
 	    {
 	    bamPairDetails(firstBam, bam);
 	    hashRemove(btd->pairHash, btd->itemName);
 	    }
 	}
     }
 return 0;
 }
 
 void doBamDetails(struct trackDb *tdb, char *item)
 /* Show details of an alignment from a BAM file. */
 {
 if (item == NULL)
     errAbort("doBamDetails: NULL item name");
 int start = cartInt(cart, "o");
 // TODO: libify tdb settings table_pairEndsByName, stripPrefix and pairSearchRange
 
 char varName[1024];
 safef(varName, sizeof(varName), "%s_pairEndsByName", tdb->tableName);
 boolean isPaired = cartUsualBoolean(cart, varName,
 				    (trackDbSetting(tdb, "pairEndsByName") != NULL));
 char *seqNameForBam = seqName;
 char *stripPrefix = trackDbSetting(tdb, "stripPrefix");
 if (stripPrefix && startsWith(stripPrefix, seqName))
     seqNameForBam = seqName + strlen(stripPrefix);
 char posForBam[512];
 safef(posForBam, sizeof(posForBam), "%s:%d-%d", seqNameForBam, winStart, winEnd);
 
 bamIgnoreStrand();
 struct hash *pairHash = isPaired ? hashNew(0) : NULL;
 struct bamTrackData btd = {start, item, pairHash};
 char *fileName;
 if (isCustomTrack(tdb->tableName))
     {
     fileName = trackDbSetting(tdb, "bigDataUrl");
     if (fileName == NULL)
 	errAbort("doBamDetails: can't find bigDataUrl for custom track %s", tdb->tableName);
     }
 else
     fileName = bamFileNameFromTable(database, tdb->tableName, seqNameForBam);
 bamFetch(fileName, posForBam, oneBam, &btd);
-if (isPaired && hashNumEntries(pairHash) > 0)
+if (isPaired)
     {
-    char *setting = trackDbSettingOrDefault(tdb, "pairSearchRange", "1000");
+    char *setting = trackDbSettingOrDefault(tdb, "pairSearchRange", "20000");
     int pairSearchRange = atoi(setting);
-    struct hashEl *hel;
-    struct hashCookie cookie = hashFirst(pairHash);
-    while ((hel = hashNext(&cookie)) != NULL)
+    if (pairSearchRange > 0 && hashNumEntries(pairHash) > 0)
 	{
+	// Repeat the search for item in a larger window:
 	struct hash *newPairHash = hashNew(0);
-	btd.itemName = hel->name;
 	btd.pairHash = newPairHash;
 	safef(posForBam, sizeof(posForBam), "%s:%d-%d", seqNameForBam,
-	      winStart-pairSearchRange, winEnd+pairSearchRange);
+	      max(0, winStart-pairSearchRange), winEnd+pairSearchRange);
 	bamFetch(fileName, posForBam, oneBam, &btd);
-	if (hashNumEntries(newPairHash) > 0)
-	    {
-	    struct hashCookie cookie2 = hashFirst(pairHash);
-	    while ((hel = hashNext(&cookie2)) != NULL)
+	}
+    struct hashEl *hel;
+    struct hashCookie cookie = hashFirst(btd.pairHash);
+    while ((hel = hashNext(&cookie)) != NULL)
 		{
 		bam1_t *bam = hel->val;
 		const bam1_core_t *core = &bam->core;
-		if (core->flag & BAM_FPROPER_PAIR)
-		    printf("<B>Note: </B>unable to find paired end "
-			   "for %s within +-%d of viewing window<BR>\n",
-			   hel->name, pairSearchRange);
+	if (! (core->flag & BAM_FMUNMAP))
+	    printf("<B>Note: </B>unable to find paired end for %s "
+		   "within +-%d bases of viewing window %s<BR>\n",
+		   item, pairSearchRange, addCommasToPos(database, cartString(cart, "position")));
 		singleBamDetails(bam);
 		}
 	    }
-	}
-    }
 }
 
 #endif//def USE_BAM