bc20671bb19cc948bda2c36063601ac5edc4d514 angie Tue Feb 15 10:57:59 2011 -0800 Fixing bug found while playing w/hgTables for Code Review #2840(not caused by Brian's changes): BAM tracks can have separate BAM files per chromosome, in which case the db table has an extra column seqName, so bigWigFileName() won't return the correct fileName for those tracks. Share bigWigFileName's logic for detecting custom track or hub track, and use bamFile.h's bamFileNameFromTable() for database tables. diff --git src/hg/hgTables/bam.c src/hg/hgTables/bam.c index 77f3685..1c1b86d 100644 --- src/hg/hgTables/bam.c +++ src/hg/hgTables/bam.c @@ -29,36 +29,38 @@ #endif//def USE_BAM && KNETFILE_HOOKS boolean isBamTable(char *table) /* Return TRUE if table corresponds to a BAM file. */ { if (isHubTrack(table)) { struct trackDb *tdb = hashFindVal(fullTrackAndSubtrackHash, table); return startsWithWord("bam", tdb->type); } else return trackIsType(database, table, curTrack, "bam", ctLookupName); } -char *bamFileName(char *table, struct sqlConnection *conn) +char *bamFileName(char *table, struct sqlConnection *conn, char *seqName) /* Return file name associated with BAM. This handles differences whether it's * a custom or built-in track. Do a freeMem on returned string when done. */ { -/* Implementation is same as bigWig. */ -return bigWigFileName(table, conn); +char *fileName = bigFileNameFromCtOrHub(table, conn); +if (fileName == NULL) + fileName = bamFileNameFromTable(conn, table, seqName); +return fileName; } char *bamAsDef = "table samAlignment\n" "\"The fields of a SAM short read alignment, the text version of BAM.\"\n" " (\n" " string qName; \"Query template name - name of a read\"\n" " ushort flag; \"Flags. 0x10 set for reverse complement. See SAM docs for others.\"\n" " string rName; \"Reference sequence name (often a chromosome)\"\n" " uint pos; \"1 based position\"\n" " ubyte mapQ; \"Mapping quality 0-255, 255 is best\"\n" " string cigar; \"CIGAR encoded alignment string.\"\n" " string rNext; \"Ref sequence for next (mate) read. '=' if same as rName, '*' if no mate\"\n" " int pNext; \"Position (1-based) of next (mate) sequence. May be -1 or 0 if no mate\"\n" " int tLen; \"Size of DNA template for mated pairs. -size for one of mate pairs\"\n" @@ -163,71 +165,72 @@ /* Create an array of column indexes corresponding to the selected field list. */ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); -char *fileName = bamFileName(table, conn); struct asObject *as = bamAsObj(); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); int maxOut = bigFileMaxOutput(); for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { struct lm *lm = lmInit(0); + char *fileName = bamFileName(table, conn, region->chrom); struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom, region->start, region->end, lm); char *row[SAMALIGNMENT_NUM_COLS]; char numBuf[BAM_NUM_BUF_SIZE]; for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next) { samAlignmentToRow(sam, numBuf, row); if (asFilterOnRow(filter, row)) { /* if we're looking for identifiers, check if this matches */ if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL)) continue; int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); maxOut --; } } + freeMem(fileName); lmCleanup(&lm); } /* Clean up and exit. */ hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); } int cigarWidth(char *cigar, int cigarSize) /* Return width of alignment as encoded in cigar format string. */ { int tLength=0; char *s, *end = cigar + cigarSize; s = cigar; @@ -281,74 +284,77 @@ bed->chromStart = sam->pos - 1; bed->chromEnd = bed->chromStart + cigarWidth(sam->cigar, strlen(sam->cigar)); bed->name = lmCloneString(bedLm, sam->qName); slAddHead(pBedList, bed); } } lmCleanup(&lm); } struct bed *bamGetFilteredBedsOnRegions(struct sqlConnection *conn, char *db, char *table, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from BAM, in all regions, that pass filtering. */ { /* Figure out bam file name get column info and filter. */ -char *fileName = bamFileName(table, conn); struct asObject *as = bamAsObj(); struct asFilter *filter = asFilterFromCart(cart, db, table, as); /* Get beds a region at a time. */ struct bed *bedList = NULL; struct region *region; for (region = regionList; region != NULL; region = region->next) + { + char *fileName = bamFileName(table, conn, region->chrom); addFilteredBedsOnRegion(fileName, region, table, filter, lm, &bedList); + freeMem(fileName); + } slReverse(&bedList); return bedList; } struct slName *randomBamIds(char *table, struct sqlConnection *conn, int count) /* Return some semi-random qName based IDs from a BAM file. */ { /* Read 10000 items from bam file, or if they ask for a big list, then 4x what they ask for. */ -char *fileName = bamFileName(table, conn); +char *fileName = bamFileName(table, conn, NULL); samfile_t *fh = bamOpen(fileName, NULL); struct lm *lm = lmInit(0); int orderedCount = count * 4; if (orderedCount < 10000) orderedCount = 10000; struct samAlignment *sam, *samList = bamReadNextSamAlignments(fh, orderedCount, lm); /* Shuffle list and extract qNames from first count of them. */ shuffleList(&samList, 1); struct slName *randomIdList = NULL; int i; for (i=0, sam = samList; i<count && sam != NULL; ++i, sam = sam->next) slNameAddHead(&randomIdList, sam->qName); /* Clean up and go home. */ lmCleanup(&lm); bamClose(&fh); freez(&fileName); return randomIdList; } void showSchemaBam(char *table) /* Show schema on bam. */ { struct sqlConnection *conn = hAllocConn(database); -char *fileName = bamFileName(table, conn); +char *fileName = bamFileName(table, conn, NULL); struct asObject *as = bamAsObj(); hPrintf("<B>Database:</B> %s", database); hPrintf(" <B>Primary Table:</B> %s<br>", table); hPrintf("<B>BAM File:</B> %s", fileName); hPrintf("<BR>\n"); hPrintf("<B>Format description:</B> %s<BR>", as->comment); hPrintf("See the <A HREF=\"%s\" target=_blank>SAM Format Specification</A> for more details<BR>\n", "http://samtools.sourceforge.net/SAM-1.3.pdf"); /* Put up table that describes fields. */ hTableStart(); hPrintf("<TR><TH>field</TH>"); hPrintf("<TH>description</TH> "); puts("</TR>\n");