2e75814ac8468bdfb14d01cabe7455b63154a64b kent Thu Feb 10 16:59:09 2011 -0800 Adding example lines to BAM schema page in table browser. Pushing forward on getting identifiers to work there, but still a ways to go. (It no longer bombs at the paste identifiers page, but it does shortly thereafter.) diff --git src/hg/lib/bamFile.c src/hg/lib/bamFile.c index c45276b..a3aceff 100644 --- src/hg/lib/bamFile.c +++ src/hg/lib/bamFile.c @@ -184,88 +184,110 @@ #ifndef KNETFILE_HOOKS setCurrentDir(runDir); #endif//ndef KNETFILE_HOOKS samclose(fh); if (idx == NULL) { warn("bamFileExists: failed to read index corresponding to %s", bamFileName); return FALSE; } free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc. return TRUE; } return FALSE; } -void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, - samfile_t **pSamFile) -/* Open the .bam file, fetch items in the seq:start-end position range, - * and call callbackFunc on each bam item retrieved from the file plus callbackData. - * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. - * The pSamFile parameter is optional. If non-NULL it will be filled in, just for - * the benefit of the callback function, with the open samFile. */ +samfile_t *bamOpen(char *fileOrUrl, char **retBamFileName) +/* Return an open bam file, dealing with FUSE caching if need be. + * Return parameter if NON-null will return the file name after FUSing */ { char *bamFileName = samtoolsFileName(fileOrUrl); +if (retBamFileName != NULL) + *retBamFileName = bamFileName; samfile_t *fh = samopen(bamFileName, "rb", NULL); if (fh == NULL) { boolean usingUrl = (strstr(fileOrUrl, "tp://") || strstr(fileOrUrl, "https://")); struct dyString *urlWarning = dyStringNew(0); if (usingUrl) { char *udcFuseRoot = cfgOption("udcFuse.mountPoint"); boolean usingUdc = (udcFuseRoot != NULL && startsWith(udcFuseRoot, bamFileName)); if (usingUdc) dyStringAppend(urlWarning, " (using udcFuse)"); dyStringAppend(urlWarning, ". If you are able to access the URL with your web browser, " "please try reloading this page."); } warn("failed to open %s%s", fileOrUrl, urlWarning->string); - return; + return NULL; + } +return fh; +} + +void bamClose(samfile_t **pSamFile) +/* Close down a samefile_t */ +{ +if (pSamFile != NULL) + { + samclose(*pSamFile); + *pSamFile = NULL; + } } + + +void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, + samfile_t **pSamFile) +/* Open the .bam file, fetch items in the seq:start-end position range, + * and call callbackFunc on each bam item retrieved from the file plus callbackData. + * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. + * The pSamFile parameter is optional. If non-NULL it will be filled in, just for + * the benefit of the callback function, with the open samFile. */ +{ +char *bamFileName = NULL; +samfile_t *fh = bamOpen(fileOrUrl, &bamFileName); if (pSamFile != NULL) *pSamFile = fh; int chromId, start, end; int ret = bam_parse_region(fh->header, position, &chromId, &start, &end); if (ret != 0 && startsWith("chr", position)) ret = bam_parse_region(fh->header, position+strlen("chr"), &chromId, &start, &end); if (ret != 0) // If the bam file does not cover the current chromosome, OK return; #ifndef KNETFILE_HOOKS // Since samtools' url-handling code saves the .bai file to the current directory, // chdir to a trash directory before calling bam_index_load, then chdir back. char *runDir = getCurrentDir(); char *samDir = getSamDir(); setCurrentDir(samDir); #endif//ndef KNETFILE_HOOKS bam_index_t *idx = bam_index_load(bamFileName); #ifndef KNETFILE_HOOKS setCurrentDir(runDir); #endif//ndef KNETFILE_HOOKS if (idx == NULL) warn("bam_index_load(%s) failed.", bamFileName); else { ret = bam_fetch(fh->x.bam, idx, chromId, start, end, callbackData, callbackFunc); if (ret != 0) warn("bam_fetch(%s, %s (chromId=%d) failed (%d)", bamFileName, position, chromId, ret); free(idx); // Not freeMem, freez etc -- sam just uses malloc/calloc. } -samclose(fh); +bamClose(&fh); } boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */ { const bam1_core_t *core = &bam->core; return (core->flag & BAM_FREVERSE); } void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at * the beginning of the query sequence and quality; likewise for retHigh at end. * For convenience, retClippedQLen is the original query length minus soft clipping * (and the length of the query sequence that will be returned). */ { @@ -664,42 +686,44 @@ } static boolean isAllSameChar(char *s, int size, char c) /* Return TRUE if first size chars of s are 0 */ { int i; for (i=0; i<size; ++i) if (s[i] != c) return FALSE; return TRUE; } int bamAddOneSamAlignment(const bam1_t *bam, void *data) /* bam_fetch() calls this on each bam alignment retrieved. Translate each bam - * into a linkedFeaturesSeries item, and either store it until we find its mate - * or add it to tg->items. */ + * into a samAlignment. */ { struct bamToSamHelper *helper = (struct bamToSamHelper *)data; struct lm *lm = helper->lm; struct samAlignment *sam; lmAllocVar(lm, sam); const bam1_core_t *core = &bam->core; struct dyString *dy = helper->dy; sam->qName = lmCloneString(lm, bam1_qname(bam)); sam->flag = core->flag; +if (helper->chrom != NULL) sam->rName = helper->chrom; +else + sam->rName = lmCloneString(lm, helper->samFile->header->target_name[core->tid]); sam->pos = core->pos + 1; sam->mapQ = core->qual; dyStringClear(dy); bamUnpackCigar(bam, dy); sam->cigar = lmCloneStringZ(lm, dy->string, dy->stringSize); if (core->mtid >= 0) { if (core->tid == core->mtid) sam->rNext = "="; else sam->rNext = lmCloneString(lm, helper->samFile->header->target_name[core->mtid]); } else sam->rNext = "*"; sam->pNext = core->mpos + 1; @@ -727,69 +751,119 @@ * bam record. Results is allocated out of lm, since it tends to be large... */ { struct bamToSamHelper helper; helper.lm = lm; helper.chrom = chrom; helper.dy = dyStringNew(0); helper.samList = NULL; char posForBam[256]; safef(posForBam, sizeof(posForBam), "%s:%d-%d", chrom, start, end); bamFetch(fileOrUrl, posForBam, bamAddOneSamAlignment, &helper, &helper.samFile); dyStringFree(&helper.dy); slReverse(&helper.samList); return helper.samList; } +struct samAlignment *bamReadNextSamAlignments(samfile_t *fh, int count, struct lm *lm) +/* Read next count alignments in SAM format, allocated in lm. May return less than + * count at end of file. */ +{ +/* Set up helper. */ +struct bamToSamHelper helper; +helper.lm = lm; +helper.chrom = NULL; +helper.dy = dyStringNew(0); +helper.samFile = fh; +helper.samList = NULL; + +/* Loop through calling our own fetch function */ +int i; +bam1_t *b = bam_init1(); +for (i=0; i<count; ++i) + { + if (samread(fh, b) < 0) + break; + bamAddOneSamAlignment(b, &helper); + } +bam_destroy1(b); + +/* Clean up and go home. */ +dyStringFree(&helper.dy); +slReverse(&helper.samList); +return helper.samList; +} + #else // If we're not compiling with samtools, make stub routines so compile won't fail: char *bamFileNameFromTable(struct sqlConnection *conn, char *table, char *bamSeqName) /* Return file name from table. If table has a seqName column, then grab the * row associated with bamSeqName (which is not nec. in chromInfo, e.g. * bam file might have '1' not 'chr1'). */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamFileNameFromTable"); return NULL; } boolean bamFileExists(char *bamFileName) /* Return TRUE if we can successfully open the bam file and its index file. */ { warn(COMPILE_WITH_SAMTOOLS, "bamFileExists"); return FALSE; } +samfile_t *bamOpen(char *bamFileName) +/* Return an open bam file, dealing with some FUSE caching if need be. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamOpen"); +return FALSE; +} + +void bamClose(samfile_t **pSamFile) +/* Close down a samefile_t */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamClose"); +} + void bamFetch(char *fileOrUrl, char *position, bam_fetch_f callbackFunc, void *callbackData, samfile_t **pSamFile) /* Open the .bam file, fetch items in the seq:start-end position range, * and call callbackFunc on each bam item retrieved from the file plus callbackData. * This handles BAM files with "chr"-less sequence names, e.g. from Ensembl. * The pSamFile parameter is optional. If non-NULL it will be filled in, just for * the benefit of the callback function, with the open samFile. */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamFetch"); } struct samAlignment *bamFetchSamAlignment(char *fileOrUrl, char *chrom, int start, int end, struct lm *lm) /* Fetch region as a list of samAlignments - which is more or less an unpacked * bam record. Results is allocated out of lm, since it tends to be large... */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamFetchSamAlignment"); return NULL; } +struct samAlignment *bamReadNextSamAlignments(struct samfile_t *fh, int count, struct lm *lm) +/* Read next count alignments in SAM format, allocated in lm. May return less than + * count at end of file. */ +{ +errAbort(COMPILE_WITH_SAMTOOLS, "bamReadNextSamAlignments"); +return NULL; +} + boolean bamIsRc(const bam1_t *bam) /* Return TRUE if alignment is on - strand. */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamIsRc"); return FALSE; } void bamGetSoftClipping(const bam1_t *bam, int *retLow, int *retHigh, int *retClippedQLen) /* If retLow is non-NULL, set it to the number of "soft-clipped" (skipped) bases at * the beginning of the query sequence and quality; likewise for retHigh at end. * For convenience, retClippedQLen is the original query length minus soft clipping * (and the length of the query sequence that will be returned). */ { errAbort(COMPILE_WITH_SAMTOOLS, "bamGetSoftClipping"); }