602fa08316183790dd1e1d633c6d7510ae45d8cb braney Fri Feb 11 15:32:03 2011 -0800 add intersections, identifier filtering, and file length limits to BAM output diff --git src/hg/hgTables/bam.c src/hg/hgTables/bam.c index 26cdf2e..77f3685 100644 --- src/hg/hgTables/bam.c +++ src/hg/hgTables/bam.c @@ -117,93 +117,115 @@ row[3] = numPt; numPt += sprintf(numPt, "%u", sam->pos); numPt += 1; row[4] = numPt; numPt += sprintf(numPt, "%u", sam->mapQ); numPt += 1; row[5] = sam->cigar; row[6] = sam->rNext; row[7] = numPt; numPt += sprintf(numPt, "%d", sam->pNext); numPt += 1; row[8] = numPt; numPt += sprintf(numPt, "%d", sam->tLen); numPt += 1; row[9] = sam->seq; row[10] = sam->qual; row[11] = sam->tagTypeVals; assert(numPt < numBufEnd); } void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from BAM. If fields is NULL, then print out all fields. */ { +struct hTableInfo *hti = NULL; +hti = getHti(db, table, conn); +struct hash *idHash = NULL; +char *idField = getIdField(db, curTrack, table, hti); +int idFieldNum = 0; + +/* if we know what field to use for the identifiers, get the hash of names */ +if (idField != NULL) + idHash = identifierHash(db, table); + if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bamGetFields(table); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) + { + /* if we know the field for identifiers, save it away */ + if ((idField != NULL) && sameString(idField, bb->name)) + idFieldNum = i; hashAddInt(fieldHash, bb->name, i); + } /* Create an array of column indexes corresponding to the selected field list. */ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); char *fileName = bamFileName(table, conn); struct asObject *as = bamAsObj(); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); -for (region = regionList; region != NULL; region = region->next) + +int maxOut = bigFileMaxOutput(); +for (region = regionList; region != NULL && (maxOut > 0); region = region->next) { struct lm *lm = lmInit(0); struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom, region->start, region->end, lm); char *row[SAMALIGNMENT_NUM_COLS]; char numBuf[BAM_NUM_BUF_SIZE]; - for (sam = samList; sam != NULL; sam = sam->next) + for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next) { samAlignmentToRow(sam, numBuf, row); if (asFilterOnRow(filter, row)) { + /* if we're looking for identifiers, check if this matches */ + if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL)) + continue; + int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); + maxOut --; } } lmCleanup(&lm); } /* Clean up and exit. */ hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); } int cigarWidth(char *cigar, int cigarSize) /* Return width of alignment as encoded in cigar format string. */ { int tLength=0;