src/hg/hgTables/bam.c 602fa08316183790dd1e1d633c6d7510ae45d8cb

602fa08316183790dd1e1d633c6d7510ae45d8cb
braney
  Fri Feb 11 15:32:03 2011 -0800
add intersections, identifier filtering, and file length limits to BAM output
diff --git src/hg/hgTables/bam.c src/hg/hgTables/bam.c
index 26cdf2e..77f3685 100644
--- src/hg/hgTables/bam.c
+++ src/hg/hgTables/bam.c
@@ -117,93 +117,115 @@
 row[3] = numPt; numPt += sprintf(numPt, "%u", sam->pos); numPt += 1;
 row[4] = numPt; numPt += sprintf(numPt, "%u", sam->mapQ); numPt += 1;
 row[5] = sam->cigar;
 row[6] = sam->rNext;
 row[7] = numPt; numPt += sprintf(numPt, "%d", sam->pNext); numPt += 1;
 row[8] = numPt; numPt += sprintf(numPt, "%d", sam->tLen); numPt += 1;
 row[9] = sam->seq;
 row[10] = sam->qual;
 row[11] = sam->tagTypeVals;
 assert(numPt < numBufEnd);
 }
 
 void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
 /* Print out selected fields from BAM.  If fields is NULL, then print out all fields. */
 {
+struct hTableInfo *hti = NULL;
+hti = getHti(db, table, conn);
+struct hash *idHash = NULL;
+char *idField = getIdField(db, curTrack, table, hti);
+int idFieldNum = 0;
+
+/* if we know what field to use for the identifiers, get the hash of names */
+if (idField != NULL)
+    idHash = identifierHash(db, table);
+
 if (f == NULL) 
     f = stdout;
 
 /* Convert comma separated list of fields to array. */
 int fieldCount = chopByChar(fields, ',', NULL, 0);
 char **fieldArray;
 AllocArray(fieldArray, fieldCount);
 chopByChar(fields, ',', fieldArray, fieldCount);
 
 /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
  * column name. */
 struct hash *fieldHash = hashNew(0);
 struct slName *bb, *bbList = bamGetFields(table);
 int i;
 for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
+    {
+    /* if we know the field for identifiers, save it away */
+    if ((idField != NULL) && sameString(idField, bb->name))
+	idFieldNum = i;
     hashAddInt(fieldHash, bb->name, i);
+    }
 
 /* Create an array of column indexes corresponding to the selected field list. */
 int *columnArray;
 AllocArray(columnArray, fieldCount);
 for (i=0; i<fieldCount; ++i)
     {
     columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
     }
 
 /* Output row of labels */
 fprintf(f, "#%s", fieldArray[0]);
 for (i=1; i<fieldCount; ++i)
     fprintf(f, "\t%s", fieldArray[i]);
 fprintf(f, "\n");
 
 char *fileName = bamFileName(table, conn);
 struct asObject *as = bamAsObj();
 struct asFilter *filter = NULL;
 
 if (anyFilter())
     {
     filter = asFilterFromCart(cart, db, table, as);
     if (filter)
         {
 	fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
 	}
     }
 
 /* Loop through outputting each region */
 struct region *region, *regionList = getRegions();
-for (region = regionList; region != NULL; region = region->next)
+
+int maxOut = bigFileMaxOutput();
+for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
     {
     struct lm *lm = lmInit(0);
     struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName, region->chrom,
     	region->start, region->end, lm);
     char *row[SAMALIGNMENT_NUM_COLS];
     char numBuf[BAM_NUM_BUF_SIZE];
-    for (sam = samList; sam != NULL; sam = sam->next)
+    for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next)
         {
 	samAlignmentToRow(sam, numBuf, row);
 	if (asFilterOnRow(filter, row))
 	    {
+	    /* if we're looking for identifiers, check if this matches */
+	    if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL))
+		continue;
+
 	    int i;
 	    fprintf(f, "%s", row[columnArray[0]]);
 	    for (i=1; i<fieldCount; ++i)
 		fprintf(f, "\t%s", row[columnArray[i]]);
 	    fprintf(f, "\n");
+	    maxOut --;
 	    }
 	}
     lmCleanup(&lm);
     }
 
 /* Clean up and exit. */
 hashFree(&fieldHash);
 freeMem(fieldArray);
 freeMem(columnArray);
 }
 
 int cigarWidth(char *cigar, int cigarSize)
 /* Return width of alignment as encoded in cigar format string. */
 {
 int tLength=0;