d024feda213cc0afab651717b6e5539da91e69d0 kent Tue Apr 5 15:28:21 2011 -0700 Adding exons as a type of bedGenePart output. diff --git src/utils/bedGeneParts/bedGeneParts.c src/utils/bedGeneParts/bedGeneParts.c index 7c89d40..abd38e6 100644 --- src/utils/bedGeneParts/bedGeneParts.c +++ src/utils/bedGeneParts/bedGeneParts.c @@ -5,84 +5,96 @@ #include "options.h" #include "basicBed.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; int proStart = -100; int proEnd = 50; void usage() /* Explain usage and exit. */ { errAbort( "bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.\n" "usage:\n" " bedGeneParts part in.bed out.bed\n" - "Where part is either 'firstExon' or 'introns' or 'promoter' or 'firstCodingSplice'\n" + "Where part is either 'exons' or 'firstExon' or 'introns' or 'promoter' or 'firstCodingSplice'\n" + "or secondCodingSplice\n" "options:\n" " -proStart=NN - start of promoter relative to txStart, default %d\n" " -proEnd=NN - end of promoter relative to txStart, default %d\n" , proStart, proEnd ); } static struct optionSpec options[] = { {"proStart", OPTION_INT}, {"proEnd", OPTION_INT}, {NULL, 0}, }; -enum partChoice {pcFirstExon, pcIntrons, pcPromoter, pcFirstCodingSplice}; +enum partChoice {pcFirstExon, pcExons, pcIntrons, pcPromoter, pcFirstCodingSplice, pcSecondCodingSplice}; void bedGeneParts(char *part, char *input, char *output) /* bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.. */ { /* Convert part string to an enum and make sure it's one we recognize. 
*/ enum partChoice choice = pcFirstExon; int minWords = 0; if (sameString(part, "firstExon")) { choice = pcFirstExon; minWords = 12; } +else if (sameString(part, "exons")) + { + choice = pcExons; + minWords = 12; + } else if (sameString(part, "introns")) { choice = pcIntrons; minWords = 12; } else if (sameString(part, "promoter")) { choice = pcPromoter; minWords = 6; } else if (sameString(part, "firstCodingSplice")) { choice = pcFirstCodingSplice; minWords = 12; } +else if (sameString(part, "secondCodingSplice")) + { + choice = pcSecondCodingSplice; + minWords = 12; + } else errAbort("Unrecognized part '%s'", part); struct lineFile *lf = lineFileOpen(input, TRUE); FILE *f = mustOpen(output, "w"); char *words[256]; int wordCount; while ((wordCount = lineFileChop(lf, words)) != 0) { lineFileExpectAtLeast(lf, minWords, wordCount); struct bed *bed = bedLoadN(words, wordCount); char strand = bed->strand[0]; + if (choice != pcExons && choice != pcIntrons) if (strand != '+' && strand != '-') errAbort("Unrecognized strand %c line %d of %s\n", strand, lf->lineIx, lf->fileName); int start,end; switch (choice) { case pcFirstExon: { if (strand == '+') { start = bed->chromStart; end = start + bed->blockSizes[0]; } else { end = bed->chromEnd; @@ -120,30 +132,82 @@ { firstCodingSplicePos = exonStart; break; } } } if (firstCodingSplicePos > 0) { fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", bed->chrom, firstCodingSplicePos-1, firstCodingSplicePos+1, bed->name, bed->score, strand); } } break; } + case pcSecondCodingSplice: + { + int blockCount = bed->blockCount; + if (blockCount > 2) + { + int i; + int codingSplicePos = 0; + if (strand == '+') + { + for (i=2; i<blockCount; ++i) + { + int exonStart = bed->chromStart + bed->chromStarts[i]; + if (exonStart >= bed->thickStart && exonStart < bed->thickEnd) + { + codingSplicePos = exonStart; + break; + } + } + } + else + { + for (i=blockCount-2; i>=0; --i) + { + int exonStart = bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i]; + if (exonStart >= bed->thickStart && exonStart < 
bed->thickEnd) + { + codingSplicePos = exonStart; + break; + } + } + } + if (codingSplicePos > 0) + { + fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", + bed->chrom, codingSplicePos-1, codingSplicePos+1, bed->name, + bed->score, strand); + } + } + break; + } + case pcExons: + { + int i; + for (i=0; i<bed->blockCount; ++i) + { + int start = bed->chromStart + bed->chromStarts[i]; + int end = start + bed->blockSizes[i]; + fprintf(f, "%s\t%d\t%d\t%s\t%d\t%s\n", + bed->chrom, start, end, bed->name, bed->score, bed->strand); + } + break; + } case pcIntrons: { if (bed->blockCount > 1) { /* Figure out last block index and start/end of introns overall */ int lastBlock = bed->blockCount-1; int start = bed->chromStart + bed->blockSizes[0]; int end = bed->chromEnd - bed->blockSizes[lastBlock]; /* Print out constant fields. */ fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\t", bed->chrom, start, end, bed->name, bed->score, strand); fprintf(f, "%d\t%d\t%d\t%d\t", start, end, bed->itemRgb, bed->blockCount-1);