000286958e0ab18c45c7b4f9e8d0d52e15f35d55 kent Fri Apr 1 21:42:54 2011 -0700 Adding firstCodingSplice as a region you can get with bedGeneParts. diff --git src/utils/bedGeneParts/bedGeneParts.c src/utils/bedGeneParts/bedGeneParts.c index 2d016c2..7c89d40 100644 --- src/utils/bedGeneParts/bedGeneParts.c +++ src/utils/bedGeneParts/bedGeneParts.c @@ -5,67 +5,72 @@ #include "options.h" #include "basicBed.h" static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $"; int proStart = -100; int proEnd = 50; void usage() /* Explain usage and exit. */ { errAbort( "bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.\n" "usage:\n" " bedGeneParts part in.bed out.bed\n" - "Where part is either 'firstExon' or 'introns' or 'promoter'\n" + "Where part is either 'firstExon' or 'introns' or 'promoter' or 'firstCodingSplice'\n" "options:\n" " -proStart=NN - start of promoter relative to txStart, default %d\n" " -proEnd=NN - end of promoter relative to txStart, default %d\n" , proStart, proEnd ); } static struct optionSpec options[] = { {"proStart", OPTION_INT}, {"proEnd", OPTION_INT}, {NULL, 0}, }; -enum partChoice {pcFirstExon, pcIntrons, pcPromoter}; +enum partChoice {pcFirstExon, pcIntrons, pcPromoter, pcFirstCodingSplice}; void bedGeneParts(char *part, char *input, char *output) /* bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.. */ { /* Convert part string to an enum and make sure it's one we recognize. 
*/ enum partChoice choice = pcFirstExon; int minWords = 0; if (sameString(part, "firstExon")) { choice = pcFirstExon; minWords = 12; } else if (sameString(part, "introns")) { choice = pcIntrons; minWords = 12; } else if (sameString(part, "promoter")) { choice = pcPromoter; minWords = 6; } +else if (sameString(part, "firstCodingSplice")) + { + choice = pcFirstCodingSplice; + minWords = 12; + } else errAbort("Unrecognized part '%s'", part); struct lineFile *lf = lineFileOpen(input, TRUE); FILE *f = mustOpen(output, "w"); char *words[256]; int wordCount; while ((wordCount = lineFileChop(lf, words)) != 0) { lineFileExpectAtLeast(lf, minWords, wordCount); struct bed *bed = bedLoadN(words, wordCount); char strand = bed->strand[0]; if (strand != '+' && strand != '-') errAbort("Unrecognized strand %c line %d of %s\n", strand, lf->lineIx, lf->fileName); int start,end; @@ -75,30 +80,70 @@ { if (strand == '+') { start = bed->chromStart; end = start + bed->blockSizes[0]; } else { end = bed->chromEnd; start = end - bed->blockSizes[bed->blockCount-1]; } fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", bed->chrom, start, end, bed->name, bed->score, strand); break; } + case pcFirstCodingSplice: + { + int blockCount = bed->blockCount; + if (blockCount > 1) + { + int i; + int firstCodingSplicePos = 0; + if (strand == '+') + { + for (i=1; i<blockCount; ++i) + { + int exonStart = bed->chromStart + bed->chromStarts[i]; + if (exonStart >= bed->thickStart && exonStart < bed->thickEnd) + { + firstCodingSplicePos = exonStart; + break; + } + } + } + else + { + for (i=blockCount-1; i>=0; --i) + { + int exonStart = bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i]; + if (exonStart >= bed->thickStart && exonStart < bed->thickEnd) + { + firstCodingSplicePos = exonStart; + break; + } + } + } + if (firstCodingSplicePos > 0) + { + fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", + bed->chrom, firstCodingSplicePos-1, firstCodingSplicePos+1, bed->name, + bed->score, strand); + } + } + break; + } case pcIntrons: { if (bed->blockCount > 1) { /* Figure out 
last block index and start/end of introns overall */ int lastBlock = bed->blockCount-1; int start = bed->chromStart + bed->blockSizes[0]; int end = bed->chromEnd - bed->blockSizes[lastBlock]; /* Print out constant fields. */ fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\t", bed->chrom, start, end, bed->name, bed->score, strand); fprintf(f, "%d\t%d\t%d\t%d\t", start, end, bed->itemRgb, bed->blockCount-1);