000286958e0ab18c45c7b4f9e8d0d52e15f35d55
kent
  Fri Apr 1 21:42:54 2011 -0700
Adding firstCodingSplice as a region you can get with bedGeneParts.
diff --git src/utils/bedGeneParts/bedGeneParts.c src/utils/bedGeneParts/bedGeneParts.c
index 2d016c2..7c89d40 100644
--- src/utils/bedGeneParts/bedGeneParts.c
+++ src/utils/bedGeneParts/bedGeneParts.c
@@ -5,67 +5,72 @@
 #include "options.h"
 #include "basicBed.h"
 
 static char const rcsid[] = "$Id: newProg.c,v 1.30 2010/03/24 21:18:33 hiram Exp $";
 
 int proStart = -100;
 int proEnd = 50;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.\n"
   "usage:\n"
   "   bedGeneParts part in.bed out.bed\n"
-  "Where part is either 'firstExon' or 'introns' or 'promoter'\n"
+  "Where part is either 'firstExon' or 'introns' or 'promoter' or 'firstCodingSplice'\n"
   "options:\n"
   "   -proStart=NN - start of promoter relative to txStart, default %d\n"
   "   -proEnd=NN - end of promoter relative to txStart, default %d\n"
   , proStart, proEnd
   );
 }
 
 static struct optionSpec options[] = {
    {"proStart", OPTION_INT},
    {"proEnd", OPTION_INT},
    {NULL, 0},
 };
 
-enum partChoice {pcFirstExon, pcIntrons, pcPromoter};
+enum partChoice {pcFirstExon, pcIntrons, pcPromoter, pcFirstCodingSplice};
 
 void bedGeneParts(char *part, char *input, char *output)
 /* bedGeneParts - Given a bed, spit out promoter, first exon, or all introns.. */
 {
 /* Convert part string to an enum and make sure it's one we recognize. */
 enum partChoice choice = pcFirstExon;
 int minWords = 0;
 if (sameString(part, "firstExon")) 
     {
     choice = pcFirstExon;
     minWords = 12;
     }
 else if (sameString(part, "introns"))
     {
     choice = pcIntrons;
     minWords = 12;
     }
 else if (sameString(part, "promoter"))
     {
     choice = pcPromoter;
     minWords = 6;
     }
+else if (sameString(part, "firstCodingSplice"))
+    {
+    choice = pcFirstCodingSplice;
+    minWords = 12;
+    }
 else
     errAbort("Unrecognized part '%s'", part);
 
 struct lineFile *lf = lineFileOpen(input, TRUE);
 FILE *f = mustOpen(output, "w");
 char *words[256];
 int wordCount;
 while ((wordCount = lineFileChop(lf, words)) != 0)
     {
     lineFileExpectAtLeast(lf, minWords, wordCount);
     struct bed *bed = bedLoadN(words, wordCount);
     char strand = bed->strand[0];
     if (strand != '+' && strand != '-')
         errAbort("Unrecognized strand %c line %d of %s\n", strand, lf->lineIx, lf->fileName);
     int start,end;
@@ -75,30 +80,70 @@
 	    {
 	    if (strand == '+')
 		{
 	        start = bed->chromStart;
 		end = start + bed->blockSizes[0];
 		}
 	    else
 	        {
 		end = bed->chromEnd;
 		start = end - bed->blockSizes[bed->blockCount-1];
 		}
 	    fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", 
 	    	bed->chrom, start, end, bed->name, bed->score, strand);
 	    break;
 	    }
+	case pcFirstCodingSplice:
+	    {
+	    int blockCount = bed->blockCount;
+	    if (blockCount > 1)
+	        {
+		int i;
+		int firstCodingSplicePos = 0;
+		if (strand == '+')
+		    {
+		    for (i=1; i<blockCount; ++i)
+		        {
+			int exonStart = bed->chromStart + bed->chromStarts[i];
+			if (exonStart >= bed->thickStart && exonStart < bed->thickEnd)
+			    {
+			    firstCodingSplicePos = exonStart;
+			    break;
+			    }
+			}
+		    }
+		else
+		    {
+		    for (i=blockCount-1; i>=0; --i)
+		        {
+			int exonStart = bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i];
+			if (exonStart >= bed->thickStart && exonStart < bed->thickEnd)
+			    {
+			    firstCodingSplicePos = exonStart;
+			    break;
+			    }
+			}
+		    }
+		if (firstCodingSplicePos > 0)
+		    {
+		    fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\n", 
+			bed->chrom, firstCodingSplicePos-1, firstCodingSplicePos+1, bed->name, 
+				bed->score, strand);
+		    }
+		}
+	    break;
+	    }
 	case pcIntrons:
 	    {
 	    if (bed->blockCount > 1)
 	        {
 		/* Figure out last block index and start/end of introns overall */
 		int lastBlock = bed->blockCount-1;
 		int start = bed->chromStart + bed->blockSizes[0];
 		int end = bed->chromEnd - bed->blockSizes[lastBlock];
 		
 		/* Print out constant fields. */
 		fprintf(f, "%s\t%d\t%d\t%s\t%d\t%c\t", 
 		    bed->chrom, start, end, bed->name, bed->score, strand);
 		fprintf(f, "%d\t%d\t%d\t%d\t", 
 			start, end, bed->itemRgb, bed->blockCount-1);