src/getgene/getgene.c 6419fc05af66878081e2b7d4d944900ec42d7cb1

6419fc05af66878081e2b7d4d944900ec42d7cb1
tdreszer
  Fri Jun 22 15:29:37 2012 -0700
First of many checkins as dictated by Jim's OCD.  Formatting space after if and limiting lines to 100 chars.  Changes limited to lines last touched by tdreszer (git blame) so as not to ruin history.  None of these changes should affect executables in any way.  The only effect is on my sanity and Jim's.
diff --git src/getgene/getgene.c src/getgene/getgene.c
index 1b97a0b..85e129c 100644
--- src/getgene/getgene.c
+++ src/getgene/getgene.c
@@ -1,372 +1,372 @@
 #include "common.h"
 #include "htmshell.h"
 #include "cheapcgi.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "wormdna.h"
 
 struct dfm
 /* Output formatter state.  Tracks how far into the current word and line
  * the output is, and which character positions should be highlighted. */
     {
     int wordLen;          /* Characters per word; 0 turns word breaks off. */
     int lineLen;          /* Characters per line; 0 turns line breaks off. */
     int inWord;           /* Characters written so far in the current word. */
     int inLine;           /* Characters written so far in the current line. */
     boolean lineNumbers;  /* If set, print the running count at each line end. */
     boolean hiliteRange;  /* If set, highlight from startRange to endRange. */
     long startRange;      /* charCount value at which the highlight opens. */
     long endRange;        /* charCount value at which the highlight closes. */
     long charCount;       /* Total number of characters written so far. */
     FILE *out;            /* Stream the formatted output is written to. */
     };
 
 void initDfm(struct dfm *dfm, int wordLen, int lineLen,
         boolean lineNumbers,
         boolean hiliteRange, long startRange, long endRange,
         FILE *out)
 /* Fill in a formatter: record the layout and highlight settings and the
  * output stream, and reset all running counters to zero. */
 {
 /* Where the output goes and how it is laid out. */
 dfm->out = out;
 dfm->wordLen = wordLen;
 dfm->lineLen = lineLen;
 dfm->lineNumbers = lineNumbers;
 /* Highlight settings. */
 dfm->hiliteRange = hiliteRange;
 dfm->startRange = startRange;
 dfm->endRange = endRange;
 /* Nothing has been written yet. */
 dfm->charCount = 0;
 dfm->inLine = 0;
 dfm->inWord = 0;
 }
 
 void dfmOut(struct dfm *dfm, char c)
 /* Write one character to the formatter's stream together with any markup
  * that is due at this position: the opening and closing highlight tags,
  * a space after each complete word, and an optional running count plus
  * <BR> after each complete line. */
 {
 FILE *f = dfm->out;
 
 /* The highlight opens just before the character at startRange. */
 if (dfm->hiliteRange && dfm->charCount == dfm->startRange)
     fputs("<A NAME=\"CLICKED\"></A><span style='color:#0033FF;'>", f);
 
 fputc(c, f);
 dfm->charCount += 1;
 
 /* ...and closes right after the character that brings the count to endRange. */
 if (dfm->hiliteRange && dfm->charCount == dfm->endRange)
     fputs("</span>", f);
 
 if (dfm->wordLen != 0)
     {
     dfm->inWord += 1;
     if (dfm->inWord >= dfm->wordLen)
         {
         fputc(' ', f);
         dfm->inWord = 0;
         }
     }
 
 if (dfm->lineLen != 0)
     {
     dfm->inLine += 1;
     if (dfm->inLine >= dfm->lineLen)
         {
         if (dfm->lineNumbers)
             fprintf(f, " %ld", dfm->charCount);
         fputs("<BR>\n", f);
         dfm->inLine = 0;
         }
     }
 }
 
 boolean findLineInFile(char *fileName, char *start,
     char *lineBuf, int lineBufSize)
 /* Read the named file a line at a time until reaching a line whose first
  * word (delimited by a space) is start.  Returns TRUE with that line left
  * in lineBuf, or FALSE if the end of the file is reached without a match.
  * Lines are read in pieces of at most lineBufSize-1 characters. */
 {
 int prefixLen = strlen(start);
 FILE *in = mustOpen(fileName, "r");
 boolean matched = FALSE;
 
 while (!matched && fgets(lineBuf, lineBufSize, in) != NULL)
     {
     /* Compare the prefix first: only when it matches is lineBuf known to
      * extend as far as index prefixLen. */
     if (strncmp(start, lineBuf, prefixLen) == 0 && lineBuf[prefixLen] == ' ')
         matched = TRUE;
     }
 fclose(in);
 return matched;
 }
 
 char *chopOutSecondWord(char *lineBuf)
 /* Find the second word in lineBuf, where words are runs of characters not
  * in whiteSpaceChopper.  The word is zero terminated in place (lineBuf is
  * modified) and a pointer to it is returned.  Returns NULL if there is no
  * second word. */
 {
 char *firstWord = skipLeadingSpaces(lineBuf);
 int firstLen = strcspn(firstWord, whiteSpaceChopper);
 char *secondWord = skipLeadingSpaces(firstWord + firstLen);
 int secondLen = strcspn(secondWord, whiteSpaceChopper);
 
 if (secondLen <= 0)
     return NULL;
 secondWord[secondLen] = 0;
 return secondWord;
 }
 
 void outputSeq(DNA *dna, int dnaSize,
 	boolean hiliteRange, long startRange, long endRange,
 	FILE *out)
 /* Write out sequence in 10 character words and 50 character numbered
  * lines, optionally highlighting startRange..endRange.
  *
  * If the CGI variable "translate" is on, the DNA is translated a codon at
  * a time and the protein is written instead.  Translation starts at the
  * offset given by CGI variable "utr5" minus one (0 if absent) and stops
  * at the first codon for which lookupCodon returns 0.  The highlight range
  * is shifted by that offset and divided by three to match.
  *
  * NOTE(review): a trailing partial codon is still passed to lookupCodon,
  * as before; this presumably relies on dna being zero terminated at
  * dnaSize - confirm against callers. */
 {
 struct dfm dfm;
 int i;
 char *seq = dna;
 char *protBuf = NULL;
 int size = dnaSize;
 
 if (cgiBoolean("translate"))
     {
     int utr5 = 0;
     int maxProtSize;
     char *prot;
     if (cgiVarExists("utr5"))
         utr5 = cgiInt("utr5")-1;
     /* utr5 is taken from the request.  Keep the translation start inside
      * the sequence so nothing is read before or after the dna buffer. */
     if (utr5 < 0)
         utr5 = 0;
     if (utr5 > dnaSize)
         utr5 = dnaSize;
     startRange -= utr5;
     endRange -= utr5;
     startRange /= 3;
     endRange /= 3;
     dna += utr5;
     /* Only the bases left after the offset can be translated. */
     maxProtSize = (dnaSize - utr5 + 2)/3;
     prot = protBuf = needMem(maxProtSize + 1);
     seq = prot;
     for (size = 0; size < maxProtSize; ++size)
         {
         if ((*prot++ = lookupCodon(dna)) == 0)
             break;
         dna += 3;
         }
     *prot = 0;
     }
 initDfm(&dfm, 10, 50, TRUE, hiliteRange, startRange, endRange, out);
 for (i=0; i<size; ++i)
     dfmOut(&dfm, seq[i]);
 /* The protein buffer is private to this call; release it once written. */
 if (protBuf != NULL)
     freeMem(protBuf);
 }
 
 int countUpper(char *s)
 /* Count upper case chars in the zero terminated string s. */
 {
 int count = 0;
 char c;
 while ((c = *s++) != '\0')
     {
     /* The <ctype.h> functions need a value representable as unsigned char
      * (or EOF); a plain char with its high bit set may be negative. */
     if (isupper((unsigned char)c))
         ++count;
     }
 return count;
 }
 
 char *cloneUpperOnly(char *s)
 /* Return a newly allocated string holding only the upper case characters
  * of s, in their original order.  The buffer comes from needMem and is
  * sized from countUpper(s); the caller is responsible for freeing it. */
 {
 char c;
 char *d;
 char *upper;
 int upSize = countUpper(s);
 upper = d = needMem(upSize+1);
 while ((c = *s++) != '\0')
     {
     /* Cast before isupper: passing a negative plain char is undefined.
      * The bound keeps the copy inside the buffer even if this test and
      * the one in countUpper were ever to disagree. */
     if (isupper((unsigned char)c) && d < upper + upSize)
         *d++ = c;
     }
 *d = 0;
 return upper;
 }
 
 void doMiddle()
 /* Write the body of the page to stdout: the sequence named by the CGI
  * variable "geneName", wrapped in <P><TT>...</TT></P>.
  *
  * CGI variables read here:
  *   geneName             - name to look up (required, trimmed of spaces).
  *   intronsLowerCase     - defaults to TRUE.
  *   intronsParenthesized - read, but not used further in this function.
  *   startRange, endRange - highlight range, used only if both are present.
  *   hiliteNear           - presence turns highlighting on; its value is a
  *                          fraction of the sequence length used to pick a
  *                          range when startRange/endRange are absent.
  *   litLink              - if on, print literature/database links and a
  *                          translated protein for gene names.
  *   strand               - overrides the strand for chromosome ranges. */
 {
 char *seqName;
 boolean intronsLowerCase = TRUE;
 boolean intronsParenthesized = FALSE;
 boolean hiliteNear = FALSE;
 int startRange = 0;
 int endRange = 0;
 boolean gotRange = FALSE;
 struct dnaSeq *cdnaSeq;
 boolean isChromRange = FALSE;
 DNA *dna;
 char *translation = NULL;
 
 seqName = cgiString("geneName");
 seqName = trimSpaces(seqName);
 if (cgiVarExists("intronsLowerCase"))
     intronsLowerCase = cgiBoolean("intronsLowerCase");
 if (cgiVarExists("intronsParenthesized"))
     intronsParenthesized = cgiBoolean("intronsParenthesized");
 if (cgiVarExists("startRange") && cgiVarExists("endRange" ))
     {
     startRange = cgiInt("startRange");
     endRange = cgiInt("endRange");
     gotRange = TRUE;
     }
 if (cgiVarExists("hiliteNear"))
     {
     hiliteNear = TRUE;
     }
 fprintf(stdout, "<P><TT>\n");
 
 /* The logic here is a little complex to optimize speed.
  * If we can decide what type of thing the name refers to by
  * simply looking at the name we do.  Otherwise we have to
  * search the database in various ways until we get a hit. */
 if (wormIsNamelessCluster(seqName))
     {
     isChromRange = TRUE;
     }
 else if (wormIsChromRange(seqName))
     {
     isChromRange = TRUE;
     }
 else if (getWormGeneDna(seqName, &dna, TRUE))
     {
     if (cgiBoolean("litLink"))
         {
         /* NOTE(review): nameBuf is a fixed 64 bytes and is filled below by
          * unchecked strcpy from geneName or seqName; seqName comes straight
          * from the request, so a long name overruns it. */
         char nameBuf[64];
         char *geneName = NULL;
         char *productName = NULL;
         char *coding;
         int transSize;
         struct wormCdnaInfo info;
 
         printf("<H3>Information and Links for %s</H3>\n", seqName);
         if (wormInfoForGene(seqName, &info))
             {
             if (info.description)
                 printf("<P>%s</P>\n", info.description);
             geneName = info.gene;
             productName = info.product;
             }
         else
             {
             if (wormIsGeneName(seqName))
                 geneName = seqName;
             else if (wormGeneForOrf(seqName, nameBuf, sizeof(nameBuf)))
                 geneName = nameBuf;
             }
         /* Translate only the upper case bases of the gene.
          * NOTE(review): transSize rounds the codon count up, while the
          * buffer is sized with it rounded down; confirm dnaTranslateSome
          * never writes more than 1+strlen(coding)/3 bytes. */
         coding = cloneUpperOnly(dna);
         transSize = 1 + (strlen(coding)+2)/3;
         translation = needMem(1+strlen(coding)/3);
         dnaTranslateSome(coding, translation, transSize);
         freez(&coding);
 
         if (geneName)
             {
-            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m&term=C+elegans+%s&dispmax=50&relentrezdate=No+Limit\">",
-                geneName);
+            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
+                    "&term=C+elegans+%s&dispmax=50&relentrezdate=No+Limit\">", geneName);
             printf("PubMed search on gene: </A>%s<BR>\n", geneName);
             }
         if (productName)
             {
             char *encoded = cgiEncode(productName);
-            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m&term=%s&dispmax=50&relentrezdate=No+Limit\">",
-                encoded);
+            printf("<A HREF=\"http://www.ncbi.nlm.nih.gov/htbin-post/Entrez/query?form=4&db=m"
+                    "&term=%s&dispmax=50&relentrezdate=No+Limit\">", encoded);
             printf("PubMed search on product:</A> %s<BR>\n", productName);
             freeMem(encoded);
             }
         /* Process name to get rid of isoform letter for Proteome. */
         /* NOTE(review): when geneName was set to nameBuf above, this strcpy
          * copies the buffer onto itself. */
         if (geneName)
             strcpy(nameBuf, geneName);
         else
             {
             strcpy(nameBuf, seqName);
 #ifdef NEVER
             /* Sometimes Proteome requires the letter after the orf name
              * in alt-spliced cases, sometimes it can't handle it.... */
             /* (Disabled code: nameLen is not declared in this function.) */
             nameLen = strlen(nameBuf);
             if (wormIsOrfName(nameBuf) && isalpha(nameBuf[nameLen-1]))
                 {
                 char *dotPos = strrchr(nameBuf, '.');
                 if (dotPos != NULL && isdigit(dotPos[1]))
                     nameBuf[nameLen-1] = 0;
                 }
 #endif /* NEVER */
             }
 	printf("<A HREF=\"http://www.wormbase.org/db/seq/sequence?name=%s;class=Sequence\">", seqName);
 	printf("WormBase link on:</A> %s<BR>\n", seqName);
         printf("<A HREF=\"http://www.proteome.com/databases/WormPD/reports/%s.html\">", nameBuf);
         printf("Proteome link on:</A> %s<BR>\n<BR>\n", nameBuf);
 
 
         /* In-page table of contents; the anchors are emitted further down. */
         printf("<A HREF=#DNA>Genomic DNA Sequence</A><BR>\n");
         if (hiliteNear)
             printf("<A HREF=\"#CLICKED\">Shortcut to where you clicked in gene</A><BR>");
         printf("<A HREF=#protein>Translated Protein Sequence</A><BR>\n");
         htmlHorizontalLine();
 	printf("<A NAME=DNA></A>");
         printf("<H3>%s Genomic DNA sequence</H3>", seqName);
         }
     if (!intronsLowerCase)
         tolowers(dna);
     if (hiliteNear)
 	{
 	if (!gotRange)
 	    {
 	    /* No explicit range given: treat hiliteNear as a fraction of the
 	     * sequence length and highlight 5 bases either side of that
 	     * position, clamped to the sequence. */
 	    double nearPos = cgiDouble("hiliteNear");
 	    int rad = 5;
 	    int dnaSize = strlen(dna);
 	    long mid = (int)(dnaSize * nearPos);
 	    startRange = mid - rad;
 	    if (startRange < 0) startRange = 0;
 	    endRange = mid + rad;
 	    if (endRange >= dnaSize) endRange = dnaSize - 1;
 	    }
 	}
     outputSeq(dna, strlen(dna), hiliteNear, startRange, endRange, stdout);
     freez(&dna);
     }
 else if (wormCdnaSeq(seqName, &cdnaSeq, NULL))
     {
     /* cDNA is written without highlighting; cdnaSeq is not freed here. */
     outputSeq(cdnaSeq->dna, cdnaSeq->size, FALSE, 0, 0, stdout);
     }
 else
     {
     isChromRange = TRUE;
     }
 if (isChromRange)
     {
     /* Name is (or is treated as) a chromosome range: fetch that stretch of
      * the chromosome, reverse complementing it for the minus strand. */
     char *chromId;
     int start, end;
     char strand = '+';
     int size;
 
     if (!wormGeneRange(seqName, &chromId, &strand, &start, &end))
         errAbort("Can't find %s",seqName);
     size = end - start;
     if (intronsLowerCase)
         dna = wormChromPartExonsUpper(chromId, start, size);
     else
         {
         dna = wormChromPart(chromId, start, size);
         touppers(dna);
         }
     /* A "strand" CGI variable takes precedence over the looked-up strand. */
     if (cgiVarExists("strand"))
         strand = cgiString("strand")[0];
     if (strand == '-')
         reverseComplement(dna, size);
     /* dna obtained in this branch is not freed in this function. */
     outputSeq(dna, size, FALSE, 0, 0, stdout);
     }
 if (translation != NULL)
     {
     /* Only set in the litLink case above. */
     htmlHorizontalLine();
     printf("<A NAME=protein></A>");
     printf("<H3>Translated Protein of %s</H3>\n", seqName);
     outputSeq(translation, strlen(translation), FALSE, 0, 0, stdout);
     freez(&translation);
     }
 fprintf(stdout, "</TT></P>\n");
 
 }
 
 
 int main(int argc, char *argv[])
 /* CGI entry point.  Builds the page title from the "geneName" CGI variable
  * and passes doMiddle to htmShell to produce the page.  When run with the
  * single argument "test", a canned QUERY_STRING is put in the environment
  * first. */
 {
 char *geneName;
 char title[256];
 
 if (argc == 2 && sameWord(argv[1], "test"))
     putenv("QUERY_STRING=geneName=I:4000-5500&hiliteNear=0.917112&intronsLowerCase=On");
 geneName = cgiString("geneName");
 /* geneName comes from the request and may be arbitrarily long; bound the
  * write so an oversized name is truncated instead of overrunning title. */
 snprintf(title, sizeof(title), "%s DNA Sequence", geneName);
 dnaUtilOpen();
 htmShell(title, doMiddle, "QUERY");
 return 0;
 }