a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/utils/faCount/faCount.c src/utils/faCount/faCount.c
index a79feaa..49c8127 100644
--- src/utils/faCount/faCount.c
+++ src/utils/faCount/faCount.c
@@ -1,185 +1,184 @@
 /* faCount - count base statistics and CpGs in FA files */
 #include "common.h"
 #include "fa.h"
 #include "dnautil.h"
 #include "options.h"
 
-static char const rcsid[] = "$Id: faCount.c,v 1.8 2009/08/02 19:51:20 markd Exp $";
 
 static struct optionSpec optionSpecs[] =
 {
     {"summary", OPTION_BOOLEAN},
     {"dinuc", OPTION_BOOLEAN},
     {"strands", OPTION_BOOLEAN},
     {NULL, 0},
 };
 
 bool summary = FALSE;
 bool dinuc = FALSE;
 bool strands = FALSE;
 void usage()
 /* Print usage info and exit. */
 {
 errAbort("faCount - count base statistics and CpGs in FA files.\n"
 	 "usage:\n"
 	 "   faCount file(s).fa\n"
          "     -summary  show only summary statistics\n"
          "     -dinuc    include statistics on dinucletoide frequencies\n"
          "     -strands  count bases on both strands\n"
              );
 }
 
 void faCount(char *faFiles[], int faCount)
 /* faCount - count bases. */
 {
 int f, i, j, k;
 struct dnaSeq seq;
 unsigned long long totalLength = 0;
 unsigned long long totalBaseCount[5];
 unsigned long long totalDinucleotideCount[5][5];
 unsigned long long totalCpgCount = 0;
 struct lineFile *lf;
 ZeroVar(&seq);
 
 for (i = 0; i < ArraySize(totalBaseCount); i++)
     totalBaseCount[i] = 0;
 
 for (i = 0; i < ArraySize(totalDinucleotideCount); i++)
     for (j = 0; j < ArraySize(totalDinucleotideCount[i]); j++)
         totalDinucleotideCount[i][j] = 0;
 
 printf("#seq\tlen\tA\tC\tG\tT\tN\tcpg");
 if (dinuc)
     printf("\tAA\tAC\tAG\tAT\tCA\tCC\tCG\tCT\tGA\tGC\tGG\tGT\tTA\tTC\tTG\tTT");
 printf("\n");
 
 dnaUtilOpen();
 for (f = 0; f<faCount; ++f)
     {
     lf = lineFileOpen(faFiles[f], FALSE);
     while (faSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
         {
         int prevBase = -1;
         int prevRcBase = -1;
         unsigned long long length = 0;
         unsigned long long baseCount[5];
         unsigned long long dinucleotideCount[5][5];
         unsigned long long cpgCount = 0;
         for (i = 0; i < ArraySize(baseCount); i++)
             baseCount[i] = 0;
         for (i = 0; i < ArraySize(dinucleotideCount); i++)
             for (j = 0; j < ArraySize(dinucleotideCount[i]); j++)
                 dinucleotideCount[i][j] = 0;
     	for (j=0; j<seq.size; ++j)
 	        {
             int baseVal = ntVal5[(int)(seq.dna[j])];
             int rcBaseVal;
             assert(baseVal != -1);
             assert(baseVal <= 4);
             length++;
             switch(baseVal)
                 {
                 case A_BASE_VAL: rcBaseVal = T_BASE_VAL; break;
                 case C_BASE_VAL: rcBaseVal = G_BASE_VAL; break;
                 case G_BASE_VAL: rcBaseVal = C_BASE_VAL; break;
                 case T_BASE_VAL: rcBaseVal = A_BASE_VAL; break;
                 default: rcBaseVal = N_BASE_VAL; break;
                 }
             baseCount[baseVal]++;
             if ((prevBase == C_BASE_VAL) && (baseVal == G_BASE_VAL))
                 cpgCount++;
             if (prevBase != -1)
                 dinucleotideCount[prevBase][baseVal]++;
             if (strands)
                 {
                 length++;
                 baseCount[rcBaseVal]++;
                 if ((prevRcBase == G_BASE_VAL) && (rcBaseVal == C_BASE_VAL))
                     cpgCount++;
                 if (prevRcBase != -1)
                     dinucleotideCount[rcBaseVal][prevRcBase]++;
                 }
             prevBase = baseVal;
             prevRcBase = rcBaseVal;
             }
         if (!summary)
             {
             printf("%s\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu",
             seq.name, length,
             baseCount[A_BASE_VAL], baseCount[C_BASE_VAL],
             baseCount[G_BASE_VAL], baseCount[T_BASE_VAL],
             baseCount[N_BASE_VAL], cpgCount);
             if (dinuc)
                 printf("\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu",
                     dinucleotideCount[A_BASE_VAL][A_BASE_VAL], dinucleotideCount[A_BASE_VAL][C_BASE_VAL],
                     dinucleotideCount[A_BASE_VAL][G_BASE_VAL], dinucleotideCount[A_BASE_VAL][T_BASE_VAL],
                     dinucleotideCount[C_BASE_VAL][A_BASE_VAL], dinucleotideCount[C_BASE_VAL][C_BASE_VAL],
                     dinucleotideCount[C_BASE_VAL][G_BASE_VAL], dinucleotideCount[C_BASE_VAL][T_BASE_VAL],
                     dinucleotideCount[G_BASE_VAL][A_BASE_VAL], dinucleotideCount[G_BASE_VAL][C_BASE_VAL],
                     dinucleotideCount[G_BASE_VAL][G_BASE_VAL], dinucleotideCount[G_BASE_VAL][T_BASE_VAL],
                     dinucleotideCount[T_BASE_VAL][A_BASE_VAL], dinucleotideCount[T_BASE_VAL][C_BASE_VAL],
                     dinucleotideCount[T_BASE_VAL][G_BASE_VAL], dinucleotideCount[T_BASE_VAL][T_BASE_VAL]);
             printf("\n");
             }
         totalLength += length;
         totalCpgCount += cpgCount;
         for (i = 0; i < ArraySize(baseCount); i++)
             totalBaseCount[i] += baseCount[i];
         for (i = 0; i < ArraySize(dinucleotideCount); i++)
             for (k = 0; k < ArraySize(dinucleotideCount[i]); k++)
                 totalDinucleotideCount[i][k] += dinucleotideCount[i][k];
         }
     lineFileClose(&lf);
 	}
 
 
 printf("total\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu",
        totalLength,
        totalBaseCount[A_BASE_VAL], totalBaseCount[C_BASE_VAL],
        totalBaseCount[G_BASE_VAL], totalBaseCount[T_BASE_VAL],
        totalBaseCount[N_BASE_VAL], totalCpgCount);
 if (dinuc)
     printf("\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu",
         totalDinucleotideCount[A_BASE_VAL][A_BASE_VAL], totalDinucleotideCount[A_BASE_VAL][C_BASE_VAL],
         totalDinucleotideCount[A_BASE_VAL][G_BASE_VAL], totalDinucleotideCount[A_BASE_VAL][T_BASE_VAL],
         totalDinucleotideCount[C_BASE_VAL][A_BASE_VAL], totalDinucleotideCount[C_BASE_VAL][C_BASE_VAL],
         totalDinucleotideCount[C_BASE_VAL][G_BASE_VAL], totalDinucleotideCount[C_BASE_VAL][T_BASE_VAL],
         totalDinucleotideCount[G_BASE_VAL][A_BASE_VAL], totalDinucleotideCount[G_BASE_VAL][C_BASE_VAL],
         totalDinucleotideCount[G_BASE_VAL][G_BASE_VAL], totalDinucleotideCount[G_BASE_VAL][T_BASE_VAL],
         totalDinucleotideCount[T_BASE_VAL][A_BASE_VAL], totalDinucleotideCount[T_BASE_VAL][C_BASE_VAL],
         totalDinucleotideCount[T_BASE_VAL][G_BASE_VAL], totalDinucleotideCount[T_BASE_VAL][T_BASE_VAL]);
 printf("\n");
 
 if (summary)
     {
     printf("prcnt\t%-5.1f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f",
        (float)totalLength/totalLength,
        ((float)totalBaseCount[A_BASE_VAL])/(float)totalLength, ((float)totalBaseCount[C_BASE_VAL])/(float)totalLength,
        ((float)totalBaseCount[G_BASE_VAL])/(float)totalLength, ((float)totalBaseCount[T_BASE_VAL])/(float)totalLength,
        ((float)totalBaseCount[N_BASE_VAL])/(float)totalLength, (float)totalCpgCount/(float)totalLength);
     if (dinuc)
         printf("\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f\t%-5.4f",
             (float)totalDinucleotideCount[A_BASE_VAL][A_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[A_BASE_VAL][C_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[A_BASE_VAL][G_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[A_BASE_VAL][T_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[C_BASE_VAL][A_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[C_BASE_VAL][C_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[C_BASE_VAL][G_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[C_BASE_VAL][T_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[G_BASE_VAL][A_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[G_BASE_VAL][C_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[G_BASE_VAL][G_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[G_BASE_VAL][T_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[T_BASE_VAL][A_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[T_BASE_VAL][C_BASE_VAL]/(float)totalLength,
             (float)totalDinucleotideCount[T_BASE_VAL][G_BASE_VAL]/(float)totalLength, (float)totalDinucleotideCount[T_BASE_VAL][T_BASE_VAL]/(float)totalLength);
     printf("\n");
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line . */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc < 2)
     usage();
 summary = optionExists("summary");
 dinuc = optionExists("dinuc");
 strands = optionExists("strands");
 faCount(argv+1, argc-1);
 return 0;
 }