src/hg/protein/pbCalPi/pbCalPi.c 1.8

1.8 2009/09/25 18:37:59 kent
Simplified code. Reduced run time from 5 hours to 20 minutes.
Index: src/hg/protein/pbCalPi/pbCalPi.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/protein/pbCalPi/pbCalPi.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -b -B -U 4 -r1.7 -r1.8
--- src/hg/protein/pbCalPi/pbCalPi.c	3 Sep 2008 19:20:59 -0000	1.7
+++ src/hg/protein/pbCalPi/pbCalPi.c	25 Sep 2009 18:37:59 -0000	1.8
@@ -7,10 +7,9 @@
 {
 errAbort(
   "pbCalPi - Calculate pI values from a list of protein IDs \n"
   "usage:\n"
-  "   pbCalPi listFile spDb outFile\n"
-  "      listFile is the input  file name of list of protein accession numbers\n"
+  "   pbCalPi spDb outFile\n"
   "      spDb     is the swissprot database name\n"
   "      outFile  is the output file name of tab delineated file of protein accession number and pI value\n"
-  "example: pbCalPi prot.lis sp040115 pepPi.tab\n");
+  "example: pbCalPi sp040115 pepPi.tab\n");
 }
@@ -19,18 +18,18 @@
 {
 return(powf(10.0, x));
 }
 
-float calPi(char *sequence)
+double calPi(char *sequence)
 /* the calPi() is based on the parameters and algorithm obtained */
 /* from SWISS-PROT.  The calculation results should be identical to that */
 /* from the Web page: http://us.expasy.org/tools/pi_tool.html */
 {
 int   i, sequenceLength;
 int   comp[ALPHABET_LEN];
 int   nTermResidue, cTermResidue;
-float charge, phMin, phMid = 0.0, phMax;
-float carg, cter, nter, ctyr, chis, clys, casp, cglu, ccys;
+double charge, phMin, phMid = 0.0, phMax;
+double carg, cter, nter, ctyr, chis, clys, casp, cglu, ccys;
 int    R, H, K, D, E, C, Y;
 
 R = (int)('R' - 'A');
 H = (int)('H' - 'A');
@@ -90,41 +89,26 @@
 }
 
 int main(int argc, char *argv[])
+/* Write one "acc<TAB>pI" line to outFile for every row of the protein table in spDb. */
 {
-char *infName, *spDb, *outfName;
-float pI;
-char *acc;
-char *seq;
-char cond_str[255];
-struct lineFile *inf;
-FILE *outf1;
-char *row[1];
-
-if (argc != 4)
-   {
+if (argc != 3)
    usage();
-   }
-
-infName   = argv[1];
-spDb      = argv[2];
-outfName  = argv[3];
-
-inf = lineFileOpen(infName, TRUE); /* input file */
-outf1 = mustOpen(outfName, "w");
 
-while (lineFileRow(inf, row))
+char *spDb      = argv[1];
+char *outfName  = argv[2];
+FILE *f = mustOpen(outfName, "w");
+
+/* A single query returns acc and val for every protein row. */
+struct sqlConnection *conn = sqlConnect(spDb);
+struct sqlResult *sr = sqlGetResult(conn, "select acc,val from protein");
+char **row;
+while ((row = sqlNextRow(sr)) != NULL)
     {
-    acc = row[0];
-
-    safef(cond_str, sizeof(cond_str), "acc='%s'", acc);
-    seq  = sqlGetField(spDb, "protein", "val", cond_str);
-
-    if (seq != NULL)
-	{
-    	pI = calPi(seq);
-    	fprintf(outf1, "%s\t%.2f\n", acc, pI);
-    	}
+    fprintf(f, "%s\t%.2f\n", row[0], calPi(row[1]));
     }
-fclose(outf1);
+/* Release the query result and database connection. */
+sqlFreeResult(&sr);
+sqlDisconnect(&conn);
+carefulClose(&f);
 return(0);
 }