src/hg/protein/pfamXref/pfamXref.c 1.10

1.10 2009/09/25 08:52:45 kent
Sped up 10x to 20x by using a hash table instead of repeated SQL queries.
Index: src/hg/protein/pfamXref/pfamXref.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/protein/pfamXref/pfamXref.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/protein/pfamXref/pfamXref.c	23 Sep 2009 18:42:25 -0000	1.9
+++ src/hg/protein/pfamXref/pfamXref.c	25 Sep 2009 08:52:45 -0000	1.10
@@ -47,8 +47,18 @@
 
 o1 = mustOpen(outputFileName, "w");
 o2 = mustOpen("jj.dat", "w");
     
+/* Build up hash for quick access to displayIds. */
+struct hash *displayIdHash = hashNew(20);
+struct sqlConnection *conn = sqlConnect(proteinDB);
+struct sqlResult *sr = sqlGetResult(conn, "select accession,displayID from spXref3");
+char **row;
+while ((row = sqlNextRow(sr)) != NULL)
+    hashAdd(displayIdHash, row[0], cloneString(row[1]));
+sqlFreeResult(&sr);
+sqlDisconnect(&conn);
+    
 if ((inf = mustOpen(proteinFileName, "r")) == NULL)
     {		
     fprintf(stderr, "Can't open file %s.\n", proteinFileName);
     exit(8);
@@ -113,10 +123,9 @@
 		if (chp2 != NULL) *chp2='\0';
     		swissAC = chp;
 
 		// get display ID from AC		
-		sprintf(cond_str, "accession = '%s'", swissAC);
-    		answer = sqlGetField(proteinDB, "spXref3", "displayID", cond_str);
+		answer = hashFindVal(displayIdHash, swissAC);
 		if (answer != NULL)
 		    {
 		    swissDisplayID = answer;
 		    }