src/hg/protein/pfamXref/pfamXref.c 1.10
1.10 2009/09/25 08:52:45 kent
Sped up 10x to 20x by loading the spXref3 accession-to-displayID pairs into a hash table once, instead of issuing repeated SQL queries.
Index: src/hg/protein/pfamXref/pfamXref.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/protein/pfamXref/pfamXref.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 4 -r1.9 -r1.10
--- src/hg/protein/pfamXref/pfamXref.c 23 Sep 2009 18:42:25 -0000 1.9
+++ src/hg/protein/pfamXref/pfamXref.c 25 Sep 2009 08:52:45 -0000 1.10
@@ -47,8 +47,18 @@
o1 = mustOpen(outputFileName, "w");
o2 = mustOpen("jj.dat", "w");
+/* Build up hash for quick access to displayIds. */
+struct hash *displayIdHash = hashNew(20);
+struct sqlConnection *conn = sqlConnect(proteinDB);
+struct sqlResult *sr = sqlGetResult(conn, "select accession,displayID from spXref3");
+char **row;
+while ((row = sqlNextRow(sr)) != NULL)
+ hashAdd(displayIdHash, row[0], cloneString(row[1]));
+sqlFreeResult(&sr);
+sqlDisconnect(&conn);
+
if ((inf = mustOpen(proteinFileName, "r")) == NULL)
{
fprintf(stderr, "Can't open file %s.\n", proteinFileName);
exit(8);
@@ -113,10 +123,9 @@
if (chp2 != NULL) *chp2='\0';
swissAC = chp;
// get display ID from AC
- sprintf(cond_str, "accession = '%s'", swissAC);
- answer = sqlGetField(proteinDB, "spXref3", "displayID", cond_str);
+ answer = hashFindVal(displayIdHash, swissAC);
if (answer != NULL)
{
swissDisplayID = answer;
}