src/hg/lib/hdb.c 1.399

1.399 2009/03/17 19:54:56 markd
Minor speed-up of mRNA/EST rendering: check the gbSeq table before the seq table, avoiding two SQL requests. Also simplified the code.
Index: src/hg/lib/hdb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/hdb.c,v
retrieving revision 1.398
retrieving revision 1.399
diff -b -B -U 4 -r1.398 -r1.399
--- src/hg/lib/hdb.c	17 Mar 2009 05:56:38 -0000	1.398
+++ src/hg/lib/hdb.c	17 Mar 2009 19:54:56 -0000	1.399
@@ -1313,76 +1313,64 @@
 {
 return seqMustGet(db, acc, FALSE, seqTbl, extFileTbl);
 }
 
-static char* getSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId, char *gbDate)
-/* Return sequence as a fasta record in a string and it's database ID, or
- * NULL if not found. Optionally get genbank modification date. */
+static boolean querySeqInfo(struct sqlConnection *conn, char *acc, char *seqTbl, char *extFileFld,
+                            HGID *retId, HGID *retExtId, size_t *retSize, off_t *retOffset)
+/* lookup information in the seq or gbSeq table */
 {
-struct sqlResult *sr = NULL;
-char **row;
-char query[256];
-HGID extId;
-size_t size;
-off_t offset;
-char *buf;
-int seqTblSet = SEQ_TBL_SET;
-struct largeSeqFile *lsf;
-
-row = NULL;
-if (sqlTableExists(conn, "seq"))
+boolean gotIt = FALSE;
+if (sqlTableExists(conn, seqTbl))
     {
+    char query[256];
     safef(query, sizeof(query),
-       "select id,extFile,file_offset,file_size,gb_date from seq where acc = '%s'",
-       acc);
-    sr = sqlMustGetResult(conn, query);
-    row = sqlNextRow(sr);
-    }
-
-if ((row == NULL) && sqlTableExists(conn, "gbSeq"))
+       "select id, %s, file_offset, file_size from %s where acc = '%s'",
+          extFileFld, seqTbl, acc);
+    struct sqlResult *sr = sqlMustGetResult(conn, query);
+    char **row = sqlNextRow(sr);
+    if (row != NULL) 
     {
-    /* try gbSeq table */
-    if (sr)
-	sqlFreeResult(&sr);
-    if (gbDate != NULL)
-        safef(query, sizeof(query),
-                "select gbSeq.id,gbExtFile,file_offset,file_size,moddate from gbSeq,gbCdnaInfo where (gbSeq.acc = '%s') and (gbCdnaInfo.acc = gbSeq.acc)",
-                acc);
-    else
-        safef(query, sizeof(query),
-                "select id,gbExtFile,file_offset,file_size from gbSeq where acc = '%s'",
-                acc);
-    sr = sqlMustGetResult(conn, query);
-    row = sqlNextRow(sr);
-    seqTblSet = GBSEQ_TBL_SET;
+        if (retId != NULL)
+            *retId = sqlUnsigned(row[0]);
+        if (retExtId != NULL)
+            *retExtId = sqlUnsigned(row[1]);
+        if (retOffset != NULL)
+            *retOffset = sqlLongLong(row[2]);
+        if (retSize != NULL)
+            *retSize = sqlUnsigned(row[3]);
+        gotIt = TRUE;
     }
-if (row == NULL)
-    {
     sqlFreeResult(&sr);
-    return NULL;
     }
-if (retId != NULL)
-    *retId = sqlUnsigned(row[0]);
-extId = sqlUnsigned(row[1]);
-offset = sqlLongLong(row[2]);
-size = sqlUnsigned(row[3]);
-if (gbDate != NULL)
-    strcpy(gbDate, row[4]);
-
-sqlFreeResult(&sr);
+return gotIt;
+}
 
-char *extTable = (seqTblSet == GBSEQ_TBL_SET) ? "gbExtFile" : "extFile";
-lsf = largeFileHandle(conn, extId, extTable);
-buf = readOpenFileSection(lsf->fd, offset, size, lsf->path, acc);
+static char* getSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId)
+/* Return sequence as a fasta record in a string and its database ID, or
+ * NULL if not found.  The gbSeq table is checked first, then the seq table. */
+{
+HGID extId;
+size_t size;
+off_t offset;
+char *extTable = NULL;
+/* try gbExtFile table first, as it tends to be  more performance sensitive */
+if (querySeqInfo(conn, acc, "gbSeq", "gbExtFile", retId, &extId, &size, &offset))
+    extTable = "gbExtFile";
+else if (querySeqInfo(conn, acc, "seq", "extFile", retId, &extId, &size, &offset))
+    extTable = "extFile";
+else
+    return NULL;
+struct largeSeqFile *lsf = largeFileHandle(conn, extId, extTable);
+char *buf = readOpenFileSection(lsf->fd, offset, size, lsf->path, acc);
 return buf;
 }
 
 static char* mustGetSeqAndId(struct sqlConnection *conn, char *acc,
                              HGID *retId)
 /* Return sequence as a fasta record in a string and it's database ID,
  * abort if not found */
 {
-char *buf= getSeqAndId(conn, acc, retId, NULL);
+char *buf= getSeqAndId(conn, acc, retId);
 if (buf == NULL)
     errAbort("No sequence for %s in seq or gbSeq tables", acc);
 return buf;
 }
@@ -1390,16 +1378,15 @@
 char* hGetSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId)
 /* Return sequence as a fasta record in a string and it's database ID, or
  * NULL if not found. */
 {
-return getSeqAndId(conn, acc, retId, NULL);
+return getSeqAndId(conn, acc, retId);
 }
 
-int hRnaSeqAndIdx(char *acc, struct dnaSeq **retSeq, HGID *retId, char *gbdate, struct sqlConnection *conn)
-/* Return sequence for RNA, it's database ID, and optionally genbank
- * modification date. Return -1 if not found. */
+int hRnaSeqAndIdx(char *acc, struct dnaSeq **retSeq, HGID *retId, struct sqlConnection *conn)
+/* Return sequence for RNA and its database ID. Return -1 if not found. */
 {
-char *buf = getSeqAndId(conn, acc, retId, gbdate);
+char *buf = getSeqAndId(conn, acc, retId);
 if (buf == NULL)
     return -1;
 *retSeq = faFromMemText(buf);
 return 0;
@@ -1533,9 +1520,9 @@
     seq = loadSeqFromTable(conn, acc, compatTable);
     }
 else
     {
-    char *buf = getSeqAndId(conn, acc, NULL, NULL);
+    char *buf = getSeqAndId(conn, acc, NULL);
     if (buf != NULL)
         seq = faFromMemText(buf);
     }
 
@@ -1579,9 +1566,9 @@
     seq = loadSeqFromTable(conn, acc, compatTable);
     }
 else
     {
-    char *buf = getSeqAndId(conn, acc, NULL, NULL);
+    char *buf = getSeqAndId(conn, acc, NULL);
     if (buf != NULL)
         seq = faSeqFromMemText(buf, FALSE);
     }
 return seq;
@@ -4531,4 +4518,14 @@
 /* Return true if chrom is one of our "unknown" chromomsomes (e.g. chrUn). */
 {
 return  endsWith(chromName, "_random") || startsWith("chrUn", chromName);
 }
+
+char *hGenbankModDate(char *acc, struct sqlConnection *conn)
+/* Get string for genbank last modification date, or NULL if not found.
+ * Caller should free the resulting string. */
+{
+char query[128];
+safef(query, sizeof(query),
+      "select moddate from gbCdnaInfo where (acc = '%s')", acc);
+return sqlQuickString(conn, query);
+}