src/hg/lib/hdb.c 1.399
1.399 2009/03/17 19:54:56 markd
Minor speed-up of mRNA/EST rendering: check the gbSeq table before the seq table, which avoids issuing two SQL requests in the common case. Also simplified the code.
Index: src/hg/lib/hdb.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/lib/hdb.c,v
retrieving revision 1.398
retrieving revision 1.399
diff -b -B -U 4 -r1.398 -r1.399
--- src/hg/lib/hdb.c 17 Mar 2009 05:56:38 -0000 1.398
+++ src/hg/lib/hdb.c 17 Mar 2009 19:54:56 -0000 1.399
@@ -1313,76 +1313,64 @@
{
return seqMustGet(db, acc, FALSE, seqTbl, extFileTbl);
}
-static char* getSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId, char *gbDate)
-/* Return sequence as a fasta record in a string and it's database ID, or
- * NULL if not found. Optionally get genbank modification date. */
+static boolean querySeqInfo(struct sqlConnection *conn, char *acc, char *seqTbl, char *extFileFld,
+ HGID *retId, HGID *retExtId, size_t *retSize, off_t *retOffset)
+/* Look up id, ext file id, size and offset for acc in the seq or gbSeq table; return TRUE if found. */
{
-struct sqlResult *sr = NULL;
-char **row;
-char query[256];
-HGID extId;
-size_t size;
-off_t offset;
-char *buf;
-int seqTblSet = SEQ_TBL_SET;
-struct largeSeqFile *lsf;
-
-row = NULL;
-if (sqlTableExists(conn, "seq"))
+boolean gotIt = FALSE;
+if (sqlTableExists(conn, seqTbl))
{
+ char query[256];
safef(query, sizeof(query),
- "select id,extFile,file_offset,file_size,gb_date from seq where acc = '%s'",
- acc);
- sr = sqlMustGetResult(conn, query);
- row = sqlNextRow(sr);
- }
-
-if ((row == NULL) && sqlTableExists(conn, "gbSeq"))
+ "select id, %s, file_offset, file_size from %s where acc = '%s'",
+ extFileFld, seqTbl, acc);
+ struct sqlResult *sr = sqlMustGetResult(conn, query);
+ char **row = sqlNextRow(sr);
+ if (row != NULL)
{
- /* try gbSeq table */
- if (sr)
- sqlFreeResult(&sr);
- if (gbDate != NULL)
- safef(query, sizeof(query),
- "select gbSeq.id,gbExtFile,file_offset,file_size,moddate from gbSeq,gbCdnaInfo where (gbSeq.acc = '%s') and (gbCdnaInfo.acc = gbSeq.acc)",
- acc);
- else
- safef(query, sizeof(query),
- "select id,gbExtFile,file_offset,file_size from gbSeq where acc = '%s'",
- acc);
- sr = sqlMustGetResult(conn, query);
- row = sqlNextRow(sr);
- seqTblSet = GBSEQ_TBL_SET;
+ if (retId != NULL)
+ *retId = sqlUnsigned(row[0]);
+ if (retExtId != NULL)
+ *retExtId = sqlUnsigned(row[1]);
+ if (retOffset != NULL)
+ *retOffset = sqlLongLong(row[2]);
+ if (retSize != NULL)
+ *retSize = sqlUnsigned(row[3]);
+ gotIt = TRUE;
}
-if (row == NULL)
- {
sqlFreeResult(&sr);
- return NULL;
}
-if (retId != NULL)
- *retId = sqlUnsigned(row[0]);
-extId = sqlUnsigned(row[1]);
-offset = sqlLongLong(row[2]);
-size = sqlUnsigned(row[3]);
-if (gbDate != NULL)
- strcpy(gbDate, row[4]);
-
-sqlFreeResult(&sr);
+return gotIt;
+}
-char *extTable = (seqTblSet == GBSEQ_TBL_SET) ? "gbExtFile" : "extFile";
-lsf = largeFileHandle(conn, extId, extTable);
-buf = readOpenFileSection(lsf->fd, offset, size, lsf->path, acc);
+static char* getSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId)
+/* Return sequence as a fasta record in a string and its database ID, or
+ * NULL if not found. */
+{
+HGID extId;
+size_t size;
+off_t offset;
+char *extTable = NULL;
+/* try gbSeq (gbExtFile) first, as it tends to be more performance sensitive */
+if (querySeqInfo(conn, acc, "gbSeq", "gbExtFile", retId, &extId, &size, &offset))
+ extTable = "gbExtFile";
+else if (querySeqInfo(conn, acc, "seq", "extFile", retId, &extId, &size, &offset))
+ extTable = "extFile";
+else
+ return NULL;
+struct largeSeqFile *lsf = largeFileHandle(conn, extId, extTable);
+char *buf = readOpenFileSection(lsf->fd, offset, size, lsf->path, acc);
return buf;
}
static char* mustGetSeqAndId(struct sqlConnection *conn, char *acc,
HGID *retId)
/* Return sequence as a fasta record in a string and it's database ID,
* abort if not found */
{
-char *buf= getSeqAndId(conn, acc, retId, NULL);
+char *buf= getSeqAndId(conn, acc, retId);
if (buf == NULL)
errAbort("No sequence for %s in seq or gbSeq tables", acc);
return buf;
}
@@ -1390,16 +1378,15 @@
char* hGetSeqAndId(struct sqlConnection *conn, char *acc, HGID *retId)
/* Return sequence as a fasta record in a string and it's database ID, or
* NULL if not found. */
{
-return getSeqAndId(conn, acc, retId, NULL);
+return getSeqAndId(conn, acc, retId);
}
-int hRnaSeqAndIdx(char *acc, struct dnaSeq **retSeq, HGID *retId, char *gbdate, struct sqlConnection *conn)
-/* Return sequence for RNA, it's database ID, and optionally genbank
- * modification date. Return -1 if not found. */
+int hRnaSeqAndIdx(char *acc, struct dnaSeq **retSeq, HGID *retId, struct sqlConnection *conn)
+/* Return sequence for RNA and its database ID. Return -1 if not found. */
{
-char *buf = getSeqAndId(conn, acc, retId, gbdate);
+char *buf = getSeqAndId(conn, acc, retId);
if (buf == NULL)
return -1;
*retSeq = faFromMemText(buf);
return 0;
@@ -1533,9 +1520,9 @@
seq = loadSeqFromTable(conn, acc, compatTable);
}
else
{
- char *buf = getSeqAndId(conn, acc, NULL, NULL);
+ char *buf = getSeqAndId(conn, acc, NULL);
if (buf != NULL)
seq = faFromMemText(buf);
}
@@ -1579,9 +1566,9 @@
seq = loadSeqFromTable(conn, acc, compatTable);
}
else
{
- char *buf = getSeqAndId(conn, acc, NULL, NULL);
+ char *buf = getSeqAndId(conn, acc, NULL);
if (buf != NULL)
seq = faSeqFromMemText(buf, FALSE);
}
return seq;
@@ -4531,4 +4518,14 @@
/* Return true if chrom is one of our "unknown" chromomsomes (e.g. chrUn). */
{
return endsWith(chromName, "_random") || startsWith("chrUn", chromName);
}
+
+char *hGenbankModDate(char *acc, struct sqlConnection *conn)
+/* Get string for genbank last modification date, or NULL if not found.
+ * Caller must free the resulting string. */
+{
+char query[128];
+safef(query, sizeof(query),
+ "select moddate from gbCdnaInfo where (acc = '%s')", acc);
+return sqlQuickString(conn, query);
+}