aba8125cb532df17beb7c7c9bc8467a43d09e3d6
braney
Wed Feb 10 13:39:27 2016 -0800
changes to allow for GenBank metadata to be held in a common table. #16809
diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index befa85c..32e2736 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -597,32 +597,32 @@
/* Clip start/end coordinates to fit in chromosome. */
{
static int chromSize = -1;
if (chromSize < 0)
chromSize = hChromSize(database, seqName);
if (*pStart < 0) *pStart = 0;
if (*pEnd > chromSize) *pEnd = chromSize;
return *pStart < *pEnd;
}
struct genbankCds getCds(struct sqlConnection *conn, char *acc)
/* Obtain and parse the CDS for accession acc using connection conn.
 * The CDS string is the name column of the CDS table row whose id equals the
 * cds column of the cDNA info row for acc.  The parsed struct is returned by
 * value.  errAbort if no CDS string is found or if it fails to parse. */
{
char query[256];
-sqlSafef(query, sizeof(query), "select cds.name from gbCdnaInfo,cds where (acc=\"%s\") and (cds.id=cds)",
- acc);
+sqlSafef(query, sizeof(query), "select c.name from %s,%s c where (acc=\"%s\") and (c.id=cds)",
+ gbCdnaInfoTable,cdsTable, acc);
/* NOTE(review): cdsStr is never freed in this function; presumably
 * sqlQuickString returns allocated memory -- confirm whether this leak matters. */
char *cdsStr = sqlQuickString(conn, query);
if (cdsStr == NULL)
errAbort("no CDS found for %s", acc);
struct genbankCds cds;
/* genbankCdsParse fills in cds; a false return is treated as a parse failure. */
if (!genbankCdsParse(cdsStr, &cds))
errAbort("can't parse CDS for %s: %s", acc, cdsStr);
return cds;
}
void printCappedSequence(int start, int end, int extra)
/* Print DNA from start to end including extra at either end.
* Capitalize bits from start to end. */
{
@@ -5338,37 +5338,37 @@
/* Print out a link to GeneCards (Human only). */
{
if (startsWith("hg", database) && isNotEmpty(geneName))
{
printf("GeneCards: "
"%s
\n",
geneName, geneName);
}
}
int getImageId(struct sqlConnection *conn, char *acc)
/* Get the image id for a clone, or 0 if none.
 * Returns 0 both when the image clone table does not exist on conn and when
 * the table has no row for acc; otherwise returns the imageId column of the
 * first matching row, parsed with sqlUnsigned. */
{
int imageId = 0;
-if (sqlTableExists(conn, "imageClone"))
+if (sqlTableExists(conn, imageCloneTable))
{
struct sqlResult *sr;
char **row;
char query[128];
sqlSafef(query, sizeof(query),
- "select imageId from imageClone where acc = '%s'", acc);
+ "select imageId from %s where acc = '%s'",imageCloneTable, acc);
sr = sqlMustGetResult(conn, query);
/* Only the first row is examined; any further rows are ignored. */
row = sqlNextRow(sr);
if (row != NULL)
imageId = sqlUnsigned(row[0]);
sqlFreeResult(&sr);
}
return imageId;
}
void htcDisplayMrna(char *acc)
/* Display mRNA available from genback or seq table.. */
{
struct dnaSeq *seq = hGenBankGetMrna(database, acc, NULL);
if (seq == NULL)
errAbort("mRNA sequence %s not found", acc);
@@ -5385,33 +5385,33 @@
{
char query[256], estOrient[64];
sqlSafef(query, sizeof(query),
"select intronOrientation from %s.estOrientInfo where chrom = '%s' and chromStart = %d and name = '%s'",
database, psl->tName, psl->tStart, psl->qName);
if (sqlQuickQuery(conn, query, estOrient, sizeof(estOrient)) != NULL)
return sqlSigned(estOrient) * ((psl->strand[0] == '+') ? 1 : -1);
else
return 0;
}
static struct gbWarn *checkGbWarn(struct sqlConnection *conn, char *acc)
/* Check if there is a gbWarn entry for this accession; return NULL if none.
 * The query is only issued when the warning table exists; otherwise the
 * initial NULL is returned.  Rows are loaded with gbWarnLoad via sqlQueryObjs.
 * The added line checks table existence on conn (sqlTableExists) rather than
 * by database name (hTableExists), and takes the table name from gbWarnTable. */
{
struct gbWarn *gbWarn = NULL;
-if (hTableExists(database, "gbWarn"))
+if (sqlTableExists(conn, gbWarnTable))
/* NOTE(review): sqlQuerySingle presumably restricts the result to a single
 * object -- confirm against the sqlQueryObjs documentation. */
gbWarn = sqlQueryObjs(conn, (sqlLoadFunc)gbWarnLoad, sqlQuerySingle,
- "SELECT * FROM gbWarn WHERE acc = \"%s\"", acc);
+ "SELECT * FROM %s WHERE acc = \"%s\"", gbWarnTable, acc);
return gbWarn;
}
static void printGbWarn(char *acc, struct gbWarn *gbWarn)
/* print descriptive information about an accession in the gbWarn table */
{
char *msg = NULL;
switch (gbWarn->reason) {
case gbWarnInvitroNorm:
msg = "is from the InVitroGen/Genoscope full-length library. Some of the entries "
"associated with this dataset appear to have been aligned to the reference "
"genome and the sequences subsequently modified to match the genome. This "
"process may have resulted in apparent high-quality alignments to pseudogenes.";
break;
case gbWarnAthRage:
@@ -5432,85 +5432,85 @@
{
struct dyString *dy = newDyString(1024);
struct sqlConnection *conn = hAllocConn(database);
struct sqlConnection *conn2= hAllocConn(database);
struct sqlResult *sr;
char **row;
char rgdEstId[512];
char query[256];
char *type,*direction,*orgFullName,*library,*clone,*sex,*tissue,
*development,*cell,*cds,*description, *author,*geneName,
*date,*productName;
// char *source; unused variable
// int seqSize,fileSize; unused variables
// long fileOffset; unused variable
// char *extFile; unused variable
-boolean hasVersion = hHasField(database, "gbCdnaInfo", "version");
-boolean haveGbSeq = sqlTableExists(conn, "gbSeq");
-char *seqTbl = haveGbSeq ? "gbSeq" : "seq";
+boolean hasVersion = hHasField(database, gbCdnaInfoTable, "version");
+boolean haveGbSeq = sqlTableExists(conn, gbSeqTable);
+char *seqTbl = haveGbSeq ? gbSeqTable : "seq";
char *version = NULL;
struct trackDb *tdbRgdEst;
char *chrom = cartString(cart, "c");
int start = cartInt(cart, "o");
int end = cartUsualInt(cart, "t",0);
struct gbWarn *gbWarn = checkGbWarn(conn, acc);
/* This sort of query and having to keep things in sync between
* the first clause of the select, the from clause, the where
* clause, and the results in the row ... is really tedious.
* One of my main motivations for going to a more object
* based rather than pure relational approach in general,
* and writing 'autoSql' to help support this. However
* the pure relational approach wins for pure search speed,
* and these RNA fields are searched. So it looks like
* the code below stays. Be really careful when you modify
* it.
*
* Uses the gbSeq table if available, otherwise use seq for older databases.
*/
sqlDyStringAppend(dy,
- "select gbCdnaInfo.type,gbCdnaInfo.direction,"
- "source.name,organism.name,library.name,mrnaClone.name,"
- "sex.name,tissue.name,development.name,cell.name,cds.name,"
- "description.name,author.name,geneName.name,productName.name,");
+ "select g.type,g.direction,"
+ "so.name,o.name,l.name,m.name,"
+ "se.name,t.name,dev.name,ce.name,cd.name,"
+ "des.name,a.name,gene.name,p.name,");
+/* The sequence table (gbSeq, or seq on older databases) is always aliased gbS
+ * in the FROM clause below; se is the alias of the sex table.  Both branches
+ * must therefore read the sequence columns through gbS. */
if (haveGbSeq)
dyStringAppend(dy,
- "gbSeq.size,gbCdnaInfo.moddate,gbSeq.gbExtFile,gbSeq.file_offset,gbSeq.file_size ");
+ "gbS.size,g.moddate,gbS.gbExtFile,gbS.file_offset,gbS.file_size ");
else
dyStringAppend(dy,
- "seq.size,seq.gb_date,seq.extFile,seq.file_offset,seq.file_size ");
+ "gbS.size,gbS.gb_date,gbS.extFile,gbS.file_offset,gbS.file_size ");
-/* If the gbCdnaInfo table has a "version" column then will show it */
+/* If the table named by gbCdnaInfoTable has a "version" column then will show it */
if (hasVersion)
{
dyStringAppend(dy,
- ", gbCdnaInfo.version ");
+ ", g.version ");
}
sqlDyStringPrintf(dy,
- " from gbCdnaInfo,%s,source,organism,library,mrnaClone,sex,tissue,"
- "development,cell,cds,description,author,geneName,productName "
- " where gbCdnaInfo.acc = '%s' and gbCdnaInfo.id = %s.id ",
- seqTbl, acc, seqTbl);
+ " from %s g,%s gbS,%s so,%s o,%s l,%s m,%s se,%s t,"
+ "%s dev,%s ce,%s cd,%s des,%s a,%s gene,%s p"
+ " where g.acc = '%s' and g.id = gbS.id ",
+ gbCdnaInfoTable,seqTbl, sourceTable, organismTable, libraryTable, mrnaCloneTable, sexTable, tissueTable, developmentTable, cellTable, cdsTable, descriptionTable, authorTable, geneNameTable, productNameTable, acc);
dyStringAppend(dy,
- "and gbCdnaInfo.source = source.id and gbCdnaInfo.organism = organism.id "
- "and gbCdnaInfo.library = library.id and gbCdnaInfo.mrnaClone = mrnaClone.id "
- "and gbCdnaInfo.sex = sex.id and gbCdnaInfo.tissue = tissue.id "
- "and gbCdnaInfo.development = development.id and gbCdnaInfo.cell = cell.id "
- "and gbCdnaInfo.cds = cds.id and gbCdnaInfo.description = description.id "
- "and gbCdnaInfo.author = author.id and gbCdnaInfo.geneName = geneName.id "
- "and gbCdnaInfo.productName = productName.id");
+ "and g.source = so.id and g.organism = o.id "
+ "and g.library = l.id and g.mrnaClone = m.id "
+ "and g.sex = se.id and g.tissue = t.id "
+ "and g.development = dev.id and g.cell = ce.id "
+ "and g.cds = cd.id and g.description = des.id "
+ "and g.author = a.id and g.geneName = gene.id "
+ "and g.productName = p.id");
sr = sqlMustGetResult(conn, dy->string);
row = sqlNextRow(sr);
if (row != NULL)
{
type=row[0];direction=row[1];
// source=row[2]; unused variable
orgFullName=row[3];library=row[4];clone=row[5];
sex=row[6];tissue=row[7];development=row[8];cell=row[9];cds=row[10];description=row[11];
author=row[12];geneName=row[13];productName=row[14];
// seqSize = sqlUnsigned(row[15]); unused variable
date = row[16];
// ext_file = row[17]; unused variable
// fileOffset=sqlUnsigned(row[18]); unused variable
// fileSize=sqlUnsigned(row[19]); unused variable
@@ -5586,31 +5586,31 @@
{
int estOrient = getEstTranscriptionDir(conn2, psl);
if (estOrient != 0)
printf("EST transcribed from %c strand (supported by %d splice sites).
\n",
(estOrient > 0 ? '+' : '-' ), abs(estOrient));
}
if (hGenBankHaveSeq(database, acc, NULL))
{
printf("%s sequence: ", type);
hgcAnchorSomewhere("htcDisplayMrna", acc, tdb->track, seqName);
printf("%s
\n", acc);
}
}
else
{
- warn("Couldn't find %s in gbCdnaInfo table", acc);
+ /* Argument order must follow the format: accession first, then table name. */
+ warn("Couldn't find %s in %s table", acc, gbCdnaInfoTable);
}
if (end != 0 && differentString(chrom,"0") && isNotEmpty(chrom))
{
printf("Position: "
"",
hgTracksPathAndSettings(), database, chrom, start+1, end);
printf("%s:%d-%d
\n", chrom, start+1, end);
}
gbWarnFree(&gbWarn);
sqlFreeResult(&sr);
freeDyString(&dy);
hFreeConn(&conn);
hFreeConn(&conn2);
}
@@ -6947,37 +6947,37 @@
else
puts("