5899cbd4120fc82ba0939dce519582841db961f7
markd
  Thu Mar 8 11:38:07 2018 -0800
Add support for GENCODE protein id to the wgEncodeGencodeAttrs* tables.  For backwards compatibility with existing tables, this was added as the last column of the table and the code checks for its presence.  RM 21077

diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index 634dc46..2496784 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -30,30 +30,31 @@
  *  - this will be integrated into hgGene at some point, however this was
  *    done as part of hgc for timing reasons and to allow more time to design
  *    the hgGene part.
  *  - Tables below will output at least one row even if no data is available.
  *    
  */
 
 /* Various URLs and URL templates.  At one time, these were in the ra file,
  * but that didn't prove that helpful and end up requiring updated the ra
  * files for every GENCODE version if a URL was added or changed. */
 //FIXME: clean up RA files when CGIs no longer need them
 static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html";
 static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html";
 static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s";
 static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s";
+static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s";
 static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s";
 static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/%s/Transcript/Summary?db=core;t=%s";
 static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/%s/Gene/Summary?db=core;g=%s";
 static char *yalePseudoUrl = "http://tables.pseudogene.org/%s";
 static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s";
 static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s";
 static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/";
 static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id&specie=%s";
 static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id&specie=%s";
 
 static char *getBaseAcc(char *acc, char *accBuf, int accBufSize)
 /* get the accession with version number dropped. */
 {
 safecpy(accBuf, accBufSize, acc);
 char *dot = strchr(accBuf, '.');
@@ -103,35 +104,44 @@
 }
 
 static struct genePred *transAnnoLoad(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId)
 /* load the gencode annotations and sort the one corresponding to the one that was clicked on is
  * first.  Should only have one or two. */
 {
 // must check chrom due to PAR
 char where[256];
 sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name = \"%s\")", seqName, gencodeId);
 struct genePred *transAnno = genePredReaderLoadQuery(conn, tdb->track, where);
 slSort(&transAnno, transAnnoCmp);
 return transAnno;
 }
 
 static struct wgEncodeGencodeAttrs *transAttrsLoad(struct trackDb *tdb, struct sqlConnection *conn, char *gencodeId)
-/* load the gencode class information */
+/* load the gencode attributes */
 {
-return sqlQueryObjs(conn, (sqlLoadFunc)wgEncodeGencodeAttrsLoad, sqlQuerySingle|sqlQueryMust,
-                    "select * from %s where transcriptId = \"%s\"",
+char query[1024];
+sqlSafef(query, sizeof(query), "select * from %s where transcriptId = \"%s\"",
          getGencodeTable(tdb, "wgEncodeGencodeAttrs"), gencodeId);
+struct sqlResult *sr = sqlGetResult(conn, query);
+char **row = sqlNextRow(sr);
+if (row == NULL)
+    errAbort("gencode transcript %s not found in %s", gencodeId,
+             getGencodeTable(tdb, "wgEncodeGencodeAttrs"));
+// older versions don't have the proteinId column.
+struct wgEncodeGencodeAttrs *transAttrs = wgEncodeGencodeAttrsLoad(row, sqlCountColumns(sr));
+sqlFreeResult(&sr);
+return transAttrs;
 }
 
 static void getGeneBounds(struct trackDb *tdb, struct sqlConnection *conn, struct genePred *transAnno,
                           int *geneChromStart, int *geneChromEnd)
 /* find bounds for the gene */
 {
 // must check chrom due to PAR
 char where[256];
 sqlSafefFrag(where, sizeof(where), "(chrom = \"%s\") and (name2 = \"%s\")", seqName, transAnno->name2);
 struct genePred *geneAnnos = genePredReaderLoadQuery(conn, tdb->track, where);
 struct genePred *geneAnno;
 *geneChromStart = transAnno->txStart;
 *geneChromEnd = transAnno->txEnd;
 for (geneAnno = geneAnnos; geneAnno != NULL; geneAnno = geneAnno->next)
     {
@@ -339,35 +349,47 @@
 printf("</tr>\n");
 }
 
 static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno,
                                struct wgEncodeGencodeAttrs *transAttrs,
                                int geneChromStart, int geneChromEnd,
                                struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource,
                                struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
 /* write basic HTML info for all genes */
 {
 // basic gene and transcript information
 printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n");
 printf("<tr><th><th>Transcript<th>Gene</tr>\n");
 printf("</thead><tbody>\n");
 
-printf("<tr><th>Gencode id");
+printf("<tr><th>GENCODE id");
 prTdEnsIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl);
 prTdEnsIdAnchor(transAttrs->geneId, ensemblGeneIdUrl);
 printf("</tr>\n");
 
+if (transAttrs->proteinId != NULL)
+    {
+    // protein id in database, maybe not this transcript
+    printf("<tr><th>Protein id");
+    if (strlen(transAttrs->proteinId) > 0)
+        prTdEnsIdAnchor(transAttrs->proteinId, ensemblProteinIdUrl);
+    else
+        printf("<td>&nbsp;");
+    printf("<td>");
+    printf("</tr>\n");
+    }
+
 printf("<tr><th>HAVANA manual id");
 prTdEnsIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl);
 prTdEnsIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl);
 printf("</tr>\n");
 
 printf("<tr><th>Position");
 printf("<td>");
 writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd);
 printf("<td>");
 writePosLink(transAnno->chrom, geneChromStart, geneChromEnd);
 printf("</tr>\n");
 
 printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand);
 
 printf("<tr><th><a href=\"%s\" target = _blank>Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType);