7df13efd19d14b472b5ae7588717adda34d2b665
markd
  Tue Dec 15 14:07:57 2015 -0800
include APPRIS classification in GENCODE details display (RM #16528)

diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index 8e0bac0..0d31df9 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -230,58 +230,119 @@
 if (!isEmpty(id))
     {
     char urlBuf[512];
     safef(urlBuf, sizeof(urlBuf), urlTemplate, getScientificNameSym(), id);
     printf("<a href=\"%s\" target=_blank>%s</a>", urlBuf, id);
     }
 }
 
 static void prTdEnsIdAnchor(char *id, char *urlTemplate)
 /* print a table data element with an ensembl/vega anchor for a id */
 {
 printf("<td>");
 prEnsIdAnchor(id, urlTemplate);
 }
 
-static void prApprisTdAnchor(char *id, char *urlTemplate)
+static void prApprisTdAnchor(char *id, char *label, char *urlTemplate)
 /* print a gene or transcript link to APPRIS */
 {
 // under bar separated, lower case species name.
 char *speciesArg = hScientificName(database);
 toLowerN(speciesArg, strlen(speciesArg));
 subChar(speciesArg, ' ', '_');
 
 char accBuf[64];
 printf("<td><a href=\"");
 printf(urlTemplate, getBaseAcc(id, accBuf, sizeof(accBuf)), speciesArg);
-printf("\" target=_blank>%s</a>", id);
+printf("\" target=_blank>%s</a>", label);
 
 freeMem(speciesArg);
 }
 
 static void writePosLink(char *chrom, int chromStart, int chromEnd)
 /* write link to a genomic position */
 {
 printf("<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A>",
        hgTracksPathAndSettings(), database,
        chrom, chromStart, chromEnd, chrom, chromStart+1, chromEnd);
 }
 
-static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno, struct wgEncodeGencodeAttrs *transAttrs,
+static bool geneHasApprisTranscripts(struct trackDb *tdb, struct sqlConnection *conn, struct wgEncodeGencodeAttrs *transAttrs)
+/* check if any transcript sharing transAttrs->geneId has a tag beginning with "appris" */
+{
+char query[1024];  // table name plus where clause, handed to sqlRowCount
+safef(query, sizeof(query),
+      "%s tag where tag.tag like \"appris%%\" and transcriptId in "
+      "(select transcriptId from %s where geneId=\"%s\")",  // subquery must be closed
+      getGencodeTable(tdb, "wgEncodeGencodeTag"),
+      getGencodeTable(tdb, "wgEncodeGencodeAttrs"),
+      transAttrs->geneId);
+return sqlRowCount(conn, query) > 0;
+}
+
+static char* findApprisTag(struct wgEncodeGencodeTag *tags)
+/* return the tag string of the first list entry starting with "appris_", or NULL if none; result points into the list */
+{
+struct wgEncodeGencodeTag *tag;
+for (tag = tags; tag != NULL; tag = tag->next)
+    {
+    if (startsWith("appris_", tag->tag))
+        return tag->tag;
+    }
+return NULL;
+}
+
+static char* apprisTagToSymbol(char* tag)
+/* convert APPRIS tag to the symbol used by APPRIS. WARNING static return, overwritten by the next call. */
+{
+// appris_principal_1 -> PRINCIPAL:1
+static char buf[64];
+safecpy(buf, sizeof(buf), tag+7);  // skip the 7-character "appris_" prefix; tag must start with it
+touppers(buf);
+subChar(buf, '_', ':');
+return buf;
+}
+
+static void writeAprrisRow(struct sqlConnection *conn, struct trackDb *tdb,
+                           struct wgEncodeGencodeAttrs *transAttrs,
+                           struct wgEncodeGencodeTag *tags)
+/* write the APPRIS table row: header cell, then a transcript cell and a gene cell (link or blank) */
+{
+// Get labels to use. If the transcript has an APPRIS tag, then we link to the transcript.
+// If it doesn't have an APPRIS tag, we can still link to the gene if any of the transcripts
+// of that gene have APPRIS tags.
+char* apprisTag = findApprisTag(tags);
+char* transLabel = (apprisTag != NULL) ? apprisTagToSymbol(apprisTag) : NULL;
+char *geneLabel = ((apprisTag != NULL) || geneHasApprisTranscripts(tdb, conn, transAttrs)) ? transAttrs->geneName : NULL;
+
+printf("<tr><th><a href=\"%s\" target=_blank>APPRIS</a>\n", apprisHomeUrl);
+if (transLabel != NULL)
+    prApprisTdAnchor(transAttrs->transcriptId, transLabel, apprisTranscriptUrl);
+else
+    printf("<td>&nbsp;");
+if (geneLabel != NULL)
+    prApprisTdAnchor(transAttrs->geneId, geneLabel, apprisGeneUrl);
+else
+    printf("<td>&nbsp;");
+printf("</tr>\n");
+}
+
+static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno,
+                               struct wgEncodeGencodeAttrs *transAttrs,
                                int geneChromStart, int geneChromEnd,
                                struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource,
-                               bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
+                               struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
 /* write basic HTML info for all genes */
 {
 /*
  * notes:
  *   - According to Steve: `status' is not the same for ensembl and havana.  So either avoid displaying it
  *     or display it as `automatic status' or `manual status'.
  */
 
 // basic gene and transcript information
 printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n");
 printf("<tr><th><th>Transcript<th>Gene</tr>\n");
 printf("</thead><tbody>\n");
 
 printf("<tr><th>Gencode id");
 prTdEnsIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl);
@@ -321,36 +382,31 @@
 
 printf("<tr><th>CCDS<td>");
 if (!isEmpty(transAttrs->ccdsId))
     {
     printf("<a href=\"");
     printCcdsExtUrl(transAttrs->ccdsId);
     printf("\" target=_blank>%s</a>", transAttrs->ccdsId);
     }
 printf("<td></tr>\n");
 
 printf("<tr><th>GeneCards<td colspan=2>");
 prExtIdAnchor(transAttrs->geneName, geneCardsUrl);
 printf("</tr>\n");
 
 if (isProteinCodingTrans(transAttrs))
-    {
-    printf("<tr><th><a href=\"%s\" target=_blank>APPRIS</a>\n", apprisHomeUrl);
-    prApprisTdAnchor(transAttrs->transcriptId, apprisTranscriptUrl);
-    prApprisTdAnchor(transAttrs->geneId, apprisGeneUrl);
-    printf("</tr>\n");
-    }
+    writeAprrisRow(conn, tdb, transAttrs, tags);
 
 // FIXME: add sequence here??
 printf("</tbody></table>\n");
 }
 
 static void writeSequenceHtml(struct trackDb *tdb, char *gencodeId, struct genePred *transAnno)
 /* write links to get sequences */
 {
 printf("<table class=\"hgcCcds\"><thead>\n");
 printf("<tr><th colspan=\"2\">Sequences</tr>\n");
 printf("</thead><tbody>\n");
 if (transAnno->cdsStart < transAnno->cdsEnd)
     {
     // protein coding
     printf("<tr><td width=\"50%%\">");
@@ -739,31 +795,31 @@
 struct wgEncodeGencodeTranscriptionSupportLevel *tsl = haveTsl ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptionSupportLevel", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeTranscriptionSupportLevelLoad) : NULL;
 
 int geneChromStart, geneChromEnd;
 getGeneBounds(tdb, conn, transAnno, &geneChromStart, &geneChromEnd);
 
 char *title = "GENCODE Transcript Annotation";
 char header[256];
 safef(header, sizeof(header), "%s %s", title, gencodeId);
 if (!isEmpty(transAttrs->geneName))
     safef(header, sizeof(header), "%s %s (%s)", title, gencodeId, transAttrs->geneName);
 else
     safef(header, sizeof(header), "%s %s", title, gencodeId);
 cartWebStart(cart, database, "%s", header);
 printf("<H2>%s</H2>\n", header);
 
-writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, haveTsl, tsl);
+writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, tags, haveTsl, tsl);
 writeTagLinkHtml(tags);
 writeSequenceHtml(tdb, gencodeId, transAnno);
 if (haveRemarks)
     writeAnnotationRemarkHtml(remarks);
 if (isProteinCodingTrans(transAttrs))
     writePdbLinkHtml(pdbs);
 writePubMedLinkHtml(pubMeds);
 if (haveEntrezGene)
     writeEntrezGeneLinkHtml(entrezGenes);
 writeRefSeqLinkHtml(refSeqs);
 if (isProteinCodingTrans(transAttrs))
     writeUniProtLinkHtml(uniProts);
 writeSupportingEvidenceLinkHtml(gencodeId, transcriptSupports, exonSupports);
 
 wgEncodeGencodeAttrsFree(&transAttrs);