f9d47d16a47f128366aeb1deb0575c0cb52aa0f9
markd
  Thu Nov 29 15:24:08 2018 -0800
For native GENCODE on GRCh37/hg19, link to http://grch37.ensembl.org, not the primary Ensembl site.  #22549

diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index 746b8f3..ba01314 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -26,38 +26,46 @@
 #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h"
 
 /*
  * General notes:
  *  - this will be integrated into hgGene at some point, however this was
  *    done as part of hgc for timing reasons and to allow more time to design
  *    the hgGene part.
  *  - Tables below will output at least one row even if no data is available.
  *    
  */
 
 /* Various URLs and URL templates.  At one time, these were in the ra file,
  * but that didn't prove that helpful and end up requiring updated the ra
  * files for every GENCODE version if a URL was added or changed. */
 //FIXME: clean up RA files when CGIs no longer need them
-static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html";
-static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html";
 static char *ensemblTranscriptIdUrl = "http://www.ensembl.org/%s/Transcript/Summary?db=core;t=%s";
 static char *ensemblGeneIdUrl = "http://www.ensembl.org/%s/Gene/Summary?db=core;t=%s";
 static char *ensemblProteinIdUrl = "http://www.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s";
 static char *ensemblSupportingEvidUrl = "http://www.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s";
+/* Same Ensembl templates on the grch37.ensembl.org host; selected when isGrcH37Native() is true. */
+static char *ensemblH37TranscriptIdUrl = "http://grch37.ensembl.org/%s/Transcript/Summary?db=core;t=%s";
+static char *ensemblH37GeneIdUrl = "http://grch37.ensembl.org/%s/Gene/Summary?db=core;t=%s";
+static char *ensemblH37ProteinIdUrl = "http://grch37.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s";
+static char *ensemblH37SupportingEvidUrl = "http://grch37.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s";
+
 static char *vegaTranscriptIdUrl = "http://vega.sanger.ac.uk/%s/Transcript/Summary?db=core;t=%s";
 static char *vegaGeneIdUrl = "http://vega.sanger.ac.uk/%s/Gene/Summary?db=core;g=%s";
+
+static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/gencode_biotypes.html";
+static char *gencodeTagsUrl = "http://www.gencodegenes.org/gencode_tags.html";
+
 static char *yalePseudoUrl = "http://tables.pseudogene.org/%s";
 static char *hgncUrl = "http://www.genenames.org/data/hgnc_data.php?match=%s";
 static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s";
 static char *apprisHomeUrl = "http://appris-tools.org/";
 static char *apprisGeneUrl = "http://appris-tools.org/#/database/id/%s/%s?sc=ensembl";
 
 static char *getBaseAcc(char *acc, char *accBuf, int accBufSize)
 /* get the accession with version number dropped. */
 {
 safecpy(accBuf, accBufSize, acc);
 char *dot = strchr(accBuf, '.');
 if (dot != NULL)
     *dot = '\0';
 return accBuf;
 }
@@ -69,30 +77,40 @@
 }
 
 static char *getGencodeTable(struct trackDb *tdb, char *tableBase)
 /* get a table name from the settings. */
 {
 return trackDbRequiredSetting(tdb, tableBase);
 }
 
 static char* getGencodeVersion(struct trackDb *tdb)
 /* get the GENCODE version or NULL for < V7, which is not supported
  * by this module. */
 {
 return trackDbSetting(tdb, "wgEncodeGencodeVersion");
 }
 
+static boolean isGrcH37Native(struct trackDb *tdb)
+/* Is this a native (non-lifted) GENCODE build on GRCh37/hg19, which requires a different Ensembl site? */
+{
+if (!sameString(database, "hg19"))
+    return FALSE;
+// no "lift37" in the version means native; a missing (NULL) version has no marker either
+char *version = getGencodeVersion(tdb);
+return (version == NULL) || (stringIn("lift37", version) == NULL);
+}
+
 static int transAnnoCmp(const void *va, const void *vb)
 /* Compare genePreds, sorting to keep select gene first.  The only cases
  * that annotations will be duplicated is if they are in the PAR and thus
  * on different chroms. */
 {
 const struct genePred *a = *((struct genePred **)va);
 const struct genePred *b = *((struct genePred **)vb);
 if (sameString(a->name, seqName))
     return -1;
 else if (sameString(b->name, seqName))
     return 1;
 else
     return strcmp(a->name, b->name);
 }
 
@@ -350,40 +368,43 @@
 }
 
 static void writeBasicInfoHtml(struct sqlConnection *conn, struct trackDb *tdb, char *gencodeId, struct genePred *transAnno,
                                struct wgEncodeGencodeAttrs *transAttrs,
                                int geneChromStart, int geneChromEnd,
                                struct wgEncodeGencodeGeneSource *geneSource, struct wgEncodeGencodeTranscriptSource *transcriptSource,
                                struct wgEncodeGencodeTag *tags, bool haveTsl, struct wgEncodeGencodeTranscriptionSupportLevel *tsl)
 /* write basic HTML info for all genes */
 {
 // basic gene and transcript information
 printf("<table class=\"hgcCcds\" style=\"white-space: nowrap;\"><thead>\n");
 printf("<tr><th><th>Transcript<th>Gene</tr>\n");
 printf("</thead><tbody>\n");
 
 printf("<tr><th>GENCODE id");
-prTdEnsIdAnchor(transAttrs->transcriptId, ensemblTranscriptIdUrl);
-prTdEnsIdAnchor(transAttrs->geneId, ensemblGeneIdUrl);
+prTdEnsIdAnchor(transAttrs->transcriptId,
+                (isGrcH37Native(tdb) ? ensemblH37TranscriptIdUrl: ensemblTranscriptIdUrl));
+prTdEnsIdAnchor(transAttrs->geneId,
+                (isGrcH37Native(tdb) ? ensemblH37GeneIdUrl : ensemblGeneIdUrl));
 printf("</tr>\n");
 
 if (transAttrs->proteinId != NULL)
     {
-    // protein id in database, maybe not this transcript
+    // protein id in database, maybe not for this transcript
     printf("<tr><th>Protein id");
     if (strlen(transAttrs->proteinId) > 0)
-        prTdEnsIdAnchor(transAttrs->proteinId, ensemblProteinIdUrl);
+        prTdEnsIdAnchor(transAttrs->proteinId,
+                        (isGrcH37Native(tdb) ? ensemblH37ProteinIdUrl: ensemblProteinIdUrl));
     else
         printf("<td>&nbsp;");
     printf("<td>");
     printf("</tr>\n");
     }
 
 printf("<tr><th>HAVANA manual id");
 prTdEnsIdAnchor(transAttrs->havanaTranscriptId, vegaTranscriptIdUrl);
 prTdEnsIdAnchor(transAttrs->havanaGeneId, vegaGeneIdUrl);
 printf("</tr>\n");
 
 printf("<tr><th>Position");
 printf("<td>");
 writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd);
 printf("<td>");
@@ -719,39 +740,41 @@
 struct supportEvid *supportEvids = NULL;
 transcriptSupportToSupportEvid(&supportEvids, transcriptSupports);
 exonSupportToSupportEvid(&supportEvids, exonSupports);
 sortUniqSupportExidence(&supportEvids);
 return supportEvids;
 }
 
 static void writeSupportEvidenceEntry(struct supportEvid *supportEvid)
 /* write HTML table entry  for a supporting evidence */
 {
 // FIXME: should link to sources when possible
 printf("<td width=\"25%%\">%s", supportEvid->seqSrc);
 printf("<td width=\"25%%\">%s", supportEvid->seqId);
 }
 
-static void writeSupportingEvidenceLinkHtml(char *gencodeId, struct wgEncodeGencodeTranscriptSupport *transcriptSupports,
+static void writeSupportingEvidenceLinkHtml(struct trackDb *tdb, char *gencodeId,
+                                            struct wgEncodeGencodeTranscriptSupport *transcriptSupports,
                                             struct wgEncodeGencodeExonSupport *exonSupports)
 /* write HTML links to supporting evidence */
 {
 struct supportEvid *supportEvids = loadSupportEvid(transcriptSupports, exonSupports);
 
 printf("<table class=\"hgcCcds\"><thead>\n");
 printf("<tr><th colspan=\"4\">Supporting Evidence (");
-prEnsIdAnchor(gencodeId, ensemblSupportingEvidUrl);
+prEnsIdAnchor(gencodeId,
+              (isGrcH37Native(tdb) ? ensemblH37SupportingEvidUrl: ensemblSupportingEvidUrl));
 printf(")</tr>\n");
 printf("<tr class=\"hgcCcdsSub\"><th>Source<th>Sequence<th>Source<th>Sequence</tr>\n");
 printf("</thead><tbody>\n");
 struct supportEvid *supportEvid = supportEvids;
 int i, rowCnt = 0;
 while ((supportEvid != NULL) || (rowCnt == 0))
     {
     printf("<tr>");
     for (i = 0; i < 2; i++)
         {
         if (supportEvid != NULL)
             {
             writeSupportEvidenceEntry(supportEvid);
             supportEvid = supportEvid->next;
             }
@@ -839,31 +862,31 @@
 printf("<H2>%s</H2>\n", header);
 
 writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, tags, haveTsl, tsl);
 writeTagLinkHtml(tags);
 writeSequenceHtml(tdb, gencodeId, transAnno);
 if (haveRemarks)
     writeAnnotationRemarkHtml(remarks);
 if (isProteinCodingTrans(transAttrs))
     writePdbLinkHtml(pdbs);
 writePubMedLinkHtml(pubMeds);
 if (haveEntrezGene)
     writeEntrezGeneLinkHtml(entrezGenes);
 writeRefSeqLinkHtml(refSeqs);
 if (isProteinCodingTrans(transAttrs))
     writeUniProtLinkHtml(uniProts);
-writeSupportingEvidenceLinkHtml(gencodeId, transcriptSupports, exonSupports);
+writeSupportingEvidenceLinkHtml(tdb, gencodeId, transcriptSupports, exonSupports);
 
 wgEncodeGencodeAttrsFree(&transAttrs);
 wgEncodeGencodeAnnotationRemarkFreeList(&remarks);
 wgEncodeGencodeGeneSourceFreeList(&geneSource);
 wgEncodeGencodeTranscriptSourceFreeList(&transcriptSource);
 wgEncodeGencodePdbFreeList(&pdbs);
 wgEncodeGencodePubMedFreeList(&pubMeds);
 wgEncodeGencodeEntrezGeneFreeList(&entrezGenes);
 wgEncodeGencodeRefSeqFreeList(&refSeqs);
 wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports);
 wgEncodeGencodeExonSupportFreeList(&exonSupports);
 wgEncodeGencodeUniProtFreeList(&uniProts);
 wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl);
 }