cb2705847f5359843f2b0c975b66692fc3af28ba
markd
  Sat Apr 4 15:10:01 2015 -0700
import of GENCODE V22

diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index c1f91a6..8e0bac0 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -9,30 +9,31 @@
 #include "genePred.h"
 #include "genePredReader.h"
 #include "ensFace.h"
 #include "htmshell.h"
 #include "jksql.h"
 #include "encode/wgEncodeGencodeAttrs.h"
 #include "encode/wgEncodeGencodeGeneSource.h"
 #include "encode/wgEncodeGencodePdb.h"
 #include "encode/wgEncodeGencodePubMed.h"
 #include "encode/wgEncodeGencodeRefSeq.h"
 #include "encode/wgEncodeGencodeTag.h"
 #include "encode/wgEncodeGencodeTranscriptSource.h"
 #include "encode/wgEncodeGencodeTranscriptSupport.h"
 #include "encode/wgEncodeGencodeExonSupport.h"
 #include "encode/wgEncodeGencodeUniProt.h"
+#include "encode/wgEncodeGencodeEntrezGene.h"
 #include "encode/wgEncodeGencodeAnnotationRemark.h"
 #include "encode/wgEncodeGencodeTranscriptionSupportLevel.h"
 
 /*
  * General notes:
  *  - this will be integrated into hgGene at some point, however this was
  *    done as part of hgc for timing reasons and to allow more time to design
  *    the hgGene part.
  *  - Tables below will output at least one row even if no data is available.
  *    
  */
 
 /* Various URLs and URL templates.  At one time, these were in the ra file,
  * but that didn't prove that helpful and end up requiring updated the ra
  * files for every GENCODE version if a URL was added or changed. */
@@ -49,30 +50,36 @@
 static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s";
 static char *apprisHomeUrl = "http://appris.bioinfo.cnio.es/";
 static char *apprisGeneUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Gene_Id&specie=%s";
 static char *apprisTranscriptUrl = "http://appris.bioinfo.cnio.es/report.html?id=%s&namespace=Ensembl_Transcript_Id&specie=%s";
 
 static char *getBaseAcc(char *acc, char *accBuf, int accBufSize)
 /* get the accession with version number dropped. */
 {
 safecpy(accBuf, accBufSize, acc);
 char *dot = strchr(accBuf, '.');
 if (dot != NULL)
     *dot = '\0';
 return accBuf;
 }
 
+static bool haveGencodeTable(struct trackDb *tdb, char *tableBase)
+/* Return true if tdb has a setting named tableBase.  A table that is not in the settings is treated as not being part of this GENCODE release. */
+{
+return trackDbSetting(tdb, tableBase) != NULL;
+}
+
 static char *getGencodeTable(struct trackDb *tdb, char *tableBase)
 /* get a table name from the settings. */
 {
 return trackDbRequiredSetting(tdb, tableBase);
 }
 
 static int transAnnoCmp(const void *va, const void *vb)
 /* Compare genePreds, sorting to keep select gene first.  The only cases
  * that annotations will be duplicated is if they are in the PAR and thus
  * on different chroms. */
 {
 const struct genePred *a = *((struct genePred **)va);
 const struct genePred *b = *((struct genePred **)vb);
 if (sameString(a->name, seqName))
     return -1;
@@ -431,30 +438,65 @@
         {
         if (pubMed != NULL)
             {
             writePubMedEntry(pubMed);
             pubMed = pubMed->next;
             }
         else
             printf("<td width=\"33.33%%\">");
         }
     printf("</tr>\n");
     rowCnt++;
     }
 printf("</tbody></table>\n");
 }
 
+static void writeEntrezGeneEntry(struct wgEncodeGencodeEntrezGene *entrezGene)
+/* Write one HTML table cell for an Entrez Gene entry: the numeric gene id as a link (URL from printEntrezGeneUrl) opening in a new window.  The <td> is not explicitly closed. */
+{
+printf("<td width=\"33.33%%\"><a href=\"");
+printEntrezGeneUrl(stdout, entrezGene->entrezGeneId);
+printf("\" target=_blank>%d</a>", entrezGene->entrezGeneId);
+}
+
+static void writeEntrezGeneLinkHtml(struct wgEncodeGencodeEntrezGene *entrezGenes)
+/* Write an HTML table of Entrez Gene links, three per row.  At least one row is always output; rows are padded with empty cells when the list runs out. */
+{
+printf("<table class=\"hgcCcds\"><thead>\n");
+printf("<tr><th colspan=\"3\">Entrez Gene</tr>\n");
+printf("</thead><tbody>\n");
+struct wgEncodeGencodeEntrezGene *entrezGene = entrezGenes;
+int i, rowCnt = 0;
+while ((entrezGene != NULL) || (rowCnt == 0))  /* rowCnt test forces one row even for an empty list */
+    {
+    printf("<tr>");
+    for (i = 0; i < 3; i++)
+        {
+        if (entrezGene != NULL)
+            {
+            writeEntrezGeneEntry(entrezGene);
+            entrezGene = entrezGene->next;
+            }
+        else
+            printf("<td width=\"33.33%%\">");  /* empty cell pads out a short row */
+        }
+    printf("</tr>\n");
+    rowCnt++;
+    }
+printf("</tbody></table>\n");
+}
+
 static void writeRefSeqEntry(struct wgEncodeGencodeRefSeq *refSeq)
 /* write HTML table entry for a RefSeq */
 {
 printf("<td width=\"50%%\"><a href=\"");
 printEntrezNucleotideUrl(stdout, refSeq->rnaAcc);
 printf("\" target=_blank>%s</a>", refSeq->rnaAcc);
 printf("<td width=\"50%%\">");
 if (!isEmpty(refSeq->pepAcc))
     {
     printf("<a href=\"");
     printEntrezProteinUrl(stdout, refSeq->pepAcc);
     printf("\" target=_blank>%s</a>", refSeq->pepAcc);
     }
 }
 
@@ -669,75 +711,80 @@
             printf("<td width=\"33.33%%\">");
         }
     printf("</tr>\n");
     rowCnt++;
     }
 printf("</tbody></table>\n");
 }
 
 static void doGencodeGeneTrack(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *transAnno)
 /* Process click on a GENCODE gene annotation track. */
 {
 struct wgEncodeGencodeAttrs *transAttrs = transAttrsLoad(tdb, conn, gencodeId);
 char *gencodeGeneId = transAttrs->geneId;
 struct wgEncodeGencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "wgEncodeGencodeGeneSource", "geneId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeGeneSourceLoad);
 struct wgEncodeGencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSource", "transcriptId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeTranscriptSourceLoad);
-bool haveRemarks = (trackDbSetting(tdb, "wgEncodeGencodeAnnotationRemark") != NULL);
+bool haveRemarks = haveGencodeTable(tdb, "wgEncodeGencodeAnnotationRemark");
 struct wgEncodeGencodeAnnotationRemark *remarks = haveRemarks ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeAnnotationRemark", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeAnnotationRemarkLoad) : NULL;
 struct wgEncodeGencodePdb *pdbs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePdb", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePdbLoad);
 struct wgEncodeGencodePubMed *pubMeds = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePubMed", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePubMedLoad);
+bool haveEntrezGene = haveGencodeTable(tdb, "wgEncodeGencodeEntrezGene");  /* optional table: only loaded and displayed when it is in the settings */
+struct wgEncodeGencodeEntrezGene *entrezGenes = haveEntrezGene ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeEntrezGene", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeEntrezGeneLoad) : NULL;
 struct wgEncodeGencodeRefSeq *refSeqs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeRefSeq", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeRefSeqLoad);
 struct wgEncodeGencodeTag *tags = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTag", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTagLoad);
 struct wgEncodeGencodeTranscriptSupport *transcriptSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTranscriptSupportLoad);
 struct wgEncodeGencodeExonSupport *exonSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeExonSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeExonSupportLoad);
 struct wgEncodeGencodeUniProt *uniProts = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeUniProt", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeUniProtLoad);
 slSort(&uniProts, uniProtDatasetCmp);
-bool haveTsl = (trackDbSetting(tdb, "wgEncodeGencodeTranscriptionSupportLevel") != NULL);
+bool haveTsl = haveGencodeTable(tdb, "wgEncodeGencodeTranscriptionSupportLevel");
 struct wgEncodeGencodeTranscriptionSupportLevel *tsl = haveTsl ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptionSupportLevel", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeTranscriptionSupportLevelLoad) : NULL;
 
 int geneChromStart, geneChromEnd;
 getGeneBounds(tdb, conn, transAnno, &geneChromStart, &geneChromEnd);
 
 char *title = "GENCODE Transcript Annotation";
 char header[256];
 safef(header, sizeof(header), "%s %s", title, gencodeId);
 if (!isEmpty(transAttrs->geneName))
     safef(header, sizeof(header), "%s %s (%s)", title, gencodeId, transAttrs->geneName);
 else
     safef(header, sizeof(header), "%s %s", title, gencodeId);
 cartWebStart(cart, database, "%s", header);
 printf("<H2>%s</H2>\n", header);
 
 writeBasicInfoHtml(conn, tdb, gencodeId, transAnno, transAttrs, geneChromStart, geneChromEnd, geneSource, transcriptSource, haveTsl, tsl);
 writeTagLinkHtml(tags);
 writeSequenceHtml(tdb, gencodeId, transAnno);
 if (haveRemarks)
     writeAnnotationRemarkHtml(remarks);
 if (isProteinCodingTrans(transAttrs))
     writePdbLinkHtml(pdbs);
 writePubMedLinkHtml(pubMeds);
+if (haveEntrezGene)
+    writeEntrezGeneLinkHtml(entrezGenes);
 writeRefSeqLinkHtml(refSeqs);
 if (isProteinCodingTrans(transAttrs))
     writeUniProtLinkHtml(uniProts);
 writeSupportingEvidenceLinkHtml(gencodeId, transcriptSupports, exonSupports);
 
 wgEncodeGencodeAttrsFree(&transAttrs);
 wgEncodeGencodeAnnotationRemarkFreeList(&remarks);
 wgEncodeGencodeGeneSourceFreeList(&geneSource);
 wgEncodeGencodeTranscriptSourceFreeList(&transcriptSource);
 wgEncodeGencodePdbFreeList(&pdbs);
 wgEncodeGencodePubMedFreeList(&pubMeds);
+wgEncodeGencodeEntrezGeneFreeList(&entrezGenes);  /* entrezGenes is NULL here when the table is not in the settings */
 wgEncodeGencodeRefSeqFreeList(&refSeqs);
 wgEncodeGencodeTranscriptSupportFreeList(&transcriptSupports);
 wgEncodeGencodeExonSupportFreeList(&exonSupports);
 wgEncodeGencodeUniProtFreeList(&uniProts);
 wgEncodeGencodeTranscriptionSupportLevelFreeList(&tsl);
 }
 
 static void doGencodeGene2WayPseudo(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *pseudoAnno)
 /* Process click on a GENCODE two-way pseudogene annotation track. */
 {
 char header[256];
 safef(header, sizeof(header), "GENCODE 2-way consensus pseudogene %s", gencodeId);
 cartWebStart(cart, database, "%s", header);
 printf("<H2>%s</H2>\n", header);
 printf("<b>Yale id:</b> ");