fcdf5c401c80840d28c346409c4fbb527544fac7
markd
  Tue Jun 9 13:59:33 2020 -0700
make gencode hgc robust against metadata that is sometimes not mapped in the backmap releases

diff --git src/hg/hgc/gencodeClick.c src/hg/hgc/gencodeClick.c
index 953ccc9..e31305c 100644
--- src/hg/hgc/gencodeClick.c
+++ src/hg/hgc/gencodeClick.c
@@ -46,30 +46,32 @@
 
 static char *ensemblH37TranscriptIdUrl = "http://grch37.ensembl.org/%s/Transcript/Summary?db=core;t=%s";
 static char *ensemblH37GeneIdUrl = "http://grch37.ensembl.org/%s/Gene/Summary?db=core;t=%s";
 static char *ensemblH37ProteinIdUrl = "http://grch37.ensembl.org/%s/Transcript/ProteinSummary?db=core;t=%s";
 static char *ensemblH37SupportingEvidUrl = "http://grch37.ensembl.org/%s/Transcript/SupportingEvidence?db=core;t=%s";
 
 static char *gencodeBiotypesUrl = "http://www.gencodegenes.org/pages/biotypes.html";
 static char *gencodeTagsUrl = "http://www.gencodegenes.org/pages/tags.html";
 
 static char *yalePseudoUrl = "http://tables.pseudogene.org/%s";
 static char *hgncUrl = " https://www.genenames.org/data/gene-symbol-report/#!/symbol/%s";
 static char *geneCardsUrl = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=%s";
 static char *apprisHomeUrl = "http://appris-tools.org/";
 static char *apprisGeneUrl = "http://appris-tools.org/#/database/id/%s/%s?sc=ensembl";
 
+static char* UNKNOWN = "unknown";
+
 static char *getBaseAcc(char *acc, char *accBuf, int accBufSize)
 /* get the accession with version number dropped. */
 {
 safecpy(accBuf, accBufSize, acc);
 char *dot = strchr(accBuf, '.');
 if (dot != NULL)
     *dot = '\0';
 return accBuf;
 }
 
 static bool haveGencodeTable(struct trackDb *tdb, char *tableBase)
 /* determine if table is in settings and thus in this gencode release */
 {
 return trackDbSetting(tdb, tableBase) != NULL;
 }
@@ -179,30 +181,32 @@
 return sqlQueryObjs(conn, loadFunc, queryOpts, "select * from %s where %s = \"%s\"",
                     getGencodeTable(tdb, tableBase), keyCol, gencodeId);
 }
 
 static int uniProtDatasetCmp(const void *va, const void *vb)
 /* Compare wgEncodeGencodeUniProt by dateset */
 {
 const struct wgEncodeGencodeUniProt *a = *((struct wgEncodeGencodeUniProt **)va);
 const struct wgEncodeGencodeUniProt *b = *((struct wgEncodeGencodeUniProt **)vb);
 return a->dataset - b->dataset;
 }
 
 static char *getMethodDesc(char *source)
 /* return the annotation method name based gene or transcript source */
 {
+// sometimes backmap doesn't get every entry method entry mapped.  Until that
+// is fixed, allow it to be missing
 // looks for being havana and/or ensembl
 // classifies other sources as automatic (mt_genbank_import ncrna ncrna_pseudogene)
 bool hasHav = containsStringNoCase(source, "havana") != NULL;
 bool hasEns = containsStringNoCase(source, "ensembl") != NULL;
 if (hasHav && hasEns)
     return "manual & automatic";
 else if (hasHav)
     return "manual";
 else
     return "automatic";
 }
 
 static char *getLevelDesc(int level)
 /* return english description for level */
 {
@@ -410,31 +414,33 @@
 printf("</tr>\n");
 
 printf("<tr><th>Position");
 printf("<td>");
 writePosLink(transAnno->chrom, transAnno->txStart, transAnno->txEnd);
 printf("<td>");
 writePosLink(transAnno->chrom, geneChromStart, geneChromEnd);
 printf("</tr>\n");
 
 printf("<tr><th>Strand<td>%s<td></tr>\n", transAnno->strand);
 
 printf("<tr><th><a href=\"%s\" target = _blank>Biotype</a><td>%s<td>%s</tr>\n", gencodeBiotypesUrl, transAttrs->transcriptType, transAttrs->geneType);
 
 printf("<tr><th>Annotation Level<td>%s (%d)<td></tr>\n", getLevelDesc(transAttrs->level), transAttrs->level);
 
-printf("<tr><th>Annotation Method<td>%s<td>%s</tr>\n", getMethodDesc(transcriptSource->source), getMethodDesc(geneSource->source));
+char *transSrcDesc = (transcriptSource != NULL) ? getMethodDesc(transcriptSource->source) : UNKNOWN;
+char *geneSrcDesc = (geneSource != NULL) ? getMethodDesc(geneSource->source) : UNKNOWN;
+printf("<tr><th>Annotation Method<td>%s<td>%s</tr>\n", transSrcDesc, geneSrcDesc);
 
 if (haveTsl)
     {
     char *tslDesc = getSupportLevelDesc(tsl);
     printf("<tr><th><a href=\"#tsl\">Transcription Support Level</a><td><a href=\"#%s\">%s</a><td></tr>\n", tslDesc, tslDesc);
     }
 printf("<tr><th>HGNC gene symbol<td colspan=2>");
 if (!isFakeGeneSymbol(transAttrs->geneName))
     prExtIdAnchor(transAttrs->geneName, hgncUrl);
 printf("</tr>\n");
 
 printf("<tr><th>CCDS<td>");
 if (!isEmpty(transAttrs->ccdsId))
     {
     printf("<a href=\"");
@@ -822,32 +828,32 @@
             }
         else
             printf("<td width=\"33.33%%\">");
         }
     printf("</tr>\n");
     rowCnt++;
     }
 printf("</tbody></table>\n");
 }
 
 static void doGencodeGeneTrack(struct trackDb *tdb, char *gencodeId, struct sqlConnection *conn, struct genePred *transAnno)
 /* Process click on a GENCODE gene annotation track. */
 {
 struct wgEncodeGencodeAttrs *transAttrs = transAttrsLoad(tdb, conn, gencodeId);
 char *gencodeGeneId = transAttrs->geneId;
-struct wgEncodeGencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "wgEncodeGencodeGeneSource", "geneId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeGeneSourceLoad);
-struct wgEncodeGencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSource", "transcriptId", sqlQueryMust|sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeTranscriptSourceLoad);
+struct wgEncodeGencodeGeneSource *geneSource = metaDataLoad(tdb, conn, gencodeGeneId, "wgEncodeGencodeGeneSource", "geneId", sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeGeneSourceLoad);
+struct wgEncodeGencodeTranscriptSource *transcriptSource = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSource", "transcriptId", sqlQuerySingle, (sqlLoadFunc)wgEncodeGencodeTranscriptSourceLoad);
 bool haveRemarks = haveGencodeTable(tdb, "wgEncodeGencodeAnnotationRemark");
 struct wgEncodeGencodeAnnotationRemark *remarks = haveRemarks ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeAnnotationRemark", "transcriptId", 0, (sqlLoadFunc)wgEncodeGencodeAnnotationRemarkLoad) : NULL;
 struct wgEncodeGencodePdb *pdbs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePdb", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePdbLoad);
 struct wgEncodeGencodePubMed *pubMeds = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodePubMed", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodePubMedLoad);
 bool haveEntrezGene = haveGencodeTable(tdb, "wgEncodeGencodeEntrezGene");
 struct wgEncodeGencodeEntrezGene *entrezGenes = haveEntrezGene ? metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeEntrezGene", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeEntrezGeneLoad) : NULL;
 struct wgEncodeGencodeRefSeq *refSeqs = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeRefSeq", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeRefSeqLoad);
 struct wgEncodeGencodeTag *tags = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTag", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTagLoad);
 struct wgEncodeGencodeTranscriptSupport *transcriptSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeTranscriptSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeTranscriptSupportLoad);
 struct wgEncodeGencodeExonSupport *exonSupports = NULL;
 // exonSupports not available in back mapped GENCODE releases
 if (haveGencodeTable(tdb, "wgEncodeGencodeExonSupport"))
     exonSupports = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeExonSupport", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeExonSupportLoad);
 struct wgEncodeGencodeUniProt *uniProts = metaDataLoad(tdb, conn, gencodeId, "wgEncodeGencodeUniProt", "transcriptId", sqlQueryMulti, (sqlLoadFunc)wgEncodeGencodeUniProtLoad);
 slSort(&uniProts, uniProtDatasetCmp);