e1b46b0188570dc6f03b14520ba708233ec56446
hiram
  Tue Aug 16 13:59:07 2011 -0700
Now correctly showing protein-coding vs. non-coding status for Ensembl genes
diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 374e3b4..4681f34 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -2359,66 +2359,85 @@
 }
 
 void showGenePos(char *name, struct trackDb *tdb)
 /* Show gene prediction position and other info. */
 {
 char *rootTable = tdb->table;
 char query[512];
 struct sqlConnection *conn = hAllocConn(database);
 struct genePred *gpList = NULL, *gp = NULL;
 boolean hasBin;
 char table[64];
 struct sqlResult *sr = NULL;
 char **row = NULL;
 char *classTable = trackDbSetting(tdb, GENEPRED_CLASS_TBL);
 
+
 hFindSplitTable(database, seqName, rootTable, table, &hasBin);
 safef(query, sizeof(query), "name = \"%s\"", name);
 gpList = genePredReaderLoadQuery(conn, table, query);
 for (gp = gpList; gp != NULL; gp = gp->next)
     {
     printPos(gp->chrom, gp->txStart, gp->txEnd, gp->strand, FALSE, NULL);
     if(sameString(tdb->type,"genePred")
     && startsWith("ENCODE Gencode",tdb->longLabel)
     && startsWith("ENST",name))
         {
         char *ensemblIdUrl = trackDbSetting(tdb, "ensemblIdUrl");
 
         printf("<b>Ensembl Transcript Id:&nbsp</b>");
         if (ensemblIdUrl != NULL)
             printf("<a href=\"%s%s\" target=\"_blank\">%s</a><br>", ensemblIdUrl,name,name);
         else
             printf("%s<br>",name);
         }
     if (gp->name2 != NULL && strlen(trimSpaces(gp->name2))> 0)
         {
         /* in Ensembl gene info downloaded from ftp site, sometimes the
            name2 field is populated with "noXref" because there is
            no alternate name. Replace this with "none" */
         printf("<b>Gene Symbol:");
         if (sameString(gp->name2, "noXref"))
            printf("</b> none<br>\n");
         else
            printf("</b> %s<br>\n",gp->name2);
         }
+    char *ensemblSource = NULL;
+    if (sameString("ensGene", table))
+	{
+	if (hTableExists(database, "ensemblSource"))
+	    {
+	    safef(query, sizeof(query),
+		"select source from ensemblSource where name='%s'", name);
+	    ensemblSource = sqlQuickString(conn, query);
+	    }
+	}
     if ((gp->exonFrames != NULL) && (!genbankIsRefSeqNonCodingMRnaAcc(gp->name)))
         {
+	if (ensemblSource && differentString("protein_coding",ensemblSource))
+	    {
+	    printf("<b>CDS Start: </b> none (non-coding)<BR>\n");
+	    printf("<b>CDS End: </b> none (non-coding)<BR>\n");
+	    }
+	else
+	    {
         printf("<b>CDS Start: </b>");
         printCdsStatus((gp->strand[0] == '+') ? gp->cdsStartStat : gp->cdsEndStat);
         printf("<b>CDS End: </b>");
         printCdsStatus((gp->strand[0] == '+') ? gp->cdsEndStat : gp->cdsStartStat);
         }
+	}
     /* if a gene class table exists, get gene class and print */
     if (classTable != NULL)
         {
         if (hTableExists(database, classTable))
            {
            safef(query, sizeof(query),
                 "select class from %s where name = \"%s\"", classTable, name);
            sr = sqlGetResult(conn, query);
            /* print class */
            if ((row = sqlNextRow(sr)) != NULL)
               printf("<b>Prediction Class:</b> %s<br>\n", row[0]);
            sqlFreeResult(&sr);
            if (sqlFieldIndex(conn, classTable, "level") > 0 )
                {
                safef(query, sizeof(query),
@@ -8241,33 +8260,39 @@
     warn("Organism %s not found!", organism); fflush(stdout);
     return;
     }
 
 /* print URL that links to Ensembl transcript details */
 if (archive != NULL)
     safef(ensUrl, sizeof(ensUrl), "http://%s.archive.ensembl.org/%s",
             archive, genomeStrEnsembl);
 else
     safef(ensUrl, sizeof(ensUrl), "http://www.ensembl.org/%s", genomeStrEnsembl);
 
 char query[512];
 char *geneName = NULL;
 if (hTableExists(database, "ensemblToGeneName"))
     {
-    safef(query, sizeof(query), "select value from ensemblToGeneName where name='%s' limit 1", itemName);
+    safef(query, sizeof(query), "select value from ensemblToGeneName where name='%s'", itemName);
     geneName = sqlQuickString(conn, query);
     }
+char *ensemblSource = NULL;
+if (hTableExists(database, "ensemblSource"))
+    {
+    safef(query, sizeof(query), "select source from ensemblSource where name='%s'", itemName);
+    ensemblSource = sqlQuickString(conn, query);
+    }
 
 boolean nonCoding = FALSE;
 safef(query, sizeof(query), "name = \"%s\"", itemName);
 struct genePred *gpList = genePredReaderLoadQuery(conn, "ensGene", query);
 if (gpList && gpList->name2)
     {
     if (gpList->cdsStart == gpList->cdsEnd)
 	nonCoding = TRUE;
     printf("<B>Ensembl Gene Link: </B>");
     if (sameString(gpList->name2, "noXref"))
        printf("none<BR>\n");
     else
        printf("<A HREF=\"%s/geneview?gene=%s\" "
 	    "target=_blank>%s</A><BR>", ensUrl, gpList->name2, gpList->name2);
     }
@@ -8385,30 +8410,35 @@
 	printf("<B>Ensembl Protein: </B>");
 	printf("<A HREF=\"%s/protview?peptide=%s\" target=_blank>",
 	    ensUrl,proteinID);
 	printf("%s</A><BR>\n", proteinID);
 	}
     else
 	{
 	printf("<B>Ensembl Protein: </B>none (non-coding)<BR>\n");
 	}
     }
 if (geneName)
     {
     printf("<B>Gene Name: </B>%s<BR>\n", geneName);
     freeMem(geneName);
     }
+if (ensemblSource)
+    {
+    printf("<B>Ensembl type: </B>%s<BR>\n", ensemblSource);
+    freeMem(ensemblSource);
+    }
 freeMem(shortItemName);
 }
 
 void printEnsemblOrVegaCustomUrl(struct trackDb *tdb, char *itemName, boolean encode, char *archive)
 /* Print Ensembl Gene URL. */
 {
 boolean isEnsembl = FALSE;
 boolean isVega = FALSE;
 boolean hasEnsGtp = FALSE;
 boolean hasVegaGtp = FALSE;
 char *shortItemName;
 char *genomeStrEnsembl = "";
 struct sqlConnection *conn = hAllocConn(database);
 char cond_str[256], cond_str2[256];
 char *geneID = NULL;