25a1ed58b2f9fc2d9ad7fdbe9117f0b2f47253f3
fanhsu
  Tue Nov 9 16:38:23 2010 -0800
Update for RGD Genes, Redmine issue #29.
diff --git src/hg/hgGene/rgdInfo.c src/hg/hgGene/rgdInfo.c
new file mode 100644
index 0000000..75b2932
--- /dev/null
+++ src/hg/hgGene/rgdInfo.c
@@ -0,0 +1,330 @@
+/* rgdInfo - functions for displaying RGD Genes information in hgGene. */
+
+#include "common.h"
+#include "hash.h"
+#include "linefile.h"
+#include "dystring.h"
+#include "hdb.h"
+#include "hgGene.h"
+
+/* URL prefixes of the external sites linked to; an item's ID is appended to form the link. */
+#define  RGD_GENE_URL   "http://rgd.mcw.edu/tools/genes/genes_view.cgi?id="
+#define  PUBMED_URL	"http://www.ncbi.nlm.nih.gov/pubmed/"
+#define  GBNK_PROT_URL	"http://www.ncbi.nlm.nih.gov/protein/"
+#define  GBNK_DNA_URL	"http://www.ncbi.nlm.nih.gov/nuccore/"
+#define  ENTREZ_URL	"http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=retrieve&list_uids="
+#define  UNIPROT_URL	"http://www.uniprot.org/uniprot/"
+#define  UNIGENE_URL	"http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Rn&CID="
+#define  ENSEMBL_URL	"http://www.ensembl.org/Rattus_norvegicus/Gene/Summary?g="
+#define  TIGR_URL	"http://compbio.dfci.harvard.edu/tgi/cgi-bin/tgi/tc_report.pl?gudb=rat&tc="
+#define  SSLP_URL	"http://rgd.mcw.edu/objectSearch/sslpReport.jsp?rgd_id="
+#define  QTL_URL	"http://rgd.mcw.edu/objectSearch/qtlReport.jsp?rgd_id="
+#define  IMAGE_URL	"http://image.hudsonalpha.org/IQ/bin/singleCloneQuery?clone_id="
+#define  MGC_URL	"http://mgc.nci.nih.gov/Genes/CloneList?ORG=Rn&LIST="
+
+char *getRgdGeneId(struct sqlConnection *conn, char *geneId)
+/* Return the rgdGene ID for geneId; for now that is geneId itself (conn is unused). */
+{
+return geneId;
+}
+
+char *getRgdGeneUniProtAcc(char *geneId, struct sqlConnection *conn)
+/* Look up the UniProt accession recorded for an RGD Gene ID.
+ *   geneId - matched against the name column of rgdGene2ToUniProt
+ *   conn   - connection the query is run on
+ * Only the first row of the result is read.
+ * Returns a strdup() copy of that row's value, or NULL when no row
+ * matches. */
+{
+char sql[256];
+char *uniProtAcc = NULL;
+char **firstRow;
+struct sqlResult *result;
+
+safef(sql, sizeof(sql), "select value from rgdGene2ToUniProt where name = '%s'", geneId);
+result = sqlGetResult(conn, sql);
+firstRow = sqlNextRow(result);
+
+/* Take the copy before the result set is released. */
+if (firstRow != NULL)
+    uniProtAcc = strdup(firstRow[0]);
+
+sqlFreeResult(&result);
+return uniProtAcc;
+}
+
+boolean isRgdGene(struct sqlConnection *conn)
+/* Return TRUE when RGD Genes is the gene set; this is detected by the presence
+ * of the genes_rat table (GENES_RAT data from RGD) in the connection's database. */
+{
+char *database = sqlGetDatabase(conn);
+return hTableExists(database, "genes_rat");
+}
+
+static boolean rgdGeneInfoExists(struct section *section,
+	struct sqlConnection *conn, char *geneId)
+/* Section 'exists' check: TRUE when getRgdGeneId() yields a non-NULL ID for
+ * geneId, FALSE otherwise.  The section argument is not used. */
+{
+boolean haveId = FALSE;
+
+if (getRgdGeneId(conn, geneId) != NULL)
+    haveId = TRUE;
+return haveId;
+}
+
+void do1Colx(char **row, int iCol, char *colName, char *entryName, char *externalUrl)
+/* Print one column of a result row as a labelled HTML line.
+ *   row         - column values of the current row
+ *   iCol        - index into row of the column to print
+ *   colName     - column name; for documentation only, never read here
+ *   entryName   - label printed in bold in front of the value
+ *   externalUrl - URL prefix; when NULL the value is printed as plain text,
+ *                 otherwise each comma-separated item becomes a link to
+ *                 externalUrl followed by the item
+ * NULL and empty values print nothing.  The row itself is not modified. */
+    {
+    char *item = row[iCol];
+    char *comma;
+
+    /* row[iCol] may be NULL (e.g. an SQL NULL column); treat it as empty. */
+    if (item == NULL || item[0] == '\0')
+        return;
+    printf("<b>%s:</b>\n", entryName);
+
+    if (externalUrl == NULL)
+        {
+        printf("%s<br>", item);
+        return;
+        }
+
+    /* The first search starts at the second character, so a comma in the
+     * very first position stays part of the first item. */
+    for (comma = strchr(item + 1, ','); comma != NULL; comma = strchr(item, ','))
+        {
+        int len = (int)(comma - item);
+        printf("<A HREF=\"%s%.*s\" TARGET=_blank>%.*s</A>, ", externalUrl, len, item, len, item);
+        item = comma + 1;
+        }
+
+    /* print the last entry */
+    printf("<A HREF=\"%s%s\" TARGET=_blank>%s</A><br>\n", externalUrl, item, item);
+    }
+
+void do1Col(char **row, int iCol, char *colName)
+/* Print "<b>colName:</b> value<br>" for column iCol of row.  A NULL value
+ * (e.g. an SQL NULL column) or an empty value prints nothing. */
+    {
+    if (row[iCol] != NULL && row[iCol][0] != '\0')
+        printf("<b>%s:</b> %s<br>\n", colName, row[iCol]);
+    }
+
+static void printXrefLinks(struct sqlConnection *conn, char *rgdGeneId,
+	char *infoType, char *label, char *url)
+/* Print the rgdGene2Xref.info values of one infoType for rgdGeneId as a
+ * comma-separated list of links (url followed by the value), preceded by
+ * label in bold and followed by <br>.  Prints nothing when no row matches. */
+{
+char query[256], **row;
+struct sqlResult *sr;
+
+safef(query, sizeof(query),
+      "select info from rgdGene2Xref where rgdGeneId='%s' and infoType='%s'",
+      rgdGeneId, infoType);
+sr = sqlGetResult(conn, query);
+row = sqlNextRow(sr);
+if (row != NULL)
+    {
+    printf("<b>%s</b>", label);
+    while (row != NULL)
+	{
+	printf("<A HREF=\"%s%s\" TARGET=_blank>%s</A>", url, row[0], row[0]);
+	row = sqlNextRow(sr);
+	if (row != NULL)
+	    printf(", ");
+	}
+    printf("<br>");
+    }
+sqlFreeResult(&sr);
+}
+
+static void rgdGeneInfoPrint(struct section *section,
+	struct sqlConnection *conn, char *geneId)
+/* Section 'print' routine: print RGD Gene details for geneId as HTML.
+ * Shows selected columns of the row in section->rgdGeneTable whose
+ * gene_rgd_id matches the part of the ID after ':', then the IMAGE and
+ * MGC clone links recorded in rgdGene2Xref.
+ * NOTE(review): the ID is placed into the SQL text unescaped; confirm that
+ * callers only pass trusted or validated IDs.
+ * Columns are addressed by position, so the table's column order matters. */
+{
+char *rgdGeneId = getRgdGeneId(conn, geneId);
+char query[256], **row;
+struct sqlResult *sr;
+char *chp;
+int iCol;
+
+/* The table is queried with the part of the ID after the first ':'.  When
+ * there is no ':' the whole ID is used rather than stepping past NULL. */
+chp = strchr(rgdGeneId, ':');
+chp = (chp != NULL) ? chp + 1 : rgdGeneId;
+safef(query, sizeof(query),
+      "select * from %s where gene_rgd_id='%s'", section->rgdGeneTable, chp);
+sr = sqlGetResult(conn, query);
+if ((row = sqlNextRow(sr)) != NULL)
+    {
+	/* Not every column is displayed.  The calls for the hidden columns are kept,
+	   commented out, to document the column order; un-comment one to display
+	   that column. */
+	iCol = 0;
+	do1Colx(row, iCol, "gene_rgd_id", "RGD Gene", RGD_GENE_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "symbol", "Gene Symbol", NULL);
+
+	iCol++;
+	do1Colx(row, iCol, "name", "Name", NULL);
+
+	iCol++;
+	//do1Col(row, iCol, "gene_desc");
+
+	iCol++;
+	//do1Col(row, iCol, "chromosome_celera");
+
+	iCol++;
+	//do1Col(row, iCol, "chromosome_31");
+
+	iCol++;
+	//do1Col(row, iCol, "chromosome_34");
+
+	iCol++;
+	//do1Col(row, iCol, "fish_band");
+
+	iCol++;
+	//do1Col(row, iCol, "start_pos_celera");
+
+	iCol++;
+	//do1Col(row, iCol, "stop_pos_celera");
+
+	iCol++;
+	//do1Col(row, iCol, "strand_celera");
+
+	iCol++;
+	//do1Col(row, iCol, "start_pos_31");
+
+	iCol++;
+	//do1Col(row, iCol, "stop_pos_31");
+
+	iCol++;
+	//do1Col(row, iCol, "strand_31");
+
+	iCol++;
+	//do1Col(row, iCol, "start_pos_34");
+
+	iCol++;
+	//do1Col(row, iCol, "stop_pos_34");
+
+	iCol++;
+	//do1Col(row, iCol, "strand_34");
+
+	iCol++;
+	//do1Col(row, iCol, "curated_ref_rgd_id");
+
+	iCol++;
+	do1Colx(row, iCol, "curated_ref_pubmed_id", "Curated Pubmed Papers", PUBMED_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "uncurated_pubmed_id", "Uncurated Pubmed Papers", PUBMED_URL);
+
+	iCol++;
+	//do1Col(row, iCol, "ratmap_id");
+
+	iCol++;
+	do1Colx(row, iCol, "entrez_gene", "Entrez Gene", ENTREZ_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "uniprot_id", "UniProt", UNIPROT_URL);
+
+	iCol++;
+	//do1Col(row, iCol, "rhdb_id");
+
+	iCol++;
+	//do1Col(row, iCol, "uncurated_ref_medline_id");
+
+	iCol++;
+	do1Colx(row, iCol, "genbank_nucleotide", "GenBank Nucleotide", GBNK_DNA_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "tigr_id", "TIGR", TIGR_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "genbank_protein", "GenBank Protein", GBNK_PROT_URL);
+
+	iCol++;
+	do1Colx(row, iCol, "unigene_id", "UniGene", UNIGENE_URL);
+
+	iCol++;
+	//do1Col(row, iCol, "gdb_id");
+
+	iCol++;
+	do1Colx(row, iCol, "sslp_rgd_id", "RGD SSLP", SSLP_URL);
+
+	iCol++;
+	//do1Col(row, iCol, "sslp_symbol");
+
+	iCol++;
+	//do1Col(row, iCol, "old_symbol");
+
+	iCol++;
+	//do1Col(row, iCol, "old_name");
+
+	iCol++;
+	do1Colx(row, iCol, "qtl_rgd_id", "RGD QTL", QTL_URL);
+
+	iCol++;
+	//do1Col(row, iCol, "qtl_symbol");
+
+	iCol++;
+	//do1Col(row, iCol, "nomenclature_status");
+
+	iCol++;
+	//do1Col(row, iCol, "splice_rgd_id");
+
+	iCol++;
+	//do1Col(row, iCol, "splice_symbol");
+
+	iCol++;
+	//do1Col(row, iCol, "gene_type");
+
+	iCol++;
+	do1Colx(row, iCol, "ensembl_id", "Ensembl Gene", ENSEMBL_URL);
+    }
+sqlFreeResult(&sr);
+
+/* display IMAGE and MGC clone links taken from rgdGene2Xref */
+printXrefLinks(conn, rgdGeneId, "IMAGE", "IMAGE CLONE: ", IMAGE_URL);
+printXrefLinks(conn, rgdGeneId, "MGC", "MGC: ", MGC_URL);
+}
+
+struct section *rgdGeneInfoSection(struct sqlConnection *conn,
+	struct hash *sectionRa, char *sectionName, char *table)
+/* Create the section sectionName with the RGD Gene exists/print routines and
+ * table as its rgdGeneTable.  Returns sectionNew()'s result, possibly NULL. */
+{
+struct section *newSection = sectionNew(sectionRa, sectionName);
+if (newSection == NULL)
+    return NULL;
+newSection->exists = rgdGeneInfoExists;
+newSection->print = rgdGeneInfoPrint;
+newSection->rgdGeneTable = table;
+return newSection;
+}
+
+struct section *rgdGeneRawSection(struct sqlConnection *conn,
+	struct hash *sectionRa)
+/* Create the "rgdGeneRaw" section, which displays the raw RGD data in genes_rat. */
+{
+char *rawTable = "genes_rat";
+return rgdGeneInfoSection(conn, sectionRa, "rgdGeneRaw", rawTable);
+}
+