6dd383de66888d8081a22ded91063bc5dc2d7061
hiram
Thu Jan 26 16:22:17 2012 -0800
change rn4 table name of genes_rat to rgdGene2Raw
diff --git src/hg/hgGene/rgdInfo.c src/hg/hgGene/rgdInfo.c
index 75b2932..aa756c1 100644
--- src/hg/hgGene/rgdInfo.c
+++ src/hg/hgGene/rgdInfo.c
@@ -1,330 +1,331 @@
/* rgdGeneInfo - functions related to RGD Genes */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "dystring.h"
#include "hdb.h"
#include "hgGene.h"
// define external URLs used
#define RGD_GENE_URL "http://rgd.mcw.edu/tools/genes/genes_view.cgi?id="
#define PUBMED_URL "http://www.ncbi.nlm.nih.gov/pubmed/"
#define GBNK_PROT_URL "http://www.ncbi.nlm.nih.gov/protein/"
#define GBNK_DNA_URL "http://www.ncbi.nlm.nih.gov/nuccore/"
#define ENTREZ_URL "http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=retrieve&list_uids="
#define UNIPROT_URL "http://www.uniprot.org/uniprot/"
#define UNIGENE_URL "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Rn&CID="
#define ENSEMBL_URL "http://www.ensembl.org/Rattus_norvegicus/Gene/Summary?g="
#define TIGR_URL "http://compbio.dfci.harvard.edu/tgi/cgi-bin/tgi/tc_report.pl?gudb=rat&tc="
#define SSLP_URL "http://rgd.mcw.edu/objectSearch/sslpReport.jsp?rgd_id="
#define QTL_URL "http://rgd.mcw.edu/objectSearch/qtlReport.jsp?rgd_id="
#define IMAGE_URL "http://image.hudsonalpha.org/IQ/bin/singleCloneQuery?clone_id="
#define MGC_URL "http://mgc.nci.nih.gov/Genes/CloneList?ORG=Rn&LIST="
char *getRgdGeneId(struct sqlConnection *conn, char *geneId)
/* Return rgdGene ID for now. */
{
return(geneId);
}
char *getRgdGeneUniProtAcc(char *geneId, struct sqlConnection *conn)
/* get UniProt Acc from an RGD Gene ID */
{
char query[256];
struct sqlResult *sr;
char **row;
char *protAcc;
safef(query, sizeof(query), "select value from rgdGene2ToUniProt where name = '%s'", geneId);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
{
protAcc = strdup(row[0]);
sqlFreeResult(&sr);
return(protAcc);
}
else
{
sqlFreeResult(&sr);
return(NULL);
}
}
boolean isRgdGene(struct sqlConnection *conn)
/* Return true if the gene set is RGD Genes. */
{
-/* The existence of the table genes_rat storing GENES_RAT data from RGD indicates that we
-are using RGD Genes as our main gene set for this genome. */
-return(hTableExists(sqlGetDatabase(conn), "genes_rat"));
+/* The existence of the table rgdGene2Raw storing
+ * GENES_RAT data from RGD indicates that we are using RGD Genes as
+ * our main gene set for this genome. */
+return(hTableExists(sqlGetDatabase(conn), "rgdGene2Raw"));
}
static boolean rgdGeneInfoExists(struct section *section,
struct sqlConnection *conn, char *geneId)
/* Return TRUE if rgdGene info tables exist. */
{
char *rgdGeneId = getRgdGeneId(conn, geneId);
if (rgdGeneId == NULL)
return FALSE;
else
return(TRUE);
}
void do1Colx(char **row, int iCol, char *colName, char *entryName, char *externalUrl)
{
char *chp, *chp1;
if (strlen(row[iCol]) > 0)
{
printf("%s:\n", entryName);
if (externalUrl == NULL)
{
printf("%s
", row[iCol]);
return;
}
chp1 = row[iCol];
chp = chp1;
chp++;
while (*chp != '\0')
{
if (*chp == ',')
{
*chp = '\0';
printf("%s, ", chp1);fflush(stdout);
chp1 = chp;
chp1++;
}
chp ++;
}
/* print the last entry */
printf("%s", chp1);
printf("
\n");
}
}
void do1Col(char **row, int iCol, char *colName)
{
if (strlen(row[iCol]) > 0)
{
printf("%s: %s
\n", colName, row[iCol]);fflush(stdout);
}
}
static void rgdGeneInfoPrint(struct section *section,
struct sqlConnection *conn, char *geneId)
/* Print out RgdGene info. */
{
char *rgdGeneId = getRgdGeneId(conn, geneId);
char query[256], **row;
struct sqlResult *sr;
char *chp;
int iCol;
chp = strstr(rgdGeneId, ":"); chp++;
safef(query, sizeof(query),
"select * from %s where gene_rgd_id='%s'", section->rgdGeneTable, chp);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
{
/* Data of some columns are not displayed.
The statements for those are left there but commented out for documentation purpose.
In case we need additional variables displayed, just un-comment the lines.
*/
iCol = 0;
do1Colx(row, iCol, "gene_rgd_id", "RGD Gene", RGD_GENE_URL);
iCol++;
do1Colx(row, iCol, "symbol", "Gene Symbol", NULL);
iCol++;
do1Colx(row, iCol, "name", "Name", NULL);
iCol++;
//do1Col(row, iCol, "gene_desc");
iCol++;
//do1Col(row, iCol, "chromosome_celera");
iCol++;
//do1Col(row, iCol, "chromosome_31");
iCol++;
//do1Col(row, iCol, "chromosome_34");
iCol++;
//do1Col(row, iCol, "fish_band");
iCol++;
//do1Col(row, iCol, "start_pos_celera");
iCol++;
//do1Col(row, iCol, "stop_pos_celera");
iCol++;
//do1Col(row, iCol, "strand_celera");
iCol++;
//do1Col(row, iCol, "start_pos_31");
iCol++;
//do1Col(row, iCol, "stop_pos_31");
iCol++;
//do1Col(row, iCol, "strand_31");
iCol++;
//do1Col(row, iCol, "start_pos_34");
iCol++;
//do1Col(row, iCol, "stop_pos_34");
iCol++;
//do1Col(row, iCol, "strand_34");
iCol++;
//do1Col(row, iCol, "curated_ref_rgd_id");
iCol++;
do1Colx(row, iCol, "curated_ref_pubmed_id", "Curated Pubmed Papers", PUBMED_URL);
iCol++;
do1Colx(row, iCol, "uncurated_pubmed_id", "Uncurated Pubmed Papers", PUBMED_URL);
iCol++;
//do1Col(row, iCol, "ratmap_id");
iCol++;
do1Colx(row, iCol, "entrez_gene", "Entrez Gene", ENTREZ_URL);
iCol++;
do1Colx(row, iCol, "uniprot_id", "UniProt", UNIPROT_URL);
iCol++;
//do1Col(row, iCol, "rhdb_id");
iCol++;
//do1Col(row, iCol, "uncurated_ref_medline_id");
iCol++;
do1Colx(row, iCol, "genbank_nucleotide", "GenBank Nucleotide", GBNK_DNA_URL);
iCol++;
do1Colx(row, iCol, "tigr_id", "TIGR", TIGR_URL);
iCol++;
do1Colx(row, iCol, "genbank_protein", "Genebank Protein", GBNK_PROT_URL);
iCol++;
do1Colx(row, iCol, "unigene_id", "UniGene", UNIGENE_URL);
iCol++;
//do1Col(row, iCol, "gdb_id");
iCol++;
do1Colx(row, iCol, "sslp_rgd_id", "RGD SSLP", SSLP_URL);
iCol++;
//do1Col(row, iCol, "sslp_symbol");
iCol++;
//do1Col(row, iCol, "old_symbol");
iCol++;
//do1Col(row, iCol, "old_name");
iCol++;
do1Colx(row, iCol, "qtl_rgd_id", "RGD QTL", QTL_URL);
iCol++;
//do1Col(row, iCol, "qtl_symbol");
iCol++;
//do1Col(row, iCol, "nomenclature_status");
iCol++;
//do1Col(row, iCol, "splice_rgd_id");
iCol++;
//do1Col(row, iCol, "splice_symbol");
iCol++;
//do1Col(row, iCol, "gene_type");
iCol++;
do1Colx(row, iCol, "ensembl_id", "Ensembl Gene", ENSEMBL_URL);
}
sqlFreeResult(&sr);
/* display IMAGE info */
safef(query, sizeof(query), "select info from rgdGene2Xref where rgdGeneId='%s' and infoType='IMAGE'", rgdGeneId);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
{
printf("IMAGE CLONE: ");
while (row != NULL)
{
printf("%s", row[0]);fflush(stdout);
row = sqlNextRow(sr);
if (row != NULL) printf(", ");
}
printf("
");
}
sqlFreeResult(&sr);
/* display MGC info */
safef(query, sizeof(query), "select info from rgdGene2Xref where rgdGeneId='%s' and infoType='MGC'", rgdGeneId);
sr = sqlGetResult(conn, query);
row = sqlNextRow(sr);
if (row != NULL)
{
printf("MGC: ");
while (row != NULL)
{
printf("%s", row[0]);fflush(stdout);
row = sqlNextRow(sr);
if (row != NULL) printf(", ");
}
printf("
");
}
sqlFreeResult(&sr);
}
struct section *rgdGeneInfoSection(struct sqlConnection *conn,
struct hash *sectionRa, char *sectionName, char *table)
/* Create RgdGene info section. */
{
struct section *section = sectionNew(sectionRa, sectionName);
if (section != NULL)
{
section->exists = rgdGeneInfoExists;
section->print = rgdGeneInfoPrint;
section->rgdGeneTable = table;
}
return section;
}
/* display info from downloaded raw data file */
struct section *rgdGeneRawSection(struct sqlConnection *conn,
struct hash *sectionRa)
/* Create RgdGene roles section. */
{
return rgdGeneInfoSection(conn, sectionRa, "rgdGeneRaw", "genes_rat");
}