dc23932b2dae91522d130c311ae3b28dbce488e6 braney Fri May 22 13:54:01 2015 -0700 add NCBI metadata to NCBI RefSeq track ( refs# 13673) diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 24f80f2..9fa8286 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -86,30 +86,31 @@ #include "cnpIafrate.h" #include "cnpIafrate2.h" #include "cnpLocke.h" #include "cnpSebat.h" #include "cnpSebat2.h" #include "cnpSharp.h" #include "cnpSharp2.h" #include "delHinds2.h" #include "delConrad2.h" #include "dgv.h" #include "dgvPlus.h" #include "tokenizer.h" #include "softberryHom.h" #include "borkPseudoHom.h" #include "sanger22extra.h" +#include "ncbiRefLink.h" #include "refLink.h" #include "hgConfig.h" #include "estPair.h" #include "softPromoter.h" #include "customTrack.h" #include "trackHub.h" #include "hubConnect.h" #include "sage.h" #include "sageExp.h" #include "pslWScore.h" #include "lfs.h" #include "mcnBreakpoints.h" #include "fishClones.h" #include "featureBits.h" #include "web.h" @@ -282,30 +283,31 @@ struct bed *sageExpList = NULL; char ncbiOmimUrl[255] = {"http://www.ncbi.nlm.nih.gov/omim/"}; struct palInfo { char *chrom; int left; int right; char *rnaName; }; /* See this NCBI web doc for more info about entrezFormat: * http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html */ char *entrezFormat = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=%s&term=%s&doptcmdl=%s&tool=genome.ucsc.edu"; char *entrezPureSearchFormat = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=PureSearch&db=%s&details_term=%s[%s] "; +char *ncbiGeneFormat = "http://www.ncbi.nlm.nih.gov/gene/%s"; char *entrezUidFormat = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=%s&list_uids=%d&dopt=%s&tool=genome.ucsc.edu"; /* db=unists is not mentioned in NCBI's doc... so stick with this usage: */ char *unistsnameScript = "http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?db=unists"; char *unistsScript = "http://www.ncbi.nlm.nih.gov/genome/sts/sts.cgi?uid="; char *gdbScript = "http://www.gdb.org/gdb-bin/genera/accno?accessionNum="; char *cloneDbScript = "http://www.ncbi.nlm.nih.gov/clone?term="; char *traceScript = "http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&val="; char *genMapDbScript = "http://genomics.med.upenn.edu/perl/genmapdb/byclonesearch.pl?clone="; char *uniprotFormat = "http://www.uniprot.org/uniprot/%s"; /* variables for gv tables */ char *gvPrevCat = NULL; char *gvPrevType = NULL; /* initialized by getCtList() if necessary: */ @@ -314,30 +316,36 @@ /* getDNA stuff actually works when the database doesn't exist! */ boolean dbIsFound = FALSE; /* forwards */ char *getPredMRnaProtSeq(struct genePred *gp); void doAltGraphXDetails(struct trackDb *tdb, char *item); char* getEntrezNucleotideUrl(char *accession) /* get URL for Entrez browser on a nucleotide. free resulting string */ { char url[512]; safef(url, sizeof(url), entrezFormat, "Nucleotide", accession, "GenBank"); return cloneString(url); } +void printNcbiGeneUrl(FILE *f, char *gene) +/* Print URL for Entrez browser on a nucleotide. */ +{ +fprintf(f, ncbiGeneFormat, gene); +} + void printEntrezNucleotideUrl(FILE *f, char *accession) /* Print URL for Entrez browser on a nucleotide. */ { fprintf(f, entrezFormat, "Nucleotide", accession, "GenBank"); } void printEntrezEstUrl(FILE *f, char *accession) /* Print URL for Entrez browser on a nucleotide. */ { fprintf(f, entrezFormat, "nucest", accession, "GenBank"); } void printEntrezProteinUrl(FILE *f, char *accession) /* Print URL for Entrez browser on a protein. */ { @@ -11008,30 +11016,63 @@ char *org = sqlQuickString(conn, query); if (org == NULL) org = cloneString("unknown"); printf("<B>Organism:</B> %s<BR>", org); char *xenoDb = hDbForSciName(org); if ((xenoDb != NULL) && hDbIsActive(xenoDb) && hTableExists(xenoDb, "refSeqAli")) { printf("<B>UCSC browser: </B> \n"); linkToOtherBrowserSearch(xenoDb, rl->mrnaAcc); printf("%s on %s (%s)</B> \n", rl->mrnaAcc, hOrganism(xenoDb), xenoDb); printf("</A><BR>"); } freeMem(org); } +void prNcbiRefGeneInfo(struct sqlConnection *conn, char *rnaName, + char *sqlRnaName, struct ncbiRefLink *rl, boolean isPredicted) +/* print basic details information and links for a NCBI RefGene */ +{ + +printf("<td valign=top nowrap>\n"); +printf("<H2>NCBI RefSeq Gene %s</H2>\n", rl->id); +printf("<B>RefSeq:</B> <A HREF=\""); +printEntrezNucleotideUrl(stdout, rl->id); +printf("\" TARGET=_blank>%s</A><BR>", rl->id); + +if (!isEmpty(rl->gene)) + { + printf("<B>Gene name:</B> %s<BR>\n", rl->gene); + } +if (!isEmpty(rl->gbKey)) + { + printf("<B>Molecule type:</B> %s<BR>\n", rl->gbKey); + } +if (!isEmpty(rl->dbXref) && startsWith("GeneID:", rl->dbXref)) + { + char *geneId = strchr(rl->dbXref, ':'); + geneId++; + printf("<B>NCBI Gene:</B> <A HREF=\""); + printNcbiGeneUrl(stdout, geneId); + printf("\" TARGET=_blank>%s</A><BR>", geneId); + } +if (!isEmpty(rl->product)) + { + printf("<B>Product:</B> %s<BR>\n", rl->product); + } +} + void prRefGeneInfo(struct sqlConnection *conn, char *rnaName, char *sqlRnaName, struct refLink *rl, boolean isXeno) /* print basic details information and links for a RefGene */ { struct sqlResult *sr; char **row; char query[256]; int ver = gbCdnaGetVersion(conn, rl->mrnaAcc); char *cdsCmpl = NULL; printf("<td valign=top nowrap>\n"); if (isXeno) { if (startsWith("panTro", database)) printf("<H2>Other RefSeq Gene %s</H2>\n", rl->name); @@ -11332,30 +11373,75 @@ { AllocVar(palInfo); palInfo->chrom = chrom; palInfo->left = left; palInfo->right = right; palInfo->rnaName = rnaName; } geneShowPosAndLinksPal(rl->mrnaAcc, rl->protAcc, tdb, "refPep", "htcTranslatedProtein", "htcRefMrna", "htcGeneInGenome", "mRNA Sequence",palInfo); printTrackHtml(tdb); hFreeConn(&conn); } +void doNcbiRefGene(struct trackDb *tdb, char *rnaName) +/* Process click on a NCBI RefSeq gene. */ +{ +struct sqlConnection *conn = hAllocConn(database); +struct sqlResult *sr; +char **row; +char query[256]; +char *sqlRnaName = rnaName; +struct ncbiRefLink *rl; +boolean isPredicted = sameString(tdb->table, "ncbiRefPredicted"); + +/* Make sure to escape single quotes for DB parseability */ +if (strchr(rnaName, '\'')) + { + sqlRnaName = replaceChars(rnaName, "'", "''"); + } +/* get refLink entry */ +sqlSafef(query, sizeof(query), "select * from ncbiRefLink where id = '%s'", sqlRnaName); +sr = sqlGetResult(conn, query); +if ((row = sqlNextRow(sr)) == NULL) + errAbort("Couldn't find %s in ncbiRefLink table.", rnaName); +rl = ncbiRefLinkLoad(row); +sqlFreeResult(&sr); + +/* print the first section with info */ +if (isPredicted) + cartWebStart(cart, database, "NCBI Predicted RefSeq Gene"); +else + cartWebStart(cart, database, "NCBI Curated RefSeq Gene"); +printf("<table border=0>\n<tr>\n"); +prNcbiRefGeneInfo(conn, rnaName, sqlRnaName, rl, isPredicted); + +printf("</tr>\n</table>\n"); + +htmlHorizontalLine(); + +struct palInfo *palInfo = NULL; + + +geneShowPosAndLinksPal(rl->id, NULL, tdb, NULL, "htcTranslatedProtein", + "htcGeneMrna", "htcGeneInGenome", "mRNA Sequence",palInfo); + +printTrackHtml(tdb); +hFreeConn(&conn); +} void doRefGene(struct trackDb *tdb, char *rnaName) /* Process click on a known RefSeq gene. */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; char query[256]; char *sqlRnaName = rnaName; char *summary = NULL; boolean isXeno = sameString(tdb->table, "xenoRefGene"); struct refLink *rl; int start = cartInt(cart, "o"); int left = cartInt(cart, "l"); int right = cartInt(cart, "r"); char *chrom = cartString(cart, "c"); @@ -24807,30 +24893,34 @@ { doSuperfamily(tdb, item, NULL); } else if (sameWord(table, "ensGene") || sameWord (table, "ensGeneNonCoding")) { doEnsemblGene(tdb, item, NULL); } else if (sameWord(table, "xenoRefGene")) { doRefGene(tdb, item); } else if (sameWord(table, "knownGene")) { doKnownGene(tdb, item); } +else if (sameWord(table, "ncbiRefPredicted") || sameWord(table, "ncbiRefCurated") ) + { + doNcbiRefGene(tdb, item); + } else if (sameWord(table, "refGene") ) { doRefGene(tdb, item); } else if (sameWord(table, "ccdsGene")) { doCcdsGene(tdb, item); } else if (isNewGencodeGene(tdb)) { doGencodeGene(tdb, item); } else if (sameWord(table, "mappedRefSeq")) /* human refseqs on chimp browser */ {