25a1ed58b2f9fc2d9ad7fdbe9117f0b2f47253f3 fanhsu Tue Nov 9 16:38:23 2010 -0800 Update for RGD Genes, Redmine issue #29. diff --git src/hg/hgGene/synonym.c src/hg/hgGene/synonym.c index 1a05c3d..df00f6d 100644 --- src/hg/hgGene/synonym.c +++ src/hg/hgGene/synonym.c @@ -1,312 +1,410 @@ /* Synonym - print out other names for this gene. */ #include "common.h" #include "hash.h" #include "hdb.h" #include "linefile.h" #include "dystring.h" #include "hgGene.h" #include "spDb.h" #include "ccdsGeneMap.h" static char const rcsid[] = "$Id: synonym.c,v 1.10 2009/01/30 23:30:55 fanhsu Exp $"; static void printOurMrnaUrl(FILE *f, char *accession) /* Print URL for Entrez browser on a nucleotide. */ { fprintf(f, "../cgi-bin/hgc?%s&g=mrna&i=%s&c=%s&o=%d&t=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), accession, curGeneChrom, curGeneStart, curGeneEnd, curGeneStart, curGeneEnd, database); } static void printOurRefseqUrl(FILE *f, char *accession) /* Print URL for Entrez browser on a nucleotide. */ { fprintf(f, "../cgi-bin/hgc?%s&g=refGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s", cartSidUrlString(cart), accession, curGeneChrom, curGeneStart, curGeneStart, curGeneEnd, database); } static int countAlias(char *id, struct sqlConnection *conn) /* Count how many valid gene symbols to be printed */ { char query[256]; struct sqlResult *sr; int cnt = 0; char **row; safef(query, sizeof(query), "select alias from kgAlias where kgId = '%s' order by alias", id); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); while (row != NULL) { /* skip kgId and the maint gene symbol (curGeneName) */ if ((!sameWord(id, row[0])) && (!sameWord(row[0], curGeneName))) { cnt++; } row = sqlNextRow(sr); } sqlFreeResult(&sr); return(cnt); } char *aliasString(char *id, struct sqlConnection *conn) /* return alias string as it would be printed in html, can free after use */ { char query[256]; struct sqlResult *sr = NULL; char **row; int totalCount; int cnt = 0; totalCount = countAlias(id,conn); if (totalCount > 0) { struct dyString *aliasReturn = dyStringNew(0); dyStringPrintf(aliasReturn, "Alternate Gene Symbols: "); safef(query, sizeof(query), "select alias from kgAlias where kgId = '%s' order by alias", id); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); while (cnt < totalCount) { /* skip kgId and the maint gene symbol (curGeneName) */ if ((!sameWord(id, row[0])) && (!sameWord(row[0], curGeneName))) { dyStringPrintf(aliasReturn, "%s", row[0]); if (cnt < (totalCount-1)) dyStringPrintf(aliasReturn, ", "); cnt++; } row = sqlNextRow(sr); } dyStringPrintf(aliasReturn, "
"); sqlFreeResult(&sr); return dyStringCannibalize(&aliasReturn); } return NULL; } static void printAlias(char *id, struct sqlConnection *conn) /* Print out description of gene given ID. */ { char *aliases = aliasString(id, conn); if (aliases) { hPrintf("%s", aliases); freeMem(aliases); } } static void printGeneSymbol (char *geneId, char *table, char *idCol, struct sqlConnection *conn) /* Print out official Entrez gene symbol from a cross-reference table.*/ { char query[256]; struct sqlResult *sr = NULL; char **row; char *geneSymbol; if (sqlTablesExist(conn, table)) { hPrintf("Entrez Gene Official Symbol: "); safef(query, sizeof(query), "select geneSymbol from %s where %s = '%s'", table, idCol, geneId); sr = sqlGetResult(conn, query); if (sr != NULL) { row = sqlNextRow(sr); geneSymbol = cloneString(row[0]); if (!sameString(geneSymbol, "")) hPrintf("%s
", geneSymbol); } } sqlFreeResult(&sr); } static char *getRefSeqAcc(char *id, char *table, char *idCol, struct sqlConnection *conn) /* Finds RefSeq accession from a cross-reference table. */ { char query[256]; struct sqlResult *sr = NULL; char **row; char *refSeqAcc = NULL; if (sqlTablesExist(conn, table)) { safef(query, sizeof(query), "select refSeq from %s where %s = '%s'", table, idCol, id); sr = sqlGetResult(conn, query); if (sr != NULL) { row = sqlNextRow(sr); refSeqAcc = cloneString(row[0]); } } sqlFreeResult(&sr); return refSeqAcc; } static void printCcds(char *kgId, struct sqlConnection *conn) /* Print out CCDS ids most closely matching the kg. */ { struct ccdsGeneMap *ccdsKgs = NULL; if (sqlTablesExist(conn, "ccdsKgMap")) ccdsKgs = ccdsGeneMapSelectByGene(conn, "ccdsKgMap", kgId, 0.0); if (ccdsKgs != NULL) { struct ccdsGeneMap *ccdsKg; hPrintf("CCDS: "); /* since kg is not by location (even though we have a * curGeneStart/curGeneEnd), we need to use the location in the * ccdsGeneMap */ for (ccdsKg = ccdsKgs; ccdsKg != NULL; ccdsKg = ccdsKg->next) { if (ccdsKg != ccdsKgs) hPrintf(", "); hPrintf("%s", cartSidUrlString(cart), ccdsKg->ccdsId, ccdsKg->chrom, ccdsKg->chromStart, ccdsKg->chromStart, ccdsKg->chromEnd, database, ccdsKg->ccdsId); } hPrintf("
\n"); } } +char *addComma(char *inStr) +{ +char *chp; +chp = inStr; + +while (*chp != '\0') + { + if (*chp == '|') + *chp = ','; + chp++; + } +return inStr; +} + +static void rgdGene2SynonymPrint(struct section *section, + struct sqlConnection *conn, char *rgdGeneId) +{ +char *geneSym = NULL, *geneName = NULL; +char query[256], **row; +struct sqlResult *sr; +if (rgdGeneId != NULL) + { + safef(query, sizeof(query), + "select old_symbol, old_name from genes_rat where gene_rgd_id = '%s'", + rgdGeneId+4L); + sr = sqlGetResult(conn, query); + if ((row = sqlNextRow(sr)) != NULL) + { + if (row[0][0] != 0 && !sameString(row[0], "n/a")) + { + geneSym = cloneString(row[0]); + hPrintf("Symbol: %s ", addComma(row[0])); + //hPrintf("
\n"); + } + if (row[1][0] != 0 && !sameString(row[0], "n/a")) + { + geneName = cloneString(row[1]); + hPrintf("
Name: %s ", addComma(geneName)); + hPrintf("
\n"); + } + } + sqlFreeResult(&sr); + + safef(query, sizeof(query), + "select value from rgdGene2ToRefSeq where name= '%s'", rgdGeneId); + sr = sqlGetResult(conn, query); + if ((row = sqlNextRow(sr)) != NULL) + { + hPrintf("RefSeq Accession: %s
\n", row[0]); + } + sqlFreeResult(&sr); + + safef(query, sizeof(query), + "select value from rgdGene2ToUniProt where name= '%s'", rgdGeneId); + sr = sqlGetResult(conn, query); + if ((row = sqlNextRow(sr)) != NULL) + { + char *spId, *spDisplayId, *oldDisplayId; + spId = row[0]; + hPrintf("Protein: "); + hPrintf("%s\n", + spId, spId); + + /* show SWISS-PROT display ID if it is different than the accession ID */ + /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ + spDisplayId = spAnyAccToId(spConn, spId); + if (spDisplayId == NULL) + { + errAbort("
The corresponding protein %s of this gene is not found in our current UniProtKB DB.", spId); + } + + if (strstr(spDisplayId, spId) == NULL) + { + hPrintf(" (aka %s", spDisplayId); + /* show once if the new and old displayId are the same */ + oldDisplayId = oldSpDisplayId(spDisplayId); + if (oldDisplayId != NULL) + { + if (!sameWord(spDisplayId, oldDisplayId) + && !sameWord(spId, oldDisplayId)) + { + hPrintf(" or %s", oldDisplayId); + } + } + hPrintf(")
\n"); + } + } + sqlFreeResult(&sr); + } +} static void synonymPrint(struct section *section, struct sqlConnection *conn, char *id) /* Print out SwissProt comments - looking up typeId/commentVal. */ { char *protAcc = getSwissProtAcc(conn, spConn, id); char *spDisplayId; char *refSeqAcc = ""; char *mrnaAcc = ""; char *oldDisplayId; char condStr[255]; char *kgProteinID; char *parAcc; /* parent accession of a variant splice protein */ char *chp; +if (isRgdGene(conn)) + { + rgdGene2SynonymPrint(section,conn, id); + return; + } if (sqlTablesExist(conn, "kgAlias")) printAlias(id, conn); if (sameWord(genome, "Zebrafish")) { char *xrefTable = "ensXRefZfish"; char *geneIdCol = "ensGeneId"; /* get Gene Symbol and RefSeq accession from Zebrafish-specific */ /* cross-reference table */ printGeneSymbol(id, xrefTable, geneIdCol, conn); refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn); hPrintf("ENSEMBL ID: %s", id); } else { char query[256]; char *toRefTable = genomeOptionalSetting("knownToRef"); if (toRefTable != NULL && sqlTableExists(conn, toRefTable)) { safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable, id); refSeqAcc = emptyForNull(sqlQuickString(conn, query)); } if (sqlTableExists(conn, "kgXref")) { safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id); mrnaAcc = emptyForNull(sqlQuickString(conn, query)); } if (sameWord(genome, "C. elegans")) hPrintf("WormBase ID: %s
", id); else hPrintf("UCSC ID: %s
", id); } if (refSeqAcc[0] != 0) { hPrintf("RefSeq Accession: %s
\n", refSeqAcc); } else if (mrnaAcc[0] != 0) { safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc); if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL) { hPrintf("Representative RNA: %s
\n", mrnaAcc); } else /* do not show URL link if it is not found in gbCdnaInfo */ { hPrintf("Representative RNA: %s ", mrnaAcc); } } if (protAcc != NULL) { kgProteinID = cloneString(""); if (hTableExists(sqlGetDatabase(conn), "knownGene") && (isNotEmpty(cartOptionalString(cart, hggChrom)) && differentWord(cartOptionalString(cart, hggChrom),"none"))) { safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", id, cartOptionalString(cart, hggChrom), cartOptionalString(cart, hggStart), cartOptionalString(cart, hggEnd)); kgProteinID = sqlGetField(database, "knownGene", "proteinID", condStr); } hPrintf("Protein: "); if (strstr(kgProteinID, "-") != NULL) { parAcc = cloneString(kgProteinID); chp = strstr(parAcc, "-"); *chp = '\0'; /* show variant splice protein and the UniProt link here */ hPrintf("%s, splice isoform of ", kgProteinID, kgProteinID); hPrintf("%s\n", parAcc, parAcc); } else { hPrintf("%s\n", protAcc, protAcc); } /* show SWISS-PROT display ID if it is different than the accession ID */ /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ spDisplayId = spAnyAccToId(spConn, protAcc); if (spDisplayId == NULL) { errAbort("
%s seems to no longer be a valid protein ID in our latest UniProtKB DB.", protAcc); } if (strstr(spDisplayId, protAcc) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId) && !sameWord(protAcc, oldDisplayId)) { hPrintf(" or %s", oldDisplayId); } } hPrintf(")
\n"); } } printCcds(id, conn); } struct section *synonymSection(struct sqlConnection *conn, struct hash *sectionRa) /* Create synonym (aka Other Names) section. */ { struct section *section = sectionNew(sectionRa, "synonym"); section->print = synonymPrint; return section; }