6a00cf44e1c0ba0d6901464e4b7fd220ce1f3d41 braney Fri Feb 13 13:02:40 2015 -0800 some modifications to support the GENCODE as UCSC genes switch diff --git src/hg/lib/spDb.c src/hg/lib/spDb.c index 3a6291c..ff4c37e 100644 --- src/hg/lib/spDb.c +++ src/hg/lib/spDb.c @@ -1,677 +1,691 @@ /* spTest - Test out sp library.. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "dystring.h" #include "jksql.h" #include "hdb.h" #include "spDb.h" boolean spIsPrimaryAcc(struct sqlConnection *conn, char *acc) /* Return TRUE if this is a primary accession in database. */ { char query[256]; SpAcc temp; sqlSafef(query, sizeof(query), "select acc from displayId where acc = '%s'", acc); return sqlQuickQuery(conn, query, temp, sizeof(temp)) != NULL; } char *spFindAcc(struct sqlConnection *conn, char *id) /* Return primary accession given either display ID, * primary accession, or secondary accession. Return * NULL if not found. */ { char *acc; if (spIsPrimaryAcc(conn, id)) return cloneString(id); acc = spIdToAcc(conn, id); if (acc != NULL) return acc; else { char query[256]; sqlSafef(query, sizeof(query), "select acc from otherAcc where val = '%s'", id); return sqlQuickString(conn, query); } } char *spAccToId(struct sqlConnection *conn, char *acc) /* Convert primary accession to SwissProt ID (which will * often look something like HXA1_HUMAN. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from displayId where acc = '%s'", acc); return sqlNeedQuickString(conn, query); } char *spAnyAccToId(struct sqlConnection *conn, char *acc) /* Convert primary accession to SwissProt ID (which will * often look something like HXA1_HUMAN. Please note that compared to spAccToID(), this function calls sqlQuickString() instead of sqlNeedQuickString(), so if nothing is found, it will return NULL instead of abort. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from displayId where acc = '%s'", spFindAcc(conn, acc)); return sqlQuickString(conn, query); } char *spIdToAcc(struct sqlConnection *conn, char *id) /* Convert SwissProt ID (things like HXA1_HUMAN) to * accession. Returns NULL if the conversion fails. * (doesn't abort). */ { char query[256]; sqlSafef(query, sizeof(query), "select acc from displayId where val = '%s'", id); return sqlQuickString(conn, query); } -char *spLookupPrimaryAcc(struct sqlConnection *conn, + +char *spLookupPrimaryAccMaybe(struct sqlConnection *conn, char *anyAcc) /* Primary or secondary accession. */ /* This will return the primary accession. It's ok to pass in * either a primary or secondary accession. */ { char query[256]; if (spIsPrimaryAcc(conn, anyAcc)) return cloneString(anyAcc); else { sqlSafef(query, sizeof(query), "select acc from otherAcc where val = '%s'", anyAcc); - return sqlNeedQuickString(conn, query); + return sqlQuickString(conn, query); } } +char *spLookupPrimaryAcc(struct sqlConnection *conn, + char *anyAcc) /* Primary or secondary accession. */ +/* This will return the primary accession. It's ok to pass in + * either a primary or secondary accession. */ +{ +char *acc = spLookupPrimaryAccMaybe(conn, anyAcc); + +if (acc == NULL) + errAbort("accession not found: %s", anyAcc); + +return acc; +} + char *spDescription(struct sqlConnection *conn, char *acc) /* Return protein description. FreeMem this when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from description where acc = '%s'", acc); return sqlNeedQuickString(conn, query); } boolean spIsCurated(struct sqlConnection *conn, char *acc) /* Return TRUE if it is a curated entry. */ { char query[256]; sqlSafef(query, sizeof(query), "select isCurated from info where acc = '%s'", acc); return sqlNeedQuickNum(conn, query); } int spAaSize(struct sqlConnection *conn, char *acc) /* Return number of amino acids. */ { char query[256]; sqlSafef(query, sizeof(query), "select aaSize from info where acc = '%s'", acc); return sqlNeedQuickNum(conn, query); } int spMolWeight(struct sqlConnection *conn, char *acc) /* Return molecular weight in daltons. */ { char query[256]; sqlSafef(query, sizeof(query), "select molWeight from info where acc = '%s'", acc); return sqlNeedQuickNum(conn, query); } char *spCreateDate(struct sqlConnection *conn, char *acc) /* Return creation date in YYYY-MM-DD format. FreeMem * this when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select createDate from info where acc = '%s'", acc); return sqlNeedQuickString(conn, query); } char *spSeqDate(struct sqlConnection *conn, char *acc) /* Return sequence last update date in YYYY-MM-DD format. FreeMem * this when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select seqDate from info where acc = '%s'", acc); return sqlNeedQuickString(conn, query); } char *spAnnDate(struct sqlConnection *conn, char *acc) /* Return annotation last update date in YYYY-MM-DD format. FreeMem * this when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select annDate from info where acc = '%s'", acc); return sqlNeedQuickString(conn, query); } char *spOrganelle(struct sqlConnection *conn, char *acc) /* Return text description of organelle. FreeMem this when done. * This may return NULL if it's not an organelle. */ { char query[256]; sqlSafef(query, sizeof(query), "select organelle.val from info,organelle " "where info.acc = '%s' and info.organelle = organelle.id" , acc); return sqlQuickString(conn, query); } struct slName *spGeneToAccs(struct sqlConnection *conn, char *gene, int taxon) /* Get list of accessions associated with this gene. If * taxon is zero then this will return all accessions, if * taxon is non-zero then it will restrict it to a single * organism with that taxon ID. */ { char query[256]; if (taxon == 0) { sqlSafef(query, sizeof(query), "select acc from gene where val = '%s'", gene); } else { sqlSafef(query, sizeof(query), "select gene.acc from gene,accToTaxon " "where gene.val = '%s' " "and gene.acc = accToTaxon.acc " "and accToTaxon.taxon = %d" , gene, taxon); } return sqlQuickList(conn, query); } struct slName *spProteinEvidence(struct sqlConnection *conn, char *acc) /* Get list of evidence that protein exists for accession. There will be at least one. */ { char query[256]; sqlSafef(query, sizeof(query), "select proteinEvidenceType.val from proteinEvidence,proteinEvidenceType " "where proteinEvidence.acc = '%s' " "and proteinEvidence.proteinEvidenceType = proteinEvidenceType.id" , acc); return sqlQuickList(conn, query); } struct slName *spGenes(struct sqlConnection *conn, char *acc) /* Return list of genes associated with accession */ { char query[256]; sqlSafef(query, sizeof(query), "select val from gene where acc = '%s'", acc); return sqlQuickList(conn, query); } struct slName *spTaxons(struct sqlConnection *conn, char *acc) /* Return list of taxons associated with accession */ { char query[256]; sqlSafef(query, sizeof(query), "select taxon from accToTaxon where acc = '%s'", acc); return sqlQuickList(conn, query); } struct slName *spBinomialNames(struct sqlConnection *conn, char *acc) /* Return list of scientific names of organisms * associated with accessoin */ { char query[256]; sqlSafef(query, sizeof(query), "select binomial from accToTaxon,taxon " "where accToTaxon.acc = '%s' and accToTaxon.taxon = taxon.id" , acc); return sqlQuickList(conn, query); } int spTaxon(struct sqlConnection *conn, char *acc) /* Return taxon of first organism associated with accession. */ { char query[256]; sqlSafef(query, sizeof(query), "select taxon from accToTaxon where acc = '%s'", acc); return sqlNeedQuickNum(conn, query); } int spBinomialToTaxon(struct sqlConnection *conn, char *name) /* Return taxon associated with binomial (Mus musculus) name. */ { char query[256]; sqlSafef(query, sizeof(query), "select id from taxon where binomial = '%s'", name); return sqlNeedQuickNum(conn, query); } int spCommonToTaxon(struct sqlConnection *conn, char *commonName) /* Return taxon ID associated with common (mouse) name or * binomial name. */ { char query[256]; int taxon; sqlSafef(query, sizeof(query), "select taxon from commonName where val = '%s'", commonName); taxon = sqlQuickNum(conn, query); if (taxon == 0) taxon = spBinomialToTaxon(conn, commonName); return taxon; } char *spTaxonToBinomial(struct sqlConnection *conn, int taxon) /* Return Latin binomial name associated with taxon. */ { char query[256]; if (taxon <= 0) errAbort("Bad taxon id %d\n", taxon); sqlSafef(query, sizeof(query), "select binomial from taxon where id = %d", taxon); return sqlNeedQuickString(conn, query); } char *spTaxonToCommon(struct sqlConnection *conn, int taxon) /* Given NCBI common ID return first common name associated * with it if possible, otherwise return scientific name. */ { char query[256]; char *ret; if (taxon <= 0) errAbort("Bad taxon id %d\n", taxon); sqlSafef(query, sizeof(query), "select val from commonName where taxon = %d", taxon); ret = sqlQuickString(conn, query); if (ret == NULL) ret = spTaxonToBinomial(conn, taxon); return ret; } struct slName *spKeywords(struct sqlConnection *conn, char *acc) /* Return list of keywords for accession. */ { char query[256]; sqlSafef(query, sizeof(query), "select keyword.val from accToKeyword,keyword " "where accToKeyword.acc = '%s' " "and accToKeyword.keyword = keyword.id" , acc); return sqlQuickList(conn, query); } struct slName *spKeywordSearch(struct sqlConnection *conn, char *keyword, int taxon) /* Return list of accessions that use keyword. If taxon is non-zero * then restrict accessions to given organism. */ { char query[256]; int kwId; sqlSafef(query, sizeof(query), "select id from keyword where val = '%s'", keyword); kwId = sqlQuickNum(conn, query); if (kwId == 0) return NULL; if (taxon == 0) { sqlSafef(query, sizeof(query), "select acc from accToKeyword where keyword = %d", kwId); } else { sqlSafef(query, sizeof(query), "select accToKeyword.acc from accToKeyword,accToTaxon " "where accToKeyword.keyword = %d " "and accToKeyword.acc = accToTaxon.acc " "and accToTaxon.taxon = %d" , kwId, taxon); } return sqlQuickList(conn, query); } struct slName *slComments(struct sqlConnection *conn, char *acc, /* Primary accession. */ char *type) /* Comment type name, NULL for all comments. */ /* Get list of comments associated with accession. * If optional type parameter is included it should be * something in the commentType table. Some good types * are: DISEASE, FUNCTION, "SUBCELLULAR LOCATION" etc. */ { char query[256]; if (type == NULL) { sqlSafef(query, sizeof(query), "select commentVal.val from comment,commentVal " "where comment.acc = '%s' " "and comment.commentVal = commentVal.id" , acc); } else { int typeId; sqlSafef(query, sizeof(query), "select id from commentType where val = '%s'", type); typeId = sqlNeedQuickNum(conn, query); sqlSafef(query, sizeof(query), "select commentVal.val from comment,commentVal " "where comment.acc = '%s' " "and comment.commentType = %d " "and comment.commentVal = commentVal.id " , acc, typeId); } return sqlQuickList(conn, query); } struct slName *slCommentTypes(struct sqlConnection *conn) /* Get list of comment types in database. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from commentType"); return sqlQuickList(conn, query); } char *spCommentType(struct sqlConnection *conn, int typeId) /* Look up text associated with typeId. freeMem result when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from commentType where id=%d", typeId); return sqlQuickString(conn, query); } char *spCommentVal(struct sqlConnection *conn, int valId) /* Look up text associated with valId. freeMem result when done. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from commentVal where id=%d", valId); return sqlQuickString(conn, query); } struct slName *spExtDbAcc1List(struct sqlConnection *conn, char *acc, char *db) /* Get list of accessions from external database associated with this * swissProt entity. The db parameter can be anything in the * extDb table. Some common external databases are 'EMBL' 'PDB' 'Pfam' * 'Interpro'. */ { char query[256]; sqlSafef(query, sizeof(query), "select extDbRef.extAcc1 from extDbRef,extDb " "where extDbRef.acc = '%s' " "and extDbRef.extDb = extDb.id " "and extDb.val = '%s'" , acc, db); return sqlQuickList(conn, query); } struct slName *spEmblAccs(struct sqlConnection *conn, char *acc) /* Get list of EMBL/Genbank mRNAaccessions associated with this. */ { return spExtDbAcc1List(conn, acc, "EMBL"); } struct slName *spPdbAccs(struct sqlConnection *conn, char *acc) /* Get list of PDB associations associated with this. */ { return spExtDbAcc1List(conn, acc, "PDB"); } char *spAccFromEmbl(struct sqlConnection *conn, char *acc) /* Get SwissProt accession associated with EMBL mRNA. */ { char query[256]; int emblId; sqlSafef(query, sizeof(query), "select id from extDb where val = 'EMBL'"); emblId = sqlNeedQuickNum(conn, query); sqlSafef(query, sizeof(query), "select acc from extDbRef where extAcc1 = '%s' and extDb = %d" , acc, emblId); return sqlQuickString(conn, query); } struct spFeature *spFeatures(struct sqlConnection *conn, char *acc, int classId, /* Feature class ID, 0 for all */ int typeId) /* Feature type ID, 0 for all */ /* Get feature list. slFreeList this when done. */ { struct dyString *dy = dyStringNew(0); struct spFeature *list = NULL, *el; char **row; struct sqlResult *sr; sqlDyStringAppend(dy, "select start,end,featureClass,featureType,softEndBits from feature "); sqlDyStringPrintf(dy, "where acc = '%s'", acc); if (classId != 0) dyStringPrintf(dy, " and featureClass=%d", classId); if (typeId != 0) dyStringPrintf(dy, " and featureType=%d", typeId); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(el); el->start = sqlUnsigned(row[0]); el->end = sqlUnsigned(row[1]); el->featureClass = sqlUnsigned(row[2]); el->featureType = sqlUnsigned(row[3]); el->softEndBits = sqlUnsigned(row[4]); slAddHead(&list, el); } sqlFreeResult(&sr); dyStringFree(&dy); slReverse(&list); return list; } char *spFeatureTypeName(struct sqlConnection *conn, int featureType) /* Return name associated with featureType */ { if (featureType == 0) return cloneString("n/a"); else { char query[256]; sqlSafef(query, sizeof(query), "select val from featureType where id=%d", featureType); return sqlNeedQuickString(conn, query); } } int spFeatureTypeId(struct sqlConnection *conn, char *name) /* Return feature type id associated with given name. */ { if (sameString(name, "n/a")) return 0; else { char query[256]; sqlSafef(query, sizeof(query), "select id from featureType where val='%s'", name); return sqlNeedQuickNum(conn, query); } } char *spFeatureClassName(struct sqlConnection *conn, int featureClass) /* Return name associated with featureClass. */ { char query[256]; sqlSafef(query, sizeof(query), "select val from featureClass where id=%d", featureClass); return sqlNeedQuickString(conn, query); } int spFeatureClassId(struct sqlConnection *conn, char *name) /* Return feature class id associated with given name. */ { char query[256]; sqlSafef(query, sizeof(query), "select id from featureClass where val='%s'", name); return sqlNeedQuickNum(conn, query); } struct spCitation *spCitations(struct sqlConnection *conn, char *acc) /* Get list of citations to references associated with this * accession. You can slFreeList this when done. */ { struct sqlResult *sr; char query[256], **row; struct spCitation *list = NULL, *el; sqlSafef(query, sizeof(query), "select id,acc,reference,rp from citation where acc='%s'", acc); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(el); el->id = sqlUnsigned(row[0]); memcpy(el->acc, row[1], sizeof(el->acc)); el->reference = sqlUnsigned(row[2]); el->rp = sqlUnsigned(row[3]); slAddHead(&list, el); } sqlFreeResult(&sr); slReverse(&list); return list; } char *spRefTitle(struct sqlConnection *conn, int refId) /* Get title of reference. This can be NULL legitimately. */ { char query[256]; sqlSafef(query, sizeof(query), "select title from reference where id=%d", refId); return sqlQuickString(conn, query); } struct slName *spRefAuthors(struct sqlConnection *conn, int refId) /* Get list of authors associated with reference. */ { char query[256]; sqlSafef(query, sizeof(query), "select author.val from referenceAuthors,author " "where referenceAuthors.reference = %d " "and referenceAuthors.author = author.id" , refId); return sqlQuickList(conn, query); } char *spRefCite(struct sqlConnection *conn, int refId) /* Get journal/page/etc of reference. */ { char query[256]; sqlSafef(query, sizeof(query), "select cite from reference where id=%d", refId); return sqlNeedQuickString(conn, query); } char *spRefPubMed(struct sqlConnection *conn, int refId) /* Get PubMed id. May be NULL legitimately. */ { char query[256]; sqlSafef(query, sizeof(query), "select pubMed from reference where id=%d", refId); return sqlQuickString(conn, query); } struct slName *spRefToAccs(struct sqlConnection *conn, int refId) /* Get list of accessions associated with reference. */ { char query[256]; sqlSafef(query, sizeof(query), "select acc from citation where reference = %d", refId); return sqlQuickList(conn, query); } char *newSpDisplayId(char *oldSpDisplayId) /* Convert from old Swiss-Prot display ID to new display ID */ { static struct sqlConnection *conn=NULL; char condStr[255]; char *newSpDisplayId; if (conn==NULL) { conn = sqlConnect(PROTEOME_DB_NAME); if (conn == NULL) return NULL; } sqlSafefFrag(condStr, sizeof(condStr), "oldDisplayId='%s'", oldSpDisplayId); newSpDisplayId = sqlGetField(PROTEOME_DB_NAME, "spOldNew", "newDisplayId", condStr); return(newSpDisplayId); } char *oldSpDisplayId(char *newSpDisplayId) /* Convert from new Swiss-Prot display ID to old display ID */ { static struct sqlConnection *conn=NULL; char condStr[255]; char *oldSpDisplayId; if (conn==NULL) { conn = sqlConnect(PROTEOME_DB_NAME); if (conn == NULL) return NULL; } sqlSafefFrag(condStr, sizeof(condStr), "newDisplayId='%s'", newSpDisplayId); oldSpDisplayId = sqlGetField(PROTEOME_DB_NAME, "spOldNew", "oldDisplayId", condStr); return(oldSpDisplayId); } char *uniProtFindPrimAcc(char *id) /* Return primary accession given an alias. */ /* The alias could be an accession number, display ID, old display ID, etc. * NULL if not found. */ { static struct sqlConnection *conn=NULL; char *acc; char query[256]; if (conn==NULL) { conn = sqlMayConnect(PROTEOME_DB_NAME); if (conn == NULL) return NULL; } sqlSafef(query, sizeof(query), "select acc from uniProtAlias where alias = '%s'", id); acc = sqlQuickString(conn, query); return(acc); } char *uniProtFindPrimAccFromGene(char *gene, char *db) /* Return primary accession given gene name. * NULL if not found. */ { static struct sqlConnection *conn=NULL; char *acc = NULL; char query[256]; char *pdb = cloneString(UNIPROT_DB_NAME); char *geneTable = "gene"; if (conn==NULL) { conn = sqlMayConnect(pdb); if (conn == NULL) return NULL; } if (geneTable != NULL && sqlTableExists(conn,geneTable)) { sqlSafef(query, sizeof(query), "select g.acc from %s g , accToTaxon a, taxon t where val = '%s' and g.acc = a.acc and id = a.taxon and binomial = '%s' ",geneTable, gene, hGenome(db)); acc = sqlQuickString(conn, query); } return(acc); }