4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/hgLinkIn/handlerList.c src/hg/hgLinkIn/handlerList.c index 8c1fcc9..3e6dcec 100644 --- src/hg/hgLinkIn/handlerList.c +++ src/hg/hgLinkIn/handlerList.c @@ -1,214 +1,214 @@ /* Copyright (C) 2017 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ /* handlerList - a list of handler functions for translating identifiers * from different external databases to browser positions. Used by * linkInHandlers.c. */ #include "common.h" #include "hdb.h" #include "jksql.h" #include "handlerList.h" /******************************************** * UniProtKB Handler *******************************************/ /* Databases whose IDs we can link to using the position parameter. ... this varies by database, * so be restrictive. Ordered by preference. */ /* 1 = EMBL, 2 = RefSeq, */ #define UniProtExtRefSeqId 2 #define UniProtExtEMBLId 1 /* * +---------+--------------+----+-----+------+--+ * | acc | char(12) | NO | MUL | NULL | | * | extDb | int(11) | NO | | NULL | | * | extAcc1 | varchar(255) | NO | MUL | NULL | | * | extAcc2 | varchar(255) | NO | | NULL | | * | extAcc3 | varchar(255) | NO | | NULL | | * +---------+--------------+----+-----+------+--+ * * Looks like UniProt marks the end of the IDs list on * each line with a '-'. Supplementary material may follow * in the extra field(s), e.g. * P01892 1 M86404 - NOT_ANNOTATED_CDS */ void addIdsToList(struct slName **listPtr, char **ids, int count) /* Given a list of alternate identifiers taken from the uniProt * table, add them to the supplied list. The identifiers list is * terminated with a '-' */ { int i=0; for (i=0; i<count; i++) { if (sameString(ids[i], "-")) break; slNameStore(listPtr, ids[i]); } } char *uniProtAccToDb(char *id, struct sqlConnection *conn) /* Searches the uniProt database (which conn must be connected to) * for the given identifier and returns the name of the database * that is most relevant. This is usually the default assembly * for the genome that the identifier is on. */ { char *db = NULL; char query[4096]; sqlSafef(query, sizeof(query), "select taxon from accToTaxon where acc = '%s'", id); int taxon = sqlQuickNum(conn, query); if (taxon != 0) db = hDbForTaxon(taxon); return db; } struct linkInResult *getEmblList(char *db, struct slName *emblNames, struct sqlConnection *conn) /* Given a connection to a genome database (e.g., hg19), the name of that database, and * a list of identifiers, search for any EMBL mRNA names that we have positions for. * Return a list of matching positions. */ { struct linkInResult *results = NULL; bool hasMRna = sqlTableExists(conn, "all_mrna"); if (!hasMRna) return NULL; struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select tName, tStart, tEnd from all_mrna where qName in ("); struct slName *thisName = emblNames; bool firstName = TRUE; while (thisName != NULL) { if (firstName) { sqlDyStringPrintfFrag(query, "'%s'", thisName->name); firstName = FALSE; } else sqlDyStringPrintfFrag(query, ",'%s'", thisName->name); thisName = thisName->next; } sqlDyStringPrintfFrag(query, ")"); struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query)); char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct linkInResult *newResult = NULL; AllocVar(newResult); newResult->db = cloneString(db); char position[4096]; safef(position, sizeof(position), "%s:%s-%s", row[0], row[1], row[2]); newResult->position = cloneString(position); newResult->trackToView = cloneString("all_mrna"); slAddHead(&results, newResult); } slReverse(&results); sqlFreeResult(&sr); return results; } struct linkInResult *getRefGeneList(char *db, struct slName *refNames, struct sqlConnection *conn) /* Given a connection to a genome database (e.g., hg19), the name of that database, and * a list of identifiers, search for any RefSeq gene names that we have positions for. * Return a list of matching positions. */ { struct linkInResult *results = NULL; bool hasNcbiRefGene = sqlTableExists(conn, "ncbiRefGene"); bool hasRefGene = sqlTableExists(conn, "refGene"); if (!hasNcbiRefGene && !hasRefGene) return NULL; struct dyString *query = dyStringNew(0); sqlDyStringPrintf(query, "select chrom, txStart, txEnd from %s where name in (", hasNcbiRefGene?"ncbiRefGene":"refGene"); struct slName *thisName = refNames; bool firstName = TRUE; while (thisName != NULL) { if (firstName) { sqlDyStringPrintfFrag(query, "'%s'", thisName->name); firstName = FALSE; } else sqlDyStringPrintfFrag(query, ",'%s'", thisName->name); thisName = thisName->next; } sqlDyStringPrintfFrag(query, ")"); struct sqlResult *sr = sqlGetResult(conn, dyStringContents(query)); char **row = NULL; while ((row = sqlNextRow(sr)) != NULL) { struct linkInResult *newResult = NULL; AllocVar(newResult); newResult->db = cloneString(db); char position[4096]; safef(position, sizeof(position), "%s:%s-%s", row[0], row[1], row[2]); newResult->position = cloneString(position); newResult->trackToView = cloneString(hasNcbiRefGene?"ncbiRefGene":"refGene"); slAddHead(&results, newResult); } slReverse(&results); sqlFreeResult(&sr); return results; } struct linkInResult *uniProtHandler(struct sqlConnection *conn, char *id) /* Handler for translating a UniProtKB identifier to a genome assembly and * positions within that assembly. Changes the database that conn * points to - first to uniProt, then to the relevant assembly. * Looks first for related RefSeq gene positions. If none are found, will * then search for related mRNA positions. */ { char query[8092], **row = NULL; struct slName *emblIdList = NULL, *refSeqIdList = NULL; sqlSafef(query, sizeof(query), "use uniProt"); sqlUpdate(conn, query); char *db = uniProtAccToDb(id, conn); if (isEmpty(db)) return NULL; sqlSafef(query, sizeof(query), "select extDb, extAcc1, extAcc2, extAcc3 from extDbRef where " "acc = '%s' and extDb in (%d, %d)", id, UniProtExtRefSeqId, UniProtExtEMBLId); /* Assemble resulting IDs into two lists; one for refSeq, and one for EMBL */ struct sqlResult *sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { int extDbId = atoi(row[0]); switch(extDbId) { case UniProtExtEMBLId: addIdsToList(&emblIdList, &row[1], 3); break; case UniProtExtRefSeqId: addIdsToList(&refSeqIdList, &row[1], 3); break; default: errAbort("Got unexpected external database ID %d", extDbId); } } sqlFreeResult(&sr); sqlSafef(query, sizeof(query), "use %s", db); sqlUpdate(conn, query); struct linkInResult *searchResults = NULL; /* If there are any RefSeq IDs, and if they're in our database, use that */ if (refSeqIdList != NULL) searchResults = getRefGeneList(db, refSeqIdList, conn); /* otherwise, hunt for any EMBL hits */ if ((searchResults == NULL) && (emblIdList != NULL)) searchResults = getEmblList(db, emblIdList, conn); return searchResults; } /*************************************** * End of UniProtKB handler **************************************/