e70152e44cc66cc599ff6b699eb8adc07f3e656a kent Sat May 24 21:09:34 2014 -0700 Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment. diff --git src/hg/nci60/findStanAlignments.c src/hg/nci60/findStanAlignments.c index 65f1fbf..5d1b683 100644 --- src/hg/nci60/findStanAlignments.c +++ src/hg/nci60/findStanAlignments.c @@ -1,212 +1,215 @@ +/* Copyright (C) 2013 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + #include "common.h" #include "hash.h" #include "jksql.h" #include "linefile.h" #include "hdb.h" #include "psl.h" #include "imageClone.h" #include "stanMad.h" void usage() { errAbort("findStanAlignments - takes a stanford microarray experiment file and\n" "tries to look up an alignment for the relevant clone in the database.\n" "Starts by trying to look up the longest genbank clone from image id, \n" "then tries to look up the 5' and 3' accesion numbers.\n" "usage:\n" "\tfindStanAlignments \n"); } struct imageClone *imageCloneLoadAllNonZero(char *fileName) /* Load all imageClone from a tab-separated file where there are genbank ids. * Dispose of this with imageCloneFreeList(). */ { struct imageClone *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[9]; while (lineFileRow(lf, row)) { el = imageCloneLoad(row); if(el->numGenbank != 0) { slAddHead(&list, el); } else { imageCloneFree(&el); } } lineFileClose(&lf); slReverse(&list); return list; } /* read all of the entries in an imageClone file into a hash for easy access */ void readInImageHash(struct hash *iHash, char *image) { struct imageClone *icList =NULL, *ic = NULL; char buff[256]; int count =0; icList = imageCloneLoadAllNonZero(image); for(ic = icList; ic != NULL; ic = ic->next) { if((count++ % 10000) ==0) { printf("."); fflush(stdout); } sprintf(buff, "%d", ic->id); hashAdd(iHash, buff, ic); } printf("\n"); } struct psl *pslLoadByQuery(struct sqlConnection *conn, char *query) /* Load all psl from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with pslFreeList(). */ { struct psl *list = NULL, *el; struct sqlResult *sr; char **row; int offSet = 0; sr = sqlGetResult(conn, query); offSet = sqlCountColumns(sr) - 21; while ((row = sqlNextRow(sr)) != NULL) { el = pslLoad(row+offSet); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } /* create object on the heap, copy values from psl and return, don't forget to free */ struct psl *copyPsl(struct psl *psl) { struct psl *ret = NULL; AllocVar(ret); ret->next = NULL; ret->match = psl->match; ret->misMatch = psl->misMatch; ret->repMatch = psl->repMatch; ret->nCount = psl->nCount; ret->qNumInsert = psl->qNumInsert; ret->qBaseInsert = psl->qBaseInsert; ret->tNumInsert = psl->tNumInsert; ret->tBaseInsert = psl ->tBaseInsert; strcpy(ret->strand, psl->strand); ret->qName = cloneString(psl->qName); ret->qSize = psl->qSize; ret->qStart = psl->qStart; ret->qEnd = psl->qEnd; ret->tName = cloneString(psl->tName); ret->tSize = psl->tSize; ret->tStart = psl->tStart; ret->tEnd = psl->tEnd; ret->blockCount = psl->blockCount; ret->blockSizes = CloneArray(psl->blockSizes, psl->blockCount); ret->qStarts = CloneArray(psl->qStarts, psl->blockCount); ret->tStarts = CloneArray(psl->tStarts, psl->blockCount); return ret; } void outputAlignmentForStan(struct sqlConnection *conn, struct stanMad *sm, struct hash *iHash, FILE *out) { struct psl *pslList, *bestPsl = NULL; char buff[1024]; int i; struct imageClone *ic = NULL; sprintf(buff, "%d", sm->clid); printf("Looking for %s\n", buff); ic = hashFindVal(iHash, buff); if(ic != NULL) { /* first try looking for the image clones themselves... */ for(i=0; inumGenbank; i++) { char query[1024]; sqlSafef(query, sizeof query, "select * from all_est where qName='%s'", ic->genbankIds[i]); pslList = pslLoadByQuery(conn, buff); if(pslList != NULL) { slSort(&pslList, pslCmpScore); if(bestPsl == NULL || (pslScore(pslList) > pslScore(bestPsl))) pslFree(&bestPsl); bestPsl = copyPsl(pslList); } pslFreeList(&pslList); } if(bestPsl != NULL) { freez(&bestPsl->qName); sprintf(buff, "%d", sm->clid); bestPsl->qName = cloneString(buff); pslTabOut(bestPsl,out); } else { fprintf(out, "%d\talignment unknown\n", sm->clid); } } else { fprintf(out, "%d\tunknown\n", sm->clid); } } void findStanAlignments(char *db, char *stan, char *image, char *pslOut) { struct hash *iHash = newHash(5); struct stanMad *smList = NULL, *sm = NULL; FILE *out = mustOpen(pslOut, "w"); int count =0; struct sqlConnection *conn = NULL; warn("Getting sql Connection..."); conn = hAllocConn(db); warn("Reading in image clones..."); readInImageHash(iHash, image); warn("Loading Stanford Alignments.."); smList = stanMadLoadAll(stan); warn("Finding best Alignments..."); for(sm = smList; sm != NULL; sm = sm->next) { if(differentString(sm->type,"Control")) { if((count++ % 10000) ==0) { printf("."); fflush(stdout); } outputAlignmentForStan(conn, sm, iHash, out); } } printf("\n"); warn("Done. Cleaning up..."); stanMadFreeList(&smList); freeHash(&iHash); hFreeConn(&conn); } int main(int argc, char *argv[]) { if(argc != 5) usage(); findStanAlignments(argv[1],argv[2],argv[3],argv[4]); return 0; }