6085f4dbce05570d9daefdec328b2fcb6fc9bf9f hartera Mon Jul 16 14:19:11 2012 -0700
Added code so that if the utr option is set, then alignments are retrieved for all exons not just the CDS region.
diff --git src/hg/lib/mafGene.c src/hg/lib/mafGene.c
index 38bcd9f..4b5da13 100644
--- src/hg/lib/mafGene.c
+++ src/hg/lib/mafGene.c
@@ -751,112 +751,123 @@
 /* free exonInfo list */
 static void freeGIList(struct exonInfo *list)
 {
 struct exonInfo *giNext;
 for(; list ; list = giNext)
     {
     giNext = list->next;
     mafAliFreeList(&list->ali);
     }
 }
 static struct exonInfo *buildGIList(char *database, struct genePred *pred,
-    char *mafTable)
+    char *mafTable, unsigned options)
 {
 struct exonInfo *giList = NULL;
 unsigned *exonStart = pred->exonStarts;
 unsigned *lastStart = &exonStart[pred->exonCount];
 unsigned *exonEnd = pred->exonEnds;
 int *frames = pred->exonFrames;
+boolean utr = options & MAFGENE_UTR;
+
 if (frames == NULL)
     {
     genePredAddExonFrames(pred);
     frames = pred->exonFrames;
     }
 assert(frames != NULL);
 int start = 0;
-/* first skip 5' UTR */
+/* first skip 5' UTR if the includeUtr option is not set */
+if (!utr)
+    {
 for(; exonStart < lastStart; exonStart++, exonEnd++, frames++)
     {
     int size = *exonEnd - *exonStart;
     if (*exonStart + size > pred->cdsStart)
         break;
     }
+    }
 for(; exonStart < lastStart; exonStart++, exonEnd++, frames++)
     {
     struct exonInfo *gi;
     int thisStart = *exonStart;
+    int thisEnd = *exonEnd;
+    if (!utr)
+        {
     if (thisStart > pred->cdsEnd)
         break;
     if (thisStart < pred->cdsStart)
         thisStart = pred->cdsStart;
-    int thisEnd = *exonEnd;
     if (thisEnd > pred->cdsEnd)
         thisEnd = pred->cdsEnd;
+        }
     int thisSize = thisEnd - thisStart;
+    if (!utr)
     verbose(3, "in %d %d cds %d %d\n",*exonStart,*exonEnd, thisStart, thisEnd);
-
     AllocVar(gi);
     gi->frame = *frames;
     gi->name = pred->name;
     gi->ali = getAliForRange(database, mafTable, pred->chrom,
         thisStart, thisEnd);
     gi->chromStart = thisStart;
     gi->chromEnd = thisEnd;
     gi->exonStart = start;
     gi->exonSize = thisSize;
     verbose(3, "exon size %d\n",
         thisSize);
     gi->strand = pred->strand[0];
     start += gi->exonSize;
     slAddHead(&giList, gi);
+    if (!utr)
+        {
     if (thisEnd == pred->cdsEnd)
         break;
     }
+    }
 slReverse(&giList);
 return giList;
 }
 void mafGeneOutPred(FILE *f, struct genePred *pred, char *dbName,
     char *mafTable, struct slName *speciesNameList, unsigned options,
     int numCols)
 {
 boolean inExons = options & MAFGENE_EXONS;
 if (pred->cdsStart == pred->cdsEnd)
     return;
 if (numCols < -1)
     errAbort("Number of columns must be zero or greater.");
-struct exonInfo *giList = buildGIList(dbName, pred, mafTable);
+struct exonInfo *giList = buildGIList(dbName, pred, mafTable, options);
 if (giList == NULL)
     return;
 struct hash *speciesInfoHash = newHash(5);
 struct speciesInfo *speciesList = getSpeciesInfo(giList, speciesNameList,
     speciesInfoHash);
 copyMafs(speciesInfoHash, &giList, inExons);
 struct speciesInfo *si = speciesList;
 for(; si ; si = si->next)
     si->curPosString = si->posStrings;
 writeOutSpecies(f, dbName, speciesList, giList, options, numCols);