81b2ee7407e5916847893c4851b38e0788efe728 angie Wed May 30 15:07:16 2018 -0700 LRG mappings to the reference genomes used to always span the entire LRG, but they don't anymore; there may be one mapping that spans the entire LRG and another mapping that spans only part of it. Instead of assuming that lrgStart is 0 and lrgEnd is size, detect if they're not, and in that case bundle lrgStart and lrgEnd into the name field and parse back out when making PSL. sorta refs #13359 #17877 diff --git src/hg/lib/lrg.c src/hg/lib/lrg.c index 8a840c8..f72dd50 100644 --- src/hg/lib/lrg.c +++ src/hg/lib/lrg.c @@ -335,56 +335,61 @@ freeMem(diff->lrgSeq); } slFreeList(pDiff); } static int calcBlockSize(uint nextTStart, uint nextQStart, struct psl *psl, int blockIx) { int tLen = nextTStart - psl->tStarts[blockIx]; int qLen = nextQStart - psl->qStarts[blockIx]; return min(tLen, qLen); } struct psl *lrgToPsl(struct lrg *lrg, uint chromSize) /* Use lrg's mismatches and indels to make a PSL. */ { -struct psl *psl; -AllocVar(psl); struct lrgDiff *mismatches = lrgParseMismatches(lrg), *indels = lrgParseIndels(lrg); +int lrgStart = 0, lrgEnd = lrg->lrgSize; +char *lrgName = lrg->name; +char lrgNameBuf[strlen(lrgName)+1]; +if (strchr(lrgName, ':')) + { + // Parse lrgStart and lrgEnd out of name field (LRG_*:start-end) + safecpy(lrgNameBuf, sizeof(lrgNameBuf), lrgName); + char *p = strchr(lrgNameBuf, ':'); + // Chop at : (after name) + *p++ = '\0'; + char *num = p; + p = strchr(num, '-'); + if (p == NULL) + errAbort("Error parsing LRG name:start-end '%s' -- no '-' following ':'", lrg->name); + *p++ = '\0'; + lrgStart = atoi(num) - 1; + num = p; + lrgEnd = atoi(num); + lrgName = lrgNameBuf; + } +int blockCount = slCount(indels) + 1; +unsigned opts = 0; +struct psl *psl = pslNew(lrgName, lrg->lrgSize, lrgStart, lrgEnd, + lrg->chrom, chromSize, lrg->chromStart, lrg->chromEnd, + lrg->strand, blockCount, opts); +psl->blockCount = blockCount; psl->misMatch = slCount(mismatches); -psl->repMatch = 0; -psl->nCount = 0; -psl->qNumInsert = 0; -psl->qBaseInsert = 0; -psl->tNumInsert = 0; -psl->tBaseInsert = 0; -psl->strand[0] = lrg->strand[0]; -psl->qName = cloneString(lrg->name); -psl->qSize = lrg->lrgSize; -psl->qStart = 0; -psl->qEnd = lrg->lrgSize; -psl->tName = cloneString(lrg->chrom); -psl->tSize = chromSize; -psl->tStart = lrg->chromStart; -psl->tEnd = lrg->chromEnd; -psl->blockCount = slCount(indels) + 1; -AllocArray(psl->blockSizes, psl->blockCount); -AllocArray(psl->qStarts, psl->blockCount); -AllocArray(psl->tStarts, psl->blockCount); boolean isRc = (lrg->strand[0] == '-'); // Translate gap coords from indels into block coords: -psl->qStarts[0] = 0; +psl->qStarts[0] = isRc ? (lrg->lrgSize - lrgEnd) : lrgStart; psl->tStarts[0] = lrg->chromStart; int alignedBaseCount = 0; int blockIx = 1; struct lrgDiff *diff; for (diff = indels; diff != NULL; diff = diff->next) { // now we know the size of the previous block: int lrgStartPosStrand = isRc ? (lrg->lrgSize - diff->lrgEnd) : diff->lrgStart; psl->blockSizes[blockIx-1] = calcBlockSize(diff->chromStart, lrgStartPosStrand, psl, blockIx-1); alignedBaseCount += psl->blockSizes[blockIx-1]; // Insertion on t or q? int lrgLen = diff->lrgEnd - diff->lrgStart; int refLen = diff->chromEnd - diff->chromStart; if (lrgLen > refLen) {