src/hg/blastToPsl/pslBuild.c 1.2
1.2 2010/04/16 17:32:12 markd
add options to convert TBLASTN to codon nucleotide/nucleotide coordinates, fixed bug with tStart/tEnd extending beyond actual range
Index: src/hg/blastToPsl/pslBuild.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/blastToPsl/pslBuild.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -b -B -U 4 -r1.1 -r1.2
--- src/hg/blastToPsl/pslBuild.c 8 Feb 2010 03:04:23 -0000 1.1
+++ src/hg/blastToPsl/pslBuild.c 16 Apr 2010 17:32:12 -0000 1.2
@@ -30,16 +30,17 @@
/* coordinates of a block use during build */
{
char *qAln; /* alignment strings (not owned) */
char *tAln;
- int qStart; /* Query start/end positions. */
+ int qStart; /* Query start/end positions, in blast units */
int qEnd;
- int tStart; /* Target start/end position. */
+ int tStart; /* Target start/end position, in blast units */
int tEnd;
int qLetMult; /* each letter counts as this many units */
int tLetMult;
- int qSizeMult; /* user to convert coordinates */
- int tSizeMult;
+ int qCoordMult; /* used to convert coordinates */
+ int tCoordMult;
+ int q2tBlkSizeMult; /* convert query block size to target block size */
int countMult; /* column count for match/mismatch */
int alnStart; /* start/end in alignment */
int alnEnd;
};
@@ -54,30 +55,45 @@
// initialize ends to starts for first call to nextUngappedBlk
blk->qEnd = qStart;
blk->tEnd = tStart;
-if (flags & tblastn)
+if (flags & cnvNucCoords)
{
+ // DNA/DNA PSL from protein/DNA align
+ assert(flags & tblastn);
blk->qLetMult = 1;
- blk->qSizeMult = 1;
+ blk->qCoordMult = 3;
blk->tLetMult = 3;
- blk->tSizeMult = 3;
+ blk->tCoordMult = 1;
+ blk->q2tBlkSizeMult = 3;
+ blk->countMult = 3;
+ }
+else if (flags & tblastn)
+ {
+ // protein/DNA PSL
+ blk->qLetMult = 1;
+ blk->qCoordMult = 1;
+ blk->tLetMult = 3;
+ blk->tCoordMult = 1;
+ blk->q2tBlkSizeMult = 3;
blk->countMult = 1;
}
else if (flags & tblastx)
{
blk->qLetMult = 3;
- blk->qSizeMult = 1;
+ blk->qCoordMult = 1;
blk->tLetMult = 3;
- blk->tSizeMult = 1;
+ blk->tCoordMult = 1;
+ blk->q2tBlkSizeMult = 1;
blk->countMult = 3;
}
else
{
blk->qLetMult = 1;
- blk->qSizeMult = 1;
+ blk->qCoordMult = 1;
blk->tLetMult = 1;
- blk->tSizeMult = 1;
+ blk->tCoordMult = 1;
+ blk->q2tBlkSizeMult = 1;
blk->countMult = 1;
}
}
@@ -131,31 +147,31 @@
tPtr++;
blk->alnEnd++;
}
-assert((blk->tSizeMult * (blk->qEnd - blk->qStart)) == (blk->qSizeMult * (blk->tEnd - blk->tStart)));
+// sanity test for blast blocks being the same length after conversion to the same units
+assert((blk->tLetMult * (blk->qEnd - blk->qStart)) == (blk->qLetMult * (blk->tEnd - blk->tStart)));
return TRUE;
}
static void addUngappedBlock(struct psl* psl, int* pslSpace, struct block* blk, unsigned flags)
/* add the next ungapped block to a psl */
{
unsigned newIBlk = psl->blockCount;
-unsigned blkSize = blk->qSizeMult * (blk->qEnd - blk->qStart);
+unsigned blkSize = blk->qEnd - blk->qStart; // uses query size so protein psl is right
if (newIBlk >= *pslSpace)
pslGrow(psl, pslSpace);
-psl->qStarts[newIBlk] = blk->qStart;
-psl->tStarts[newIBlk] = blk->tStart;
-/* uses query size so protein psl is right */
-psl->blockSizes[newIBlk] = blkSize;
+psl->qStarts[newIBlk] = blk->qCoordMult * blk->qStart;
+psl->tStarts[newIBlk] = blk->tCoordMult * blk->tStart;
+psl->blockSizes[newIBlk] = blk->qCoordMult * blkSize;
/* keep bounds current */
psl->qStart = psl->qStarts[0];
-psl->qEnd = psl->qStarts[newIBlk] + (blk->qSizeMult * psl->blockSizes[newIBlk]);
+psl->qEnd = psl->qStarts[newIBlk] + (blk->qCoordMult * blkSize);
if (psl->strand[0] == '-')
reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
psl->tStart = psl->tStarts[0];
-psl->tEnd = psl->tStarts[newIBlk] + (blk->tSizeMult * psl->blockSizes[newIBlk]);
+psl->tEnd = psl->tStarts[newIBlk] + (blk->q2tBlkSizeMult * blkSize);
if (psl->strand[1] == '-')
reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
if (flags & bldPslx)
@@ -259,18 +275,20 @@
{
if ((flags & tblastx) && (flags & bldPslx))
errAbort("can't convert TBLASTX to PSLX");
+struct block blk;
+blkInit(&blk, qStart, qAln, tStart, tAln, flags);
+
// construct psl
int pslSpace = 8;
char strand[3];
safef(strand, sizeof(strand), "%c%c", qStrand, tStrand);
-struct psl *psl = pslNew(qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd,
+struct psl *psl = pslNew(qName, blk.qCoordMult * qSize, blk.qCoordMult * qStart, blk.qCoordMult * qEnd,
+ tName, blk.tCoordMult * tSize, blk.tCoordMult * tStart, blk.tCoordMult * tEnd,
strand, pslSpace, ((flags & bldPslx) ? PSL_XA_FORMAT : 0));
// fill in psl
-struct block blk;
-blkInit(&blk, qStart, qAln, tStart, tAln, flags);
hspToBlocks(psl, &pslSpace, &blk, flags);
finishPsl(psl, flags);
return psl;
}