src/hg/genePredToFakePsl/genePredToFakePsl.c 1.4
1.4 2010/04/28 04:46:37 markd
Fixed incorrect calculation of CDS
refactor
Index: src/hg/genePredToFakePsl/genePredToFakePsl.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/genePredToFakePsl/genePredToFakePsl.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 4 -r1.3 -r1.4
--- src/hg/genePredToFakePsl/genePredToFakePsl.c 3 Sep 2008 19:18:39 -0000 1.3
+++ src/hg/genePredToFakePsl/genePredToFakePsl.c 28 Apr 2010 04:46:37 -0000 1.4
@@ -32,87 +32,85 @@
"e.g. NM_123456 34..305\n"
"\n");
}
-void fakePslFromGenePred(char *db, char *fileTbl, char *pslOut, char *cdsOut)
-/* check a genePred */
+static void cnvGenePredCds(struct genePred *gp, int qSize, FILE *cdsFh)
+/* determine CDS and output */
{
-struct sqlConnection *conn = NULL;
-struct genePredReader *gpr;
-struct genePred *gp;
-int iRec = 0;
-int chromSize = -1;
-FILE *out = mustOpen(pslOut, "w");
-FILE *cds = mustOpen(cdsOut, "w");
+int e, off = 0;
+int qCdsStart = -1, qCdsEnd = -1;
+int eCdsStart, eCdsEnd;
-conn = hAllocConn(db);
-
-if (fileExists(fileTbl))
+for (e = 0; e < gp->exonCount; ++e)
{
- gpr = genePredReaderFile(fileTbl, NULL);
- }
-else
+ if (genePredCdsExon(gp, e, &eCdsStart, &eCdsEnd))
{
- gpr = genePredReaderQuery(conn, fileTbl, NULL);
+ if (qCdsStart < 0)
+ qCdsStart = off + (eCdsStart - gp->exonStarts[e]);
+ qCdsEnd = off + (eCdsEnd - gp->exonStarts[e]);
+ }
+ off += gp->exonEnds[e] - gp->exonStarts[e];
}
+if (gp->strand[0] == '-')
+ reverseIntRange(&qCdsStart, &qCdsEnd, qSize);
+fprintf(cdsFh,"%s\t%d..%d\n", gp->name, qCdsStart+1, qCdsEnd); /* genbank cds is closed 1-based */
+}
-while ((gp = genePredReaderNext(gpr)) != NULL)
- {
- struct psl *psl;
- int e = 0, qSize=0, qCdsStart=0, qCdsEnd=0;
+static void cnvGenePred(char *db, struct genePred *gp, FILE *pslFh, FILE *cdsFh)
+/* convert a genePred to a psl and CDS */
+{
+int chromSize = hChromSize(db, gp->chrom);
+int e = 0, qSize=0;
- iRec++;
- chromSize = hChromSize(db, gp->chrom);
- for (e = 0; e < gp->exonCount; ++e)
+for (e = 0; e < gp->exonCount; ++e)
qSize+=(gp->exonEnds[e] - gp->exonStarts[e]);
- psl = pslNew(gp->name, qSize, 0, qSize,
+struct psl *psl = pslNew(gp->name, qSize, 0, qSize,
gp->chrom, chromSize, gp->txStart, gp->txEnd,
gp->strand, gp->exonCount, 0);
- psl->blockCount = gp->exonCount;
- for (e = 0; e < gp->exonCount; ++e)
+psl->blockCount = gp->exonCount;
+for (e = 0; e < gp->exonCount; ++e)
{
psl->blockSizes[e] = (gp->exonEnds[e] - gp->exonStarts[e]);
psl->qStarts[e] = e==0 ? 0 : psl->qStarts[e-1] + psl->blockSizes[e-1];
psl->tStarts[e] = gp->exonStarts[e];
}
- psl->match = qSize;
- psl->tNumInsert = psl->blockCount-1;
- psl->tBaseInsert = (gp->txEnd - gp->txStart) - qSize;
- pslTabOut(psl, out);
- qCdsStart = gp->cdsStart - gp->txStart;
- qCdsEnd = gp->cdsEnd - gp->txStart;
+psl->match = qSize;
+psl->tNumInsert = psl->blockCount-1;
+psl->tBaseInsert = (gp->txEnd - gp->txStart) - qSize;
+pslTabOut(psl, pslFh);
+pslFree(&psl);
+if (gp->cdsStart < gp->cdsEnd)
+ cnvGenePredCds(gp, qSize, cdsFh);
+}
- for (e = 1; e < gp->exonCount; ++e)
+static void fakePslFromGenePred(char *db, char *fileTbl, char *pslOut, char *cdsOut)
+/* check a genePred */
+{
+struct sqlConnection *conn = NULL;
+struct genePredReader *gpr;
+struct genePred *gp;
+FILE *pslFh = mustOpen(pslOut, "w");
+FILE *cdsFh = mustOpen(cdsOut, "w");
+
+conn = hAllocConn(db);
+
+if (fileExists(fileTbl))
{
- int intronSize = gp->exonStarts[e] - gp->exonEnds[e-1];
- if (gp->exonStarts[e] <= gp->cdsStart)
- qCdsStart -= intronSize;
- if (gp->exonStarts[e] <= gp->cdsEnd)
- qCdsEnd -= intronSize;
+ gpr = genePredReaderFile(fileTbl, NULL);
}
-
- if (gp->strand[0] == '-')
+else
{
- int temp = qCdsEnd;
- qCdsEnd = qSize - qCdsStart;
- qCdsStart = qSize - temp;
+ gpr = genePredReaderQuery(conn, fileTbl, NULL);
}
- fprintf(cds,"%s\t", gp->name);
- //if (gp->strand[0] == '-')
- // fprintf(cds,"complement(");
-
- fprintf(cds,"%d..%d", qCdsStart+1, qCdsEnd); /* genbank cds is closed 1-based */
-
- //if (gp->strand[0] == '-')
- // fprintf(cds,")");
- fprintf(cds,"\n");
-
- pslFree(&psl);
+while ((gp = genePredReaderNext(gpr)) != NULL)
+ {
+ cnvGenePred(db, gp, pslFh, cdsFh);
}
genePredReaderFree(&gpr);
-if (conn != NULL)
- hFreeConn(&conn);
+hFreeConn(&conn);
+carefulClose(&pslFh);
+carefulClose(&cdsFh);
}
int main(int argc, char *argv[])
/* Process command line. */