src/hg/pslPseudo/checkExp/checkExp.c 1.6
1.6 2009/08/02 21:26:09 baertsch
type checking and add -l ssl
Index: src/hg/pslPseudo/checkExp/checkExp.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/pslPseudo/checkExp/checkExp.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/pslPseudo/checkExp/checkExp.c 6 Dec 2004 23:30:36 -0000 1.5
+++ src/hg/pslPseudo/checkExp/checkExp.c 2 Aug 2009 21:26:09 -0000 1.6
@@ -1,721 +1,723 @@
/* checkExp - check if retroGene aligns better to parent or retroGene */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "memalloc.h"
#include "psl.h"
#include "bed.h"
#include "hdb.h"
#include "axt.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "nib.h"
#include "fa.h"
#include "dlist.h"
#include "dystring.h"
#include "binRange.h"
#include "options.h"
#include "genePred.h"
#include "genePredReader.h"
#include "dystring.h"
#include "pseudoGeneLink.h"
#include "verbose.h"
#include "chain.h"
#include "twoBit.h"
#include "chainToAxt.h"
static char const rcsid[] = "$Id$";
struct axtScoreScheme *ss = NULL; /* blastz scoring matrix */
struct dnaSeq *mrnaList = NULL; /* list of all input mrna sequences */
struct hash *pseudoHash = NULL, *mrnaHash = NULL, *chainHash = NULL, *faHash = NULL, *tHash = NULL;
int maxGap = 100;
struct dlList *fileCache = NULL;
struct hash *nibHash = NULL;
FILE *outFile = NULL; /* output tab sep file */
struct twoBitFile *twoBitFile = NULL;
struct misMatch
/* list of mismatches between gene and retrogene */
{
struct misMatch *next; /* next in list. */
int retroLoc; /* genomic location of mismatch in retroGene */
int parentLoc; /* genomic location of mismatch in parent Gene */
char retroBase; /* contents of base in retroGene */
char parentBase; /* contents of base in parent gene */
};
struct cachedFile
/* File in cache. */
{
struct cachedFile *next; /* next in list. */
char *name; /* File name (allocated here) */
FILE *f; /* Open file. */
};
struct seqFilePos
/* Where a sequence is in a file. */
{
struct filePos *next; /* Next in list. */
char *name; /* Sequence name. Allocated in hash. */
char *file; /* Sequence file name, allocated in hash. */
long pos; /* Position in fa file/size of nib. */
bool isNib; /* True if a nib file. */
};
void usage()
/* Print usage instructions and exit. */
{
errAbort(
"checkExp - check if retroGene aligns better to parent or retroGene \n"
"usage:\n"
" checkExp pseudoGeneLink.bed chrom.sizes all_mrna.psl target.lst mrna.2bit chrX_self.chain nibDir score.tab\n\n"
"where \n"
"pseudoGeneLink.bed is a list of retroGenes records \n"
"chrom.sizes is a list of chromosome followed by size\n"
"all_mrna.psl contains blat mRNA alignment file\n"
"target.lst contains list nib files\n"
"mrna.2bit contains fasta sequences of all mrnas \n"
"chrX_self.chain blastz Self chain\n"
"directory containing nibs for self alignment\n"
"score.tab is output\n"
);
}
void writeGap(struct dyString *aRes, int aGap, char *aSeq, struct dyString *bRes, int bGap, char *bSeq)
/* Write double - gap. Something like:
* --c
* ag- */
{
dyStringAppendMultiC(aRes, '-', bGap);
dyStringAppendN(bRes, bSeq, bGap);
dyStringAppendN(aRes, aSeq, aGap);
dyStringAppendMultiC(bRes, '-', aGap);
}
void writeInsert(struct dyString *aRes, struct dyString *bRes, char *aSeq, int gapSize)
/* Write out gap, possibly shortened, to aRes, bRes. */
{
dyStringAppendN(aRes, aSeq, gapSize);
dyStringAppendMultiC(bRes, '-', gapSize);
}
struct hash *readChainToBinKeeper(char *sizeFileName, char *fileName)
{
struct binKeeper *bk;
struct chain *chain;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct lineFile *sf = lineFileOpen(sizeFileName, TRUE);
struct hash *hash = newHash(0);
char *chromRow[2];
while (lineFileRow(sf, chromRow))
{
char *name = chromRow[0];
int size = lineFileNeedNum(sf, chromRow, 1);
if (hashLookup(hash, name) != NULL)
warn("Duplicate %s, ignoring all but first\n", name);
else
{
bk = binKeeperNew(0, size);
assert(size > 1);
hashAdd(hash, name, bk);
}
}
while ((chain = chainRead(lf)) != NULL)
{
bk = hashMustFindVal(hash, chain->tName);
binKeeperAdd(bk, chain->tStart, chain->tEnd, chain);
}
lineFileClose(&lf);
return hash;
}
struct dnaSeq *nibInfoLoadSeq(struct nibInfo *nib, int start, int size)
/* Load in a sequence in mixed case from nib file. */
{
return nibLdPartMasked(NIB_MASK_MIXED, nib->fileName, nib->f, nib->size,
start, size);
}
struct dnaSeq *nibInfoLoadStrand(struct nibInfo *nib, int start, int end,
char strand)
/* Load in a mixed case sequence from nib file, from reverse strand if
* strand is '-'. */
{
struct dnaSeq *seq;
int size = end - start;
assert(size >= 0);
if (strand == '-')
{
reverseIntRange(&start, &end, nib->size);
seq = nibInfoLoadSeq(nib, start, size);
reverseComplement(seq->dna, seq->size);
}
else
{
seq = nibInfoLoadSeq(nib, start, size);
}
return seq;
}
/*
struct hash *readPslToHash(char *pslFileName)
{
int size;
struct psl *psl;
struct lineFile *pf = lineFileOpen(pslFileName , TRUE);
struct hash *hash = newHash(0);
char *chromRow[2];
char *row[21] ;
}
while (lineFileNextRow(pf, row, ArraySize(row)))
{
psl = pslLoad(row);
if (hashFindVal(hash, psl->qName) == NULL)
hashAdd(hash, name, psl);
}
lineFileClose(&pf);
return hash;
}
*/
struct dnaSeq *readSeqFromFaPos(struct seqFilePos *sfp, FILE *f)
/* Read part of FA file. */
{
struct dnaSeq *seq;
fseek(f, sfp->pos, SEEK_SET);
if (!faReadNext(f, "", TRUE, NULL, &seq))
errAbort("Couldn't faReadNext on %s in %s\n", sfp->name, sfp->file);
return seq;
}
FILE *openFromCache(struct dlList *cache, char *fileName)
/* Return open file handle via cache. The simple logic here
* depends on not more than N files being returned at once. */
{
static int maxCacheSize=32;
int cacheSize = 0;
struct dlNode *node;
struct cachedFile *cf;
/* First loop through trying to find it in cache, counting
* cache size as we go. */
for (node = cache->head; !dlEnd(node); node = node->next)
{
++cacheSize;
cf = node->val;
if (sameString(fileName, cf->name))
{
dlRemove(node);
dlAddHead(cache, node);
return cf->f;
}
}
/* If cache has reached max size free least recently used. */
if (cacheSize >= maxCacheSize)
{
node = dlPopTail(cache);
cf = node->val;
carefulClose(&cf->f);
freeMem(cf->name);
freeMem(cf);
freeMem(node);
}
/* Cache new file. */
AllocVar(cf);
cf->name = cloneString(fileName);
cf->f = mustOpen(fileName, "rb");
dlAddValHead(cache, cf);
return cf->f;
}
void readCachedSeqPart(char *seqName, int start, int size,
struct hash *hash, struct dlList *fileCache,
struct dnaSeq **retSeq, int *retOffset, boolean *retIsNib)
/* Read sequence hopefully using file cashe. If sequence is in a nib
* file just read part of it. */
{
struct seqFilePos *sfp = hashMustFindVal(hash, seqName);
FILE *f = openFromCache(fileCache, sfp->file);
unsigned options = NIB_MASK_MIXED;
if (sfp->isNib)
{
*retSeq = nibLdPartMasked(options, sfp->file, f, sfp->pos, start, size);
*retOffset = start;
*retIsNib = TRUE;
}
else
{
*retSeq = readSeqFromFaPos(sfp, f);
*retOffset = 0;
*retIsNib = FALSE;
}
}
struct axt *axtCreate(char *q, char *t, int size, struct psl *psl)
/* create axt */
{
int qs = psl->qStart, qe = psl->qEnd;
int ts = psl->tStart, te = psl->tEnd;
int symCount = 0;
struct axt *axt = NULL;
AllocVar(axt);
if (psl->strand[0] == '-')
reverseIntRange(&qs, &qe, psl->qSize);
if (psl->strand[1] == '-')
reverseIntRange(&ts, &te, psl->tSize);
axt->qName = cloneString(psl->qName);
axt->tName = cloneString(psl->tName);
axt->qStart = qs+1;
axt->qEnd = qe;
axt->qStrand = psl->strand[0];
axt->tStrand = '+';
if (psl->strand[1] != 0)
{
axt->tStart = ts+1;
axt->tEnd = te;
}
else
{
axt->tStart = psl->tStart+1;
axt->tEnd = psl->tEnd;
}
axt->symCount = symCount = strlen(t);
axt->tSym = cloneString(t);
if (strlen(q) != symCount)
warn("Symbol count %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n",
- symCount, strlen(q), psl->tName, psl->tStart, psl->qName, t, q);
+ symCount, (int)strlen(q), psl->tName, psl->tStart, psl->qName, t, q);
axt->qSym = cloneString(q);
axt->score = axtScoreFilterRepeats(axt, ss);
verbose(1,"axt score = %d\n",axt->score);
//for (i=0; i<size ; i++)
// fputc(t[i],f);
//for (i=0; i<size ; i++)
// fputc(q[i],f);
return axt;
}
struct axt *pslToAxt(struct psl *psl, struct hash *qHash, char *tNibDir,
struct dlList *fileCache)
{
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL;
struct dyString *q = newDyString(16*1024);
struct dyString *t = newDyString(16*1024);
int blockIx;
int qs, ts ;
int lastQ = 0, lastT = 0, size;
int qOffset = 0;
int tOffset = 0;
struct axt *axt = NULL;
boolean qIsNib = FALSE;
boolean tIsNib = FALSE;
int cnt = 0;
//struct dnaSeq *tSeq = NULL;
struct nibInfo *tNib = NULL;
struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);
// hGenBankGetMrna(psl->qName, NULL);
/*
freeDnaSeq(&qSeq);
freez(&qName);
assert(mrnaList != NULL);
for (mrna = mrnaList; mrna != NULL ; mrna = mrna->next)
{
assert(mrna != NULL);
cnt++;
if (sameString(mrna->name, psl->qName))
{
qSeq = cloneDnaSeq(mrna);
assert(qSeq != NULL);
break;
}
}
*/
if (qSeq == NULL)
{
warn("mrna sequence data not found %s, searched %d sequences\n",psl->qName,cnt);
dyStringFree(&q);
dyStringFree(&t);
dnaSeqFree(&tSeq);
dnaSeqFree(&qSeq);
return NULL;
}
if (qSeq->size != psl->qSize)
{
warn("sequence %s aligned is different size %d from mrna.fa file %d \n",psl->qName,psl->qSize,qSeq->size);
dyStringFree(&q);
dyStringFree(&t);
dnaSeqFree(&tSeq);
dnaSeqFree(&qSeq);
return NULL;
}
qName = cloneString(psl->qName);
if (qIsNib && psl->strand[0] == '-')
qOffset = psl->qSize - psl->qEnd;
else
qOffset = 0;
verbose(5,"qString len = %d qOffset = %d\n",qSeq->size,qOffset);
if (tName == NULL || !sameString(tName, psl->tName) || tIsNib)
{
freeDnaSeq(&tSeq);
freez(&tName);
tName = cloneString(psl->tName);
tNib = nibInfoFromCache(nibHash, tNibDir, tName);
assert(tNib !=NULL);
tSeq = nibInfoLoadStrand(tNib, psl->tStart, psl->tEnd, '+');
assert(tSeq !=NULL);
tOffset = psl->tStart;
//readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart,
// tHash, fileCache, &tSeq, &tOffset, &tIsNib);
}
verbose(4,"strand t %s \n",psl->strand);
if (tSeq != NULL)
verbose(5,"tString len = %d tOffset = %d\n",tSeq->size,tOffset);
else
errAbort("tSeq is NULL\n");
if (psl->strand[0] == '-')
reverseComplement(qSeq->dna, qSeq->size);
//if (strlen(psl->strand) > 1 )
// if (psl->strand[1] == '-')
// reverseComplement(tSeq->dna, tSeq->size);
for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
{
qs = psl->qStarts[blockIx] - qOffset;
ts = psl->tStarts[blockIx] - tOffset;
if (blockIx != 0)
{
int qGap, tGap, minGap;
qGap = qs - lastQ;
tGap = ts - lastT;
minGap = min(qGap, tGap);
if (minGap > 0)
{
writeGap(q, qGap, qSeq->dna + lastQ, t, tGap, tSeq->dna + lastT);
}
else if (qGap > 0)
{
writeInsert(q, t, qSeq->dna + lastQ, qGap);
}
else if (tGap > 0)
{
writeInsert(t, q, tSeq->dna + lastT, tGap);
}
}
size = psl->blockSizes[blockIx];
assert(qSeq != NULL);
dyStringAppendN(q, qSeq->dna + qs, size);
lastQ = qs + size;
dyStringAppendN(t, tSeq->dna + ts, size);
lastT = ts + size;
}
if (strlen(q->string) != strlen(t->string))
warn("Symbol count(t) %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n",
- strlen(t->string), strlen(q->string), psl->tName, psl->tStart, psl->qName, t->string, q->string);
+ (int)strlen(t->string), (int)strlen(q->string), psl->tName, psl->tStart, psl->qName, t->string, q->string);
if (psl->strand[0] == '-')
{
reverseComplement(q->string, q->stringSize);
reverseComplement(t->string, t->stringSize);
}
axt = axtCreate(q->string, t->string, min(q->stringSize,t->stringSize), psl);
dyStringFree(&q);
dyStringFree(&t);
//dnaSeqFree(&tSeq);
dnaSeqFree(&qSeq);
if (qIsNib)
freez(&qName);
//if (tIsNib)
// freez(&tName);
return axt;
}
struct misMatch *matchFound(struct misMatch **misMatchList, int retroLoc)
{
struct misMatch *el;
for (el = *misMatchList; el != NULL ; el = el->next)
{
verbose(5,"%d == %d %c %c\n",el->retroLoc, retroLoc, el->retroBase, el->parentBase);
if (el->retroLoc == retroLoc && el->retroBase != '-' && el->parentBase != '-')
return el;
}
return NULL;
}
void addMisMatch(struct misMatch **misMatchList, struct axt *axt, int qSize)
{
int i;
struct misMatch *misMatch = NULL;
for (i = 0 ; i <= axt->symCount ; i++)
{
if (axt->tSym[i] != axt->qSym[i])
{
/* check to see if we already have this one
* if not, add it */
if (matchFound(misMatchList, axt->tStart+i) == NULL)
{
AllocVar(misMatch);
misMatch->retroLoc = axt->tStart+i;
if (axt->qStrand == '+')
misMatch->parentLoc = axt->qStart+i;
else
misMatch->parentLoc = qSize-(axt->qStart+i);
misMatch->retroBase = axt->tSym[i];
misMatch->parentBase = axt->qSym[i];
slAddHead(misMatchList, misMatch);
verbose(5, "listLen = %d\n",slCount(misMatchList));
verbose(4,"add [%d] %d %d R %c P %c\n",
i,
misMatch->retroLoc ,
misMatch->parentLoc,
misMatch->retroBase,
misMatch->parentBase);
}
}
else
{
verbose(8,"don't add [%d] %d \n",i, axt->tStart+i);
}
}
}
void checkExp(char *bedFileName, char *tNibDir, char *nibList)
{
struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL;
char *row[PSEUDOGENELINK_NUM_COLS] ;
struct pseudoGeneLink *ps;
char *tmpName[512], cmd[512];
struct axt *axtList = NULL, *axt, *mAxt = NULL;
struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL;
struct nibInfo *qNib = NULL, *tNib = NULL;
FILE *op;
int ret;
if (nibHash == NULL)
nibHash = hashNew(0);
while (lineFileNextRow(bf, row, ArraySize(row)))
{
struct misMatch *misMatchList = NULL;
struct binKeeper *bk = NULL;
struct binElement *el, *elist = NULL;
struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ;
struct misMatch *mf = NULL;
ps = pseudoGeneLinkLoad(row);
tmpName[0] = cloneString(ps->name);
chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName));
verbose(2,"name %s %s:%d-%d\n",
ps->name, ps->chrom, ps->chromStart,ps->chromEnd);
/* get expressed retro from hash */
bk = hashFindVal(mrnaHash, ps->chrom);
elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ;
for (el = elist; el != NULL ; el = el->next)
{
rPsl = el->val;
verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd);
}
/* find mrnas that overlap parent gene */
bk = hashFindVal(mrnaHash, ps->gChrom);
elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ;
for (el = elist; el != NULL ; el = el->next)
{
pPsl = el->val;
verbose(2,"parent %s %s:%d %d,%d\n",
pPsl->qName, pPsl->tName,pPsl->tStart,
pPsl->match, pPsl->misMatch);
}
/* find self chain */
bk = hashFindVal(chainHash, ps->chrom);
elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ;
slSort(&elist, chainCmpScoreDesc);
for (el = elist; el != NULL ; el = el->next)
{
struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2;
int qs = chain->qStart;
int qe = chain->qEnd;
int id = chain->id;
if (chain->qStrand == '-')
{
qs = chain->qSize - chain->qEnd;
qe = chain->qSize - chain->qStart;
}
if (!sameString(chain->qName , ps->gChrom) ||
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd))
{
verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n",
chain->qName, qs, qe,
chain->tName,chain->tStart,chain->tEnd,
ps->gChrom,ps->gStart,ps->gEnd);
continue;
}
verbose(2,"chain id %d %4.0f",chain->id, chain->score);
chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7,
&subChain, &retChainToFree);
if (subChain != NULL)
chain = subChain;
chainSubsetOnQ(chain, ps->gStart, ps->gEnd,
&subChain, &retChainToFree2);
if (subChain != NULL)
chain = subChain;
if (chain->qStrand == '-')
{
qs = chain->qSize - chain->qEnd;
qe = chain->qSize - chain->qStart;
}
verbose(2," %s:%d-%d %s:%d-%d ",
chain->qName, qs, qe,
chain->tName,chain->tStart,chain->tEnd);
if (subChain != NULL)
verbose(2,"subChain %s:%d-%d %s:%d-%d\n",
subChain->qName, subChain->qStart, subChain->qEnd,
subChain->tName,subChain->tStart,subChain->tEnd);
qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName);
tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName);
tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+');
qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand);
axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart,
maxGap, BIGNUM);
verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList));
for (axt = axtList; axt != NULL ; axt = axt->next)
{
addMisMatch(&misMatchList, axt, chain->qSize);
}
verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id);
chainFree(&retChainToFree);
chainFree(&retChainToFree2);
break;
}
/* create axt of each expressed retroGene to parent gene */
/* get alignment for each mrna overlapping retroGene */
bk = hashFindVal(mrnaHash, ps->chrom);
elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ;
{
char queryName[512];
char axtName[512];
char pslName[512];
safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom);
safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom);
safef(pslName, sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom);
op = fopen(pslName,"w");
for (el = elist ; el != NULL ; el = el->next)
{
psl = el->val;
pslOutput(psl, op, '\t','\n');
qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);
if (qSeq != NULL)
slAddHead(&seqList, qSeq);
else
errAbort("seq %s not found \n", psl->qName);
}
fclose(op);
faWriteAll(queryName, seqList);
safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName);
ret = system(cmd);
if (ret != 0)
errAbort("ret is %d %s\n",ret,cmd);
verbose(2, "ret is %d %s\n",ret,cmd);
af = lineFileOpen(axtName, TRUE);
while ((axt = axtRead(af)) != NULL)
slAddHead(&mAxt, axt);
lineFileClose(&af);
}
slReverse(&mAxt);
/* for each parent/retro pair, count bases matching retro and parent better */
for (el = elist; el != NULL ; el = el->next)
{
int i, scoreRetro=0, scoreParent=0, scoreNeither=0;
struct dyString *parentMatch = newDyString(16*1024);
struct dyString *retroMatch = newDyString(16*1024);
mPsl = el->val;
if (mAxt != NULL)
{
verbose(2,"mrna %s %s:%d %d,%d axt %s\n",
mPsl->qName, mPsl->tName,mPsl->tStart,
mPsl->match, mPsl->misMatch,
mAxt->qName);
assert(sameString(mPsl->qName, mAxt->qName));
for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++)
{
int j = mAxt->tStart - mPsl->tStart;
verbose(5, "listLen = %d\n",slCount(&misMatchList));
if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL)
+ {
if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i]))
{
verbose (3,"match retro[%d] %d %c == %c parent %c %d\n",
i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i],
mf->parentBase, mf->parentLoc);
dyStringPrintf(retroMatch, "%d,", mf->retroLoc);
scoreRetro++;
}
else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i]))
{
verbose (3,"match parent[%d] %d %c == %c retro %c %d\n",
i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i],
mf->retroBase, mf->retroLoc);
dyStringPrintf(parentMatch, "%d,", mf->parentLoc);
scoreParent++;
}
else
{
verbose (3,"match neither[%d] %d %c != %c retro %c %d\n",
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i],
mf->retroBase, mf->retroLoc);
scoreNeither++;
}
}
+ }
verbose(2,"final score %s parent %d retro %d neither %d\n",
mPsl->qName, scoreParent, scoreRetro, scoreNeither);
fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n",
ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score,
mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName,
scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string);
mAxt = mAxt->next;
}
dyStringFree(&parentMatch);
dyStringFree(&retroMatch);
}
}
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionHash(&argc, argv);
if (argc != 9)
usage();
fileCache = newDlList();
maxGap = optionInt("maxGap", maxGap);
verboseSetLogFile("stdout");
ss = axtScoreSchemeDefault();
verbose(1,"Reading alignments from %s\n",argv[3]);
mrnaHash = readPslToBinKeeper(argv[2], argv[3]);
twoBitFile = twoBitOpen(argv[5]);
//verbose(1,"Reading alignments from %s\n",argv[]);
//pseudoHash = readPslToBinKeeper(argv[3], argv[]);
//verbose(1,"Reading mRNA sequences from %s\n",argv[5]);
//mrnaList = faReadAllMixed(argv[5]);
//if (mrnaList == NULL)
//errAbort("could not open %s\n",argv[5]);
//faHash = newHash(0);
//for (el = mrnaList; el != NULL ; el = el->next)
//hashAdd(faHash, el->name, el);
verbose(1,"Reading chains from %s\n",argv[6]);
chainHash = readChainToBinKeeper(argv[2], argv[6]);
outFile = fopen(argv[8],"w");
verbose(1,"Scoring %s\n",argv[1]);
checkExp(argv[1], argv[7], argv[4]);
fclose(outFile);
freeDnaSeqList(&mrnaList);
return(0);
}