src/hg/pslCDnaGenomeMatch/pslCDnaGenomeMatch.c fc93e4649cf7f0c6b3ac1cf3bf176e1b39012cd5

fc93e4649cf7f0c6b3ac1cf3bf176e1b39012cd5
hartera
  Sat Jan 3 15:58:54 2015 -0800
Changed code to use only 2bit files instead of nib files.
diff --git src/hg/pslCDnaGenomeMatch/pslCDnaGenomeMatch.c src/hg/pslCDnaGenomeMatch/pslCDnaGenomeMatch.c
index 628f500..a99877f 100644
--- src/hg/pslCDnaGenomeMatch/pslCDnaGenomeMatch.c
+++ src/hg/pslCDnaGenomeMatch/pslCDnaGenomeMatch.c
@@ -1,29 +1,29 @@
 /* pslCDnaGenomeMatch - check if retroGene aligns better to parent or retroGene */
 
 /* Copyright (C) 2011 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "memalloc.h"
 #include "psl.h"
 #include "hdb.h"
 #include "axt.h"
 #include "dnautil.h"
 #include "dnaseq.h"
-#include "nib.h"
+#include "twoBit.h"
 #include "fa.h"
 #include "dlist.h"
 #include "dystring.h"
 #include "binRange.h"
 #include "options.h"
 #include "genePred.h"
 #include "genePredReader.h"
 #include "dystring.h"
 #include "pseudoGeneLink.h"
 #include "verbose.h"
 #include "nibTwo.h"
 #include "bed.h"
 #include "snp125.h"
 #include "pipeline.h"
 #define MINDIFF 5
@@ -34,35 +34,36 @@
 static char na[3] = "NA";
 struct axtScoreScheme *ss = NULL; /* blastz scoring matrix */
 struct hash *snpHash = NULL, *mrnaHash = NULL, *faHash = NULL, *tHash = NULL, *species1Hash = NULL, *species2Hash = NULL;
 int maxGap = 100;
 int minDiff = MINDIFF;
 int notAlignPenalty = NOTALIGNEDFACTOR;
 int aliCount = 0; /* number of alignments read */
 int mrnaCount = 0; /* number of mrna read */
 int filterCount = 0; /* number of mrna where best hit can be determined  */
 int outputCount = 0; /* number of alignments written */
 int verbosity = 1;
 int lociCounter = 0;
 bool passthru = FALSE;
 bool computeSS = FALSE;
 struct dlList *fileCache = NULL;
-struct hash *nibHash = NULL;
+//struct hash *nibHash = NULL;
 FILE *outFile = NULL; /* output tab sep file */
 FILE *bedFile = NULL; /* output bed file */
 FILE *scoreFile = NULL; /* output score file */
-struct twoBitFile *twoBitFile = NULL;
+struct twoBitFile *cdnaSeqFile = NULL;
+struct twoBitFile *genomeSeqFile = NULL;
 char *species1 = NULL;
 char *species2 = NULL;
 char *nibdir1 = NULL;
 char *nibdir2 = NULL;
 char *mrna1 = NULL;
 char *mrna2 = NULL;
 char *bedOut = NULL;
 char *scoreOut = NULL;
 char *snpFile = NULL; /* snp tab file (browser format)*/
 int histogram[256][256];       /* histogram of counts for suff statistics, index is a,c,g,t,-,. */
 float  histoNorm[256][256]; /* normalized histogram for suff statistics, index is a,c,g,t,-,. */
 
 /* command line */
 static struct optionSpec optionSpecs[] = {
     {"species1", OPTION_STRING},
@@ -109,66 +110,57 @@
     char **snpObs;              /* snp observed bases */
     char **snps;                /* snp ids */
     /* not used */
     int retroLoc;               /* genomic location of mismatch in retroGene */
     int parentLoc;              /* genomic location of mismatch in parent Gene */
     char retroBase;             /* contents of base in retroGene */
     char parentBase;            /* contents of base in parent gene */
     };
 
 struct alignment
 /* alignment with context info */
     {
     struct alignment *next;
     struct psl *psl;        /* alignment of mrna for this species */
     char *db;                   /* name of database of species */
-    char *nibDir;               /* directory containing nib files */
+    struct twoBitFile *twoBitSeqFile;           /* open two bit file */
     char *mrnaPath;             /* path to location of mrna sequence */
     struct dnaSeq *tSeq;        /* sequence in genome */
     struct dnaSeq *qSeq;        /* mrna sequence */
     char **qSequence;  /* query sequence for each block */
     char **tSequence;  /* target sequence for each block */
     };
 
 struct cachedFile
 /* File in cache. */
     {
     struct cachedFile *next;	/* next in list. */
     char *name;		/* File name (allocated here) */
     FILE *f;		/* Open file. */
     };
 
-struct seqFilePos
-/* Where a sequence is in a file. */
-    {
-    struct filePos *next;	/* Next in list. */
-    char *name;	/* Sequence name. Allocated in hash. */
-    char *file;	/* Sequence file name, allocated in hash. */
-    long pos; /* Position in fa file/size of nib. */
-    bool isNib;	/* True if a nib file. */
-    };
 void usage()
 /* Print usage instructions and exit. */
 {
 errAbort(
     "pslCDnaGenomeMatch - check if retroGene aligns better to parent or retroGene \n"
     "usage:\n"
-    "    pslCDnaGenomeMatch input.psl chrom.sizes cdna.2bit nibDir output.psl\n"
+    "    pslCDnaGenomeMatch input.psl chrom.sizes cdna.2bit genome.2bit output.psl\n"
     "input.psl contains input mRNA/EST alignment psl file SORTED by qName\n"
     "chrom.sizes is a list of chromosome followed by size\n"
-    "cdna.2bit contains fasta sequences of all mrnas/EST \n"
-    "directory containing nibs of genome\n"
+    "cdna.2bit contains fasta sequences of all mrnas/EST in 2bit format\n"
+    "genome.2bit contains fasta genome sequences in 2bit format\n"
     "output.psl contains filtered alignments for best matches and cases where no filtering occurred.\n\n"
     "Options: \n"
     "    -score=output.tab  is output containing mismatch info\n" 
     "    -computeSS compute sufficient statistics instead of filtering\n"
     "    -passthru  if best hit cannot be decided, then pass through all alignments\n"
     "    -minDiff=N minimum difference in score to filter out 2nd best hit (default %d)\n"
     "    -notAlignPenalty=N score non-aligning bases with 1/N (default %d)\n"
     "    -bedOut=bed output file of mismatches.\n"
     "    -species1=psl file with alignment of mrna/EST to other species.\n"
     "    -species2=psl file with alignment of mrna/EST to other species.\n"
     "    -snp=snp.tab.gz contains snps with or without bin field in snp125 format\n"
     "    -nibdir1=sequence of species 1 \n"
     "    -nibdir2=sequence of species 2 \n"
     "    -mrna1=fasta file with sequence of mrna/EST used in alignment 1\n"
     "    -mrna2=fasta file with sequence of mrna/EST used in alignment 2\n",
@@ -178,103 +170,99 @@
 void alignFree(struct alignment  **pEl)
 /* Free a single dynamically allocated alignment */
 {
 struct alignment *el;
 struct psl *p;
 struct dnaSeq *tSeq;
 struct dnaSeq *qSeq;
 
 if ((el = *pEl) == NULL) return;
 p = el->psl;
 pslFree(&p);
 tSeq = el->tSeq;
 qSeq = el->qSeq;
 freeDnaSeq(&tSeq);
 freeDnaSeq(&qSeq);
-//freeMem(el->db);
 freez(pEl);
 }
 
 void alignFreeList(struct alignment **pList)
 /* Free a list of dynamically allocated alignment's */
 {
 struct alignment *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     //pslFreeList(&el->psl);
     alignFree(&el);
     }
 *pList = NULL;
 }
 
-struct dnaSeq *nibInfoLoadSeq(struct nibInfo *nib, int start, int size)
-/* Load in a sequence in mixed case from nib file. */
-{
-return nibLdPartMasked(NIB_MASK_MIXED, nib->fileName, nib->f, nib->size, 
-	start, size);
-}
-
 bool purine(char a)
 {
 if (toupper(a) == 'A' || toupper(a) == 'G')
     return(TRUE);
 else
     return(FALSE);
 }
 
 bool transversion(char a, char b)
 {
 if (purine(a) != purine(b))
     return TRUE;
 else
     return FALSE;
 }
 
 bool transition(char a, char b)
 {
 if (!transversion(a,b))
     return TRUE;
 else
     return FALSE;
 }
 
-struct dnaSeq *nibInfoLoadStrand(struct nibInfo *nib, char *seqName, int start, int end,
+struct dnaSeq *twoBitGetSeqForStrand(struct twoBitFile *tbf, char *seqName, int start, int end,
 	char strand)
-/* Load in a mixed case sequence from nib file, from reverse strand if
+/* Load in a mixed case sequence from from a twoBit file, from reverse strand if
  * strand is '-'. */
 {
 struct dnaSeq *seq;
 int size = end - start;
+int seqSize = twoBitSeqSize(tbf, seqName);
 assert(size >= 0);
+
 if (strand == '-')
     {
     verbose(6,"before start %d end %d size %d %s\n",
-            start, end, nib->size, seqName);
-    reverseIntRange(&start, &end, nib->size);
+            start, end, size, seqName);
+    reverseIntRange(&start, &end, seqSize);
     verbose(6,"after start %d end %d size %d %s\n",
-            start, end, nib->size, seqName);
+            start, end, size, seqName);
     assert(start >= 0);
-    seq = nibInfoLoadSeq(nib, start, size);
+    seq = twoBitReadSeqFrag(tbf, seqName, start, end);
+    //seq = nibInfoLoadSeq(nib, start, size);
 //    seq = nibTwoCacheSeqPart(ntc, seqName, 
 //	start, size, &retFullSeqSize);
     reverseComplement(seq->dna, seq->size);
     }
 else
     {
-    seq = nibInfoLoadSeq(nib, start, size);
+    seq = twoBitReadSeqFrag(tbf, seqName, start, end);
+    //seq = nibInfoLoadSeq(nib, start, size);
 //    seq = nibTwoCacheSeqPart(ntc, seqName, 
 //	start, size, &retFullSeqSize);
     }
 return seq;
 }
 
 void addLoci(struct loci **lociList, struct psl *psl)
     /* add a new loci from psl, if not already in list */
 {
 struct loci *loci = NULL;
 bool found = FALSE;
 
 if (!found)
     {
     AllocVar(loci);
@@ -948,69 +936,70 @@
 
 for (i = 0 ; i < 256 ; i++)
     for (j = 0 ; j < 256 ; j++)
         {
         histogram[i][j] = 0;
         histoNorm[i][j] = 0.0;
         }
 }
 
 void computeSuffStats(struct misMatch **misMatchList, struct alignment *align, struct loci *lociList)
 /* compute sufficient statistics from each alignment */
 {
 int blockIx = 0;
 //int i, j,    sum = 0;
 struct psl *psl = align->psl;
-struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
+//struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
 //nibTwoCache
 static struct dnaSeq *tSeq = NULL;
 int tStart = psl->tStart;
 int tEnd   = psl->tEnd;
 //int misMatchCount = 0;
 char genomeStrand = psl->strand[1] == '-' ? '-' : '+';
 if (genomeStrand == '-')
     reverseIntRange(&tStart, &tEnd, psl->tSize);
 for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
     /* for each alignment block get sequence for both strands */
     {
     struct snp125 *snp = NULL, *snpList = NULL;
-    if (hashFindVal(twoBitFile->hash, psl->qName) == NULL)
+    if (hashFindVal(cdnaSeqFile->hash, psl->qName) == NULL)
         {
         printf("skipping %s not found \n",psl->qName);
         return;
         }
-    int size = twoBitSeqSize(twoBitFile, psl->qName);
+    int size = twoBitSeqSize(cdnaSeqFile, psl->qName);
     int i = 0;
     int ts = psl->tStarts[blockIx];
     int te = psl->tStarts[blockIx]+(psl->blockSizes[blockIx]);
     int qe = psl->qStarts[blockIx]+(psl->blockSizes[blockIx]);
     struct dnaSeq *qSeq = NULL;
     if (qe > size)
         {
         printf("skipping %s %d > %d \n",psl->qName, qe, size );
         return;
         }
-    qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, psl->qStarts[blockIx], 
+    qSeq = twoBitReadSeqFrag(cdnaSeqFile, psl->qName, psl->qStarts[blockIx], 
             psl->qStarts[blockIx]+(psl->blockSizes[blockIx]));
 
     if (genomeStrand == '-')
         reverseIntRange(&ts, &te, psl->tSize);
 
     verbose(5,"  xyz %s t %s:%d-%d q %d-%d %s strand %c\n",
             psl->qName, psl->tName, ts, te, psl->qStarts[blockIx], 
             psl->qStarts[blockIx]+psl->blockSizes[blockIx], psl->strand, genomeStrand);
-    tSeq = nibInfoLoadStrand(tNib, psl->tName, psl->tStarts[blockIx], 
+    tSeq = twoBitGetSeqForStrand(align->twoBitSeqFile, psl->tName,
+            psl->tStarts[blockIx], 
             psl->tStarts[blockIx]+(psl->blockSizes[blockIx]), genomeStrand);
 
     verbose(6,"tSeq %s len %d %d\n",tSeq->dna, tSeq->size, (int)strlen(tSeq->dna));
     verbose(6,"qSeq %s\n",qSeq->dna);
     assert(psl->strand[0] == '+');
 
     /* count match and mismatches in each alignment block */
     for (i = 0 ; i < tSeq->size; i++)
         {
         char t = toupper(tSeq->dna[i]);
         char q = toupper(qSeq->dna[i]);
         int genomeStart = ts+i;
         //int mrnaLoc = psl->qStarts[blockIx]+i;
 
         if (q == 'n' || q == 'N')
@@ -1040,81 +1029,81 @@
             verbose(4,"       [%d] snp match %s %s %s:%d %s %s offset %d gs %c ts %d genomic%c %c valid %s\n",
                     getLoci(lociList, psl->tName, psl->tStart), snp->name, psl->qName, snp->chrom, snp->chromStart, snp->observed, 
                     snp->strand, offset, genomeStrand, ts, genomeStrand, t, snp->valid);
             }
         }
     /* add indel for portions of mrna that do not align at all */
 //    for (i = 0 ; i < psl->qStarts[0] ; i++)
 //        {
 //        char q = toupper(qSeq->dna[i]);
 //        }
     freeDnaSeq(&tSeq);
     freeDnaSeq(&qSeq);
     }
 }
 
-void addOtherAlignments( struct alignment *alignList, struct hash *speciesHash, char *name, char *nibDir, char *mrnaPath)
+void addOtherAlignments( struct alignment *alignList, struct hash *speciesHash, char *name, struct twoBitFile *twoBitFile, char *mrnaPath)
 {
 /* add alignemnts from other species to the list */
 struct hashEl *speciesList = NULL , *el = NULL;
 struct alignment *align = NULL;
 
 if (speciesHash != NULL)
     speciesList = hashLookup(speciesHash,name);
 for (el = speciesList ; el != NULL ; el = el->next)
     {
     struct psl *psl = el->val;
     AllocVar(align);
     align->psl = psl;
-    align->nibDir = nibDir;
+    align->twoBitSeqFile = twoBitFile;
     align->mrnaPath = mrnaPath;
     slAddHead(&alignList, align);
     }
 slReverse(&alignList);
 }
 
 void fillinMatches(struct misMatch **misMatchList, struct alignment *align, struct loci *lociList)
 /* for each mismatch , lookup bases in other loci */
 {
     int blockIx = 0;
     static struct dnaSeq *tSeq = NULL;
 
     struct psl *psl = align->psl;
-    struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
+    //struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
     int tStart = psl->tStart;
     int tEnd   = psl->tEnd;
     char genomeStrand = psl->strand[1] == '-' ? '-' : '+';
     int index = getLoci(lociList, psl->tName, psl->tStart);
 
 
     if (genomeStrand == '-')
         reverseIntRange(&tStart, &tEnd, psl->tSize);
     for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
         /* for each alignment block get sequence for both strands */
         {
-        struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, psl->qStarts[blockIx], 
+        struct dnaSeq *qSeq = twoBitReadSeqFrag(cdnaSeqFile, psl->qName, psl->qStarts[blockIx], 
                 psl->qStarts[blockIx]+(psl->blockSizes[blockIx]));
         int ts = psl->tStarts[blockIx];
         int te = psl->tStarts[blockIx]+(psl->blockSizes[blockIx]);
         int qs = psl->qStarts[blockIx];
         int qe = psl->qStarts[blockIx]+(psl->blockSizes[blockIx]);
         struct misMatch *mm = NULL;
 
         if (genomeStrand == '-')
             reverseIntRange(&ts, &te, psl->tSize);
 
-        tSeq = nibInfoLoadStrand(tNib, psl->tName, psl->tStarts[blockIx], 
+        tSeq = twoBitGetSeqForStrand(align->twoBitSeqFile, psl->tName, psl->tStarts[blockIx], 
                 psl->tStarts[blockIx]+(psl->blockSizes[blockIx]), genomeStrand);
 
         for (mm = *misMatchList ; mm != NULL ; mm = mm->next)
             {
             /* i = offset within the block */
             int i = mm->mrnaLoc - qs;
             int genomeStart = ts+i;
             if (genomeStrand == '-')
                 genomeStart = te-i-1;
             /* if genomic chr not filled in yet (only mismatches are filled in ), and we are within the block, lookup genomic loc and  base*/
             if (index == mm->loci)
                 {
                 if (sameString(mm->chrom , na) && 
                         mm->mrnaLoc >= qs && mm->mrnaLoc < qe && i >= 0)
                     {
@@ -1163,53 +1152,53 @@
             verbose(4,"       snp %s %s %s:%d offset %d %s %s gs %c ts %d genomic%c valid %s\n",
                     snp->name, psl->qName, snp->chrom, snp->chromStart, offset, snp->observed, 
                     snp->strand, genomeStrand, ts, genomeStrand, snp->valid);
             }
             */
         freeDnaSeq(&tSeq);
         freeDnaSeq(&qSeq);
         }
 }
 
 void buildMisMatches(struct misMatch **misMatchList, struct alignment *align, struct loci *lociList)
 /* build list of mismatches for each alignment (loci) */
 {
 int blockIx = 0;
 struct psl *psl = align->psl;
-struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
+//struct nibInfo *tNib = nibInfoFromCache(nibHash, align->nibDir, psl->tName);
 static struct dnaSeq *tSeq = NULL;
 int tStart = psl->tStart;
 int tEnd   = psl->tEnd;
 int misMatchCount = 0;
 int transitionCount = 0;
 int transversionCount = 0;
 char genomeStrand = psl->strand[1] == '-' ? '-' : '+';
 int matchCount = 0;
 for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
     /* for each alignment block get sequence for both strands */
     {
     struct snp125 *snp = NULL, *snpList = NULL;
-    struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, psl->qStarts[blockIx], 
+    struct dnaSeq *qSeq = twoBitReadSeqFrag(cdnaSeqFile, psl->qName, psl->qStarts[blockIx], 
             psl->qStarts[blockIx]+(psl->blockSizes[blockIx]));
     int i = 0;
     int ts = psl->tStarts[blockIx];
     int te = psl->tStarts[blockIx]+(psl->blockSizes[blockIx]);
 
     if (genomeStrand == '-')
         reverseIntRange(&ts, &te, psl->tSize);
 
-    tSeq = nibInfoLoadStrand(tNib, psl->tName, psl->tStarts[blockIx], 
+    tSeq = twoBitGetSeqForStrand(align->twoBitSeqFile, psl->tName, psl->tStarts[blockIx], 
             psl->tStarts[blockIx]+(psl->blockSizes[blockIx]), genomeStrand);
 
     verbose(5,"  buildMisMatches for blk %s t %s:%d-%d q %d-%d %s strand %c\n",
             psl->qName, psl->tName, ts, te, psl->qStarts[blockIx], 
             psl->qStarts[blockIx]+psl->blockSizes[blockIx], psl->strand, genomeStrand);
     verbose(6,"tSeq %s len %d %d\n",tSeq->dna, tSeq->size, (int)strlen(tSeq->dna));
     verbose(6,"qSeq %s\n",qSeq->dna);
     assert(psl->strand[0] == '+');
 
     /* check for mismatches in exon (alignment block)*/
     for (i = 0 ; i < tSeq->size; i++)
         {
         char t = toupper(tSeq->dna[i]);
         char q = toupper(qSeq->dna[i]);
         
@@ -1377,31 +1366,31 @@
             if (align->psl->strand[1] == '+' || align->psl->strand[1] == '-') align->psl->strand[1] = '\0';
             pslTabOut(align->psl, outFile);
             outputCount++;
             }
         filterCount++;;
         }
     misMatchFreeList(&misMatchList);
     }
 else if (computeSS)
     {
     }
 
 lociFreeList(&lociList);
 }
 
-void pslCDnaGenomeMatch(char *inName, char *tNibDir)
+void pslCDnaGenomeMatch(char *inName)
 {
 struct psl *psl = NULL, *pslList = NULL ;
 struct alignment *subList = NULL;
 char lastName[512];
 unsigned int i , j;
 
 pslList = readPslList(inName);
 
 /* rev compl alignments so mrna is always on + strand */
 /* Note: this should flip the loci also */
 safef(lastName,sizeof(lastName), "%s", pslList->qName);
 psl = pslList;
 while (psl != NULL )
     {
     bool rm = FALSE;
@@ -1415,31 +1404,31 @@
         }
     if (differentString(lastName, psl->qName) && subList != NULL && lastName != NULL)
 	{
         slSort(&subList, pslCmpMatch);
         verbose(2, "sort on match pslList %d subList %d last %s\n",slCount(pslList), slCount(subList), lastName);
 	doOneMrna(lastName, subList);
 	alignFreeList(&subList);
         verbose(5, "3 pslList %d\n",slCount(pslList));
 	safef(lastName, sizeof(lastName), "%s", psl->qName);
 	}
     AllocVar(align);
     newPsl = psl;
     psl = psl->next;
     align->psl = newPsl; 
     align->db = "db";
-    align->nibDir = tNibDir;
+    align->twoBitSeqFile = genomeSeqFile;
     align->mrnaPath = NULL;
     rm = slRemoveEl(&pslList, newPsl);
     if (rm)
         {
         slAddHead(&subList, align);
         verbose(5, "add %s %s\n",align->psl->qName, align->psl->tName);
         }
     }
 verbose(5,"last doOneMrna\n");
 slSort(&subList, pslCmpMatch);
 doOneMrna(lastName, subList);
 alignFreeList(&subList);
 //pslFreeList(&pslList);
 verbose(1,"Wrote %d alignments out of %d \n", outputCount, aliCount);
 verbose(1,"%d out of %d mRNAs passed strict criteria (pass thru not counted)\n", filterCount, mrnaCount);
@@ -1480,63 +1469,65 @@
             getHistoNorm('n','a')+ getHistoNorm('n','c')+ getHistoNorm('n','g')+ getHistoNorm('n','t') );
 
     for (i = 0 ; i < 128 ; i++)
         for (j = 0 ; j < 128 ; j++)
             if (getHistogram(i,j) > 0)
                 fprintf(outFile, "%c %c = %f \n",(char)i, (char)j, getHistoNorm(i,j)) ;
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 6)
     usage();
-if (nibHash == NULL)
-    nibHash = hashNew(0);
+//if (nibHash == NULL)
+//    nibHash = hashNew(0);
 lociCounter = 0;
 fileCache = newDlList();
 verbosity = optionInt("verbose", verbosity);
 verboseSetLogFile("stdout");
 verboseSetLevel(verbosity);
 ss = axtScoreSchemeDefault();
-twoBitFile = twoBitOpen(argv[3]);
+cdnaSeqFile = twoBitOpen(argv[3]);
+genomeSeqFile = twoBitOpen(argv[4]);
 outFile = fopen(argv[5],"w");
 
 initSS();
 minDiff = optionInt("minDiff", MINDIFF);
 notAlignPenalty = optionInt("notAlignPenalty", NOTALIGNEDFACTOR);
 snpFile = optionVal("snp", NULL);
 if (snpFile != NULL)
     {
     verbose(1,"Reading snps from %s\n",snpFile);
     snpHash = readSnpToBinKeeper(argv[2],snpFile);
     }
 scoreOut = optionVal("score", NULL);
 if (scoreOut != NULL)
     scoreFile = fopen(scoreOut,"w");
 bedOut = optionVal("bedOut", NULL);
 if (bedOut != NULL)
     bedFile = fopen(bedOut,"w");
 species1 = optionVal("species1", NULL);
 if (species1 != NULL)
     species1Hash = readPslQnameToHash(species1);
 species2 = optionVal("species2", NULL);
 if (species2 != NULL)
     species2Hash = readPslQnameToHash(species2);
 nibdir1 = optionVal("nibdir1", NULL);
 nibdir2 = optionVal("nibdir2", NULL);
 mrna1 = optionVal("mrna1", NULL);
 mrna2 = optionVal("mrna2", NULL);
 passthru = optionExists("passthru");
 verbose(1,"Reading alignments from %s\n",argv[1]);
-pslCDnaGenomeMatch(argv[1], argv[4]);
+pslCDnaGenomeMatch(argv[1]);
 fclose(outFile);
 if (bedOut != NULL)
     fclose(bedFile);
 if (scoreOut != NULL)
     fclose(scoreFile);
 freeDlList(&fileCache);
-twoBitClose(&twoBitFile);
+twoBitClose(&cdnaSeqFile);
+twoBitClose(&genomeSeqFile);
 return(0);
 }