a44421a79fb36cc2036fe116b97ea3bc9590cd0c
braney
  Fri Dec 2 09:34:39 2011 -0800
removed rcsid (#295)
diff --git src/utils/pslToPslx/pslToPslx.c src/utils/pslToPslx/pslToPslx.c
index 9cdf39c..a1bb6e8 100644
--- src/utils/pslToPslx/pslToPslx.c
+++ src/utils/pslToPslx/pslToPslx.c
@@ -1,164 +1,163 @@
 /* pslToPslx - Convert from psl to xa alignment format. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "portable.h"
 #include "dnaseq.h"
 #include "fa.h"
 #include "nibTwo.h"
 #include "psl.h"
 
-static char const rcsid[] = "$Id";
 
 /* command line options */
 static struct optionSpec optionSpecs[] = {
     {"masked", OPTION_BOOLEAN},
     {NULL, 0}
 };
 static boolean gMasked = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "pslToPslx - Convert from psl to pslx format, which includes sequences\n"
   "usage:\n"
   "   pslToPslx [options] in.psl qSeqSpec tSeqSpec out.pslx\n"
   "\n"
   "qSeqSpec and tSeqSpec can be nib directory, a 2bit file, or a FASTA file.\n"
   "FASTA files should end in .fa, .fa.gz, .fa.Z, or .fa.bz2 and are read into\n"
   "memory.\n"
   "\n"
   "Options:\n"
   "  -masked - if specified, repeats are in lower case cases, otherwise entire\n"
   "            sequence is loader case.\n"
   );
 }
 
 struct seqReader
 /* Access to sequences from a seqSpec */
 {
     struct seqFile *next;
     char *spec;                  /* file specification */
     boolean isFasta;
     struct nibTwoCache *nibTwo;  /* object for accessing nib or 2bit files */
     struct hash *seqTbl;       /* hash of dnaSeq objects from a FASTA file */
     };
 
 static void seqReaderLoadFasta(struct seqReader *seqReader)
 /* load a FASTA file into a seqReader object */
 {
 seqReader->seqTbl = hashNew(21);
 struct dnaSeq *seq, *seqs = faReadAllMixed(seqReader->spec);
 while ((seq = slPopHead(&seqs)) != NULL)
     hashAdd(seqReader->seqTbl, seq->name, seq);
 }
 
 static struct seqReader *seqReaderNew(char *seqSpec)
 /* construct a new seqReader object */
 {
 struct seqReader *seqReader;
 AllocVar(seqReader);
 seqReader->spec = cloneString(seqSpec);
 if (endsWith(seqSpec, ".fa") || endsWith(seqSpec, ".fa.gz") || endsWith(seqSpec, ".fa.Z") || endsWith(seqSpec, ".fa.bz2"))
     seqReaderLoadFasta(seqReader);
 else
     seqReader->nibTwo = nibTwoCacheNew(seqSpec);
 return seqReader;
 }
 
 static struct dnaSeq *seqReaderTblGet(struct seqReader *seqReader, char *seqName, int start, int end,
                                       int *retFullSeqSize)
 /* get a sequence from the seqTbl */
 {
 struct dnaSeq *fullSeq = hashFindVal(seqReader->seqTbl, seqName);
 if (fullSeq == NULL)
     errAbort("can't find sequence %s in %s", seqName, seqReader->spec);
 if (end > fullSeq->size)
     errAbort("range %d-%d longer than sequence %s length %d in %s",
              start, end, seqName, fullSeq->size, seqReader->spec);
 if (retFullSeqSize != NULL)
     *retFullSeqSize = fullSeq->size;
 int len = (end-start);
 struct dnaSeq *seq = newDnaSeq(cloneStringZ(fullSeq->dna+start, len), len, seqName);
 if (!gMasked)
     tolowers(seq->dna);
 return seq;
 }
 
 static struct dnaSeq *seqReaderGet(struct seqReader *seqReader, char *seqName, int start, int end,
                                    int *retFullSeqSize)
 /* return part of a sequence from a seqReader.  dnaSeqFree result when
  * complete. */
 {
 assert(start <= end);
 if (seqReader->nibTwo != NULL)
     return nibTwoCacheSeqPartExt(seqReader->nibTwo, seqName, start, end-start, gMasked, retFullSeqSize);
 else
     return seqReaderTblGet(seqReader, seqName, start, end, retFullSeqSize);
 }
 
 static void outputBlocks(FILE *pslOutFh, struct seqReader *seqReader, char *seqName, char strand,
                          int start, int end, int blockCount, unsigned *starts, unsigned *sizes)
 /* output sequences for of the sequence columns */
 {
 int fullSeqSize = 0;;
 struct dnaSeq *seq = seqReaderGet(seqReader, seqName, start, end, &fullSeqSize);
 int seqOff = start;
 if (strand == '-')
     {
     reverseComplement(seq->dna, seq->size);
     seqOff = fullSeqSize - end;
     }
 int iBlk;
 for (iBlk = 0; iBlk < blockCount; iBlk++)
     {
     mustWrite(pslOutFh, seq->dna+starts[iBlk]-seqOff, sizes[iBlk]);
     fputc(',', pslOutFh);
     }
 dnaSeqFree(&seq);
 }
 
 static void writePslx(FILE *pslOutFh, struct seqReader *qSeqReader, struct seqReader *tSeqReader, struct psl *psl)
 /* output a pslx, cheating in not creating the whole pslx to avoid more sequence
  * manipulation */
 {
 if (pslIsProtein(psl))
     errAbort("doesn't support protein PSLs: qName: %s", psl->qName);
 pslOutput(psl, pslOutFh, '\t', '\t');
 outputBlocks(pslOutFh, qSeqReader, psl->qName, psl->strand[0],
              psl->qStart, psl->qEnd, psl->blockCount, psl->qStarts, psl->blockSizes);
 fputc('\t', pslOutFh);
 outputBlocks(pslOutFh, tSeqReader, psl->tName, psl->strand[1],
              psl->tStart, psl->tEnd, psl->blockCount, psl->tStarts, psl->blockSizes);
 fputc('\n', pslOutFh);
 }
 
 static void pslToPslx(char *inPslFile, char *qSeqSpec, char *tSeqSpec, char *outPslFile)
 /* pslToPslx - Convert from psl to pslx alignment format. */
 {
 struct lineFile *pslInLf = pslFileOpen(inPslFile);
 struct seqReader *qSeqReader = seqReaderNew(qSeqSpec);
 struct seqReader *tSeqReader = seqReaderNew(tSeqSpec);
 FILE *pslOutFh = mustOpen(outPslFile, "w");
 struct psl *psl;
 while ((psl = pslNext(pslInLf)) != NULL)
     {
     writePslx(pslOutFh, qSeqReader, tSeqReader, psl);
     pslFree(&psl);
     }
 lineFileClose(&pslInLf);
 carefulClose(&pslOutFh);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 if (argc != 5)
     usage();
 gMasked = optionExists("masked");
 pslToPslx(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }