a44421a79fb36cc2036fe116b97ea3bc9590cd0c braney Fri Dec 2 09:34:39 2011 -0800 removed rcsid (#295) diff --git src/hg/pslIntronsOnly/pslIntronsOnly.c src/hg/pslIntronsOnly/pslIntronsOnly.c index 0f88c6c..2abd70a 100644 --- src/hg/pslIntronsOnly/pslIntronsOnly.c +++ src/hg/pslIntronsOnly/pslIntronsOnly.c @@ -1,88 +1,87 @@ /* pslIntronsOnly - Filter psl files to only include those with introns. */ #include "common.h" #include "linefile.h" #include "psl.h" #include "dnautil.h" #include "dnaseq.h" #include "nib.h" #include "twoBit.h" #include "fa.h" -static char const rcsid[] = "$Id: pslIntronsOnly.c,v 1.10 2005/08/21 15:49:45 markd Exp $"; void usage() /* Explain usage and exit. */ { errAbort( "pslIntronsOnly - Filter psl files to only include those with introns\n" "usage:\n" " pslIntronsOnly in.psl genoFile out.psl\n" "\n" "The genoFile can be a fasta file, a chromosome nib, or a nib subrange.\n" "Subranges of nib files may specified using the syntax:\n" " /path/file.nib:seqid:start-end\n" "or\n" " /path/file.2bit:seqid:start-end\n" "or\n" " /path/file.nib:start-end\n" "With the second form, a sequence id of file:start-end will be used.\n"); } struct hash *loadGeno(char *genoFile) /* load genome sequences into a hash. This supports the multi-sequence * specs of twoBitLoadAll */ { struct dnaSeq *genos = NULL, *geno; struct hash *genoHash = hashNew(0); if (nibIsFile(genoFile)) genos = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, genoFile); else if (twoBitIsSpec(genoFile)) genos = twoBitLoadAll(genoFile); else genos = faReadDna(genoFile); while ((geno = slPopHead(&genos)) != NULL) { tolowers(geno->dna); hashAdd(genoHash, geno->name, geno); } return genoHash; } void pslIntronsOnly(char *inPslName, char *genoFile, char *outPslName) /* pslIntronsOnly - Filter psl files to only include those with introns. */ { struct lineFile *lf = NULL; FILE *outFile = NULL; struct hash *genoHash = loadGeno(genoFile); struct psl *psl; int count = 0, intronCount = 0; lf = pslFileOpen(inPslName); outFile = mustOpen(outPslName, "w"); while ((psl = pslNext(lf)) != NULL) { struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName); if (pslHasIntron(psl, geno, 0)) { ++intronCount; pslTabOut(psl, outFile); } pslFree(&psl); ++count; } carefulClose(&outFile); lineFileClose(&lf); printf("%d of %d in %s have introns\n", intronCount, count, inPslName); } int main(int argc, char *argv[]) /* Process command line. */ { if (argc != 4) usage(); dnaUtilOpen(); pslIntronsOnly(argv[1], argv[2], argv[3]); return 0; }