917964e6471bfb6f8c021d6a38b36254855c4ce9 max Sun Sep 16 21:45:32 2012 -0700 added -bedPos option and a test for it diff --git src/utils/twoBitToFa/twoBitToFa.c src/utils/twoBitToFa/twoBitToFa.c index b9785ab..daf7925 100644 --- src/utils/twoBitToFa/twoBitToFa.c +++ src/utils/twoBitToFa/twoBitToFa.c @@ -15,57 +15,60 @@ { errAbort( "twoBitToFa - Convert all or part of .2bit file to fasta\n" "usage:\n" " twoBitToFa input.2bit output.fa\n" "options:\n" " -seq=name - restrict this to just one sequence\n" " -start=X - start at given position in sequence (zero-based)\n" " -end=X - end at given position in sequence (non-inclusive)\n" " -seqList=file - file containing list of the desired sequence names \n" " in the format seqSpec[:start-end], e.g. chr1 or chr1:0-189\n" " where coordinates are half-open zero-based, i.e. [start,end)\n" " -noMask - convert sequence to all upper case\n" " -bpt=index.bpt - use bpt index instead of built in one\n" " -bed=input.bed - grab sequences specified by input.bed. Will exclude introns\n" + " -bedPos - use with -bed to put chrom:start-end as the fasta ID\n" "\n" "Sequence and range may also be specified as part of the input\n" "file name using the syntax:\n" " /path/input.2bit:name\n" " or\n" " /path/input.2bit:name\n" " or\n" " /path/input.2bit:name:start-end\n" ); } char *clSeq = NULL; /* Command line sequence. */ int clStart = 0; /* Start from command line. */ int clEnd = 0; /* End from command line. */ char *clSeqList = NULL; /* file containing list of seq names */ bool noMask = FALSE; /* convert seq to upper case */ char *clBpt = NULL; /* External index file. */ char *clBed = NULL; /* Bed file that specifies bounds of sequences. */ +bool clBedPos = FALSE; static struct optionSpec options[] = { {"seq", OPTION_STRING}, {"seqList", OPTION_STRING}, {"start", OPTION_INT}, {"end", OPTION_INT}, {"noMask", OPTION_BOOLEAN}, {"bpt", OPTION_STRING}, {"bed", OPTION_STRING}, + {"bedPos", OPTION_BOOLEAN}, {NULL, 0}, }; void outputOne(struct twoBitFile *tbf, char *seqSpec, FILE *f, int start, int end) /* Output sequence. */ { struct dnaSeq *seq = twoBitReadSeqFrag(tbf, seqSpec, start, end); if (noMask) toUpperN(seq->dna, seq->size); faWriteNext(f, seq->name, seq->dna, seq->size); dnaSeqFree(&seq); } static void processAllSeqs(struct twoBitFile *tbf, FILE *outFile) @@ -114,31 +117,40 @@ dnaSeqFree(&exon); } } if (bed->strand[0] == '-') reverseComplement(seq->dna, seq->size); return seq; } static void processSeqsFromBed(struct twoBitFile *tbf, char *bedFileName, FILE *outFile) /* Get sequences defined by beds. Exclude introns. */ { struct bed *bed, *bedList = bedLoadAll(bedFileName); for (bed = bedList; bed != NULL; bed = bed->next) { struct dnaSeq *seq = twoBitAndBedToSeq(tbf, bed); - faWriteNext(outFile, seq->name, seq->dna, seq->size); + char* seqName = NULL; + if (clBedPos) + { + char buf[1024]; + safef(buf, 1024, "%s:%d-%d", bed->chrom, bed->chromStart, bed->chromEnd); + seqName = buf; + } + else + seqName = seq->name; + faWriteNext(outFile, seqName, seq->dna, seq->size); dnaSeqFree(&seq); } } void twoBitToFa(char *inName, char *outName) /* twoBitToFa - Convert all or part of twoBit file to fasta. */ { struct twoBitFile *tbf; FILE *outFile = mustOpen(outName, "w"); struct twoBitSpec *tbs; if (clSeq != NULL) { char seqSpec[2*PATH_LEN]; if (clEnd > clStart) @@ -171,31 +183,36 @@ twoBitClose(&tbf); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); clSeq = optionVal("seq", clSeq); clStart = optionInt("start", clStart); clEnd = optionInt("end", clEnd); clSeqList = optionVal("seqList", clSeqList); clBpt = optionVal("bpt", clBpt); clBed = optionVal("bed", clBed); +clBedPos = optionExists("bedPos"); +noMask = optionExists("noMask"); + +if (clBedPos && !clBed) + errAbort("the -bedPos option requires the -bed option"); if (clBed != NULL) { if (clSeqList != NULL) errAbort("Can only have seqList or bed options, not both."); if (clSeq != NULL) errAbort("Can only have seq or bed options, not both."); } if ((clStart > clEnd) && (clSeq == NULL)) errAbort("must specify -seq with -start and -end"); if ((clSeq != NULL) && (clSeqList != NULL)) errAbort("can't specify both -seq and -seqList"); -noMask = optionExists("noMask"); + dnaUtilOpen(); twoBitToFa(argv[1], argv[2]); return 0; }