e98c85437ca0f563e15b62b624836938de72e339 max Thu May 22 09:13:08 2014 -0700 as long as we don't have bigPsl this is some replacement diff --git src/hg/pslToBed/pslToBed.c src/hg/pslToBed/pslToBed.c index 05a9471..8fb0fc0 100644 --- src/hg/pslToBed/pslToBed.c +++ src/hg/pslToBed/pslToBed.c @@ -6,36 +6,41 @@ #include "options.h" void usage() /* print usage infomation and exit */ { errAbort("pslToBed: tranform a psl format file to a bed format file.\n" "usage:\n" " pslToBed psl bed\n" "options:\n" " -cds=cdsFile\n" "cdsFile specifies a input cds tab-separated file which contains\n" "genbank-style CDS records showing cdsStart..cdsEnd\n" "e.g. NM_123456 34..305\n" "These coordinates are assumed to be in the query coordinate system\n" - "of the psl, like those that are created from genePredToFakePsl\n"); + "of the psl, like those that are created from genePredToFakePsl\n" + " -posName\n" + "write the full position in chrom:start-end format into the name field\n" + "(can be used to create links to source position on details page)\n" + ); } static struct optionSpec options[] = { {"cds", OPTION_STRING}, + {"posName", OPTION_BOOLEAN}, {NULL, 0}, }; struct cds { int start, end; // cds start and end }; static unsigned getTargetForQuery(struct psl *psl, int queryAddress) // get target address for query address from PSL { int blockNum; for (blockNum=0; blockNum < psl->blockCount; blockNum++) { @@ -81,40 +86,48 @@ // we subtract one from start to convert to PSL coordinate system thickStart = getTargetForQuery(psl, cdsStart); // cdsEnd actually points to one base after the end, so // we translate the base address, then add one thickEnd = getTargetForQuery(psl, cdsEnd - 1) + 1; // if thickStart equals thickEnd, then there is no CDS if (thickStart == thickEnd) thickStart = thickEnd = 0; bed->thickStart = thickStart; bed->thickEnd = thickEnd; } -void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash) +void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash, bool doPosName) /* pslToBed -- tranform a psl format file to a bed format file */ { struct lineFile *pslLf = pslFileOpen(pslFile); FILE *bedFh = mustOpen(bedFile, "w"); struct psl *psl; while ((psl = pslNext(pslLf)) != NULL) { struct bed *bed = bedFromPsl(psl); + if (doPosName) + { + char *newName = needMem(512); + safef(newName, 512, "%s:%d-%d", psl->qName, psl->qStart, psl->qEnd); + freeMem(bed->name); + bed->name = newName; + } + if (cdsHash) { struct cds *cds = hashFindVal(cdsHash, psl->qName); if (cds == NULL) bed->thickStart = bed->thickEnd = bed->chromStart; else setThick(psl, bed, cds); } bedTabOutN(bed, 12, bedFh); bedFree(&bed); pslFree(&psl); } carefulClose(&bedFh); lineFileClose(&pslLf); } @@ -137,25 +150,26 @@ if (cds->start > cds->end) errAbort("CDS start(%d) is before end(%d) on line %d", cds->start, cds->end, lf->lineIx); hashAdd(hash, row[0], cds); } lineFileClose(&lf); return hash; } int main(int argc, char* argv[]) { optionInit(&argc, argv, options); if (argc != 3) usage(); char *cdsFile = optionVal("cds", NULL); +bool doPosName = optionExists("posName"); struct hash *cdsHash = NULL; if (cdsFile != NULL) cdsHash = getCdsHash(cdsFile); -pslToBed(argv[1], argv[2], cdsHash); +pslToBed(argv[1], argv[2], cdsHash, doPosName); return 0; }