164d54ed22c894be328c9c4d7ebcdbc9c40f407e hiram Thu Feb 29 10:53:41 2024 -0800 loadSizes is now a src/hg/common.c function refs #33064 diff --git src/hg/utils/pslLiftSubrangeBlat/pslLiftSubrangeBlat.c src/hg/utils/pslLiftSubrangeBlat/pslLiftSubrangeBlat.c index ca4f7c4..30abd3c 100644 --- src/hg/utils/pslLiftSubrangeBlat/pslLiftSubrangeBlat.c +++ src/hg/utils/pslLiftSubrangeBlat/pslLiftSubrangeBlat.c @@ -1,133 +1,121 @@ /* pslLiftSubrangeBlat - lift PSLs from blat subrange alignments. */ /* Copyright (C) 2011 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "psl.h" #include "pslReader.h" #include "hash.h" #include "options.h" #include "jksql.h" void usage() /* Explain usage and exit. */ { errAbort( "pslLiftSubrangeBlat - lift PSLs from blat subrange alignments\n" "usage:\n" " pslLiftSubrangeBlat isPsl outPsl\n" "\n" "Lift a PSL with target coordinates from a blat subrange query\n" "(e.g. blah/hg18.2bit:chr1:1000-20000) which has subrange\n" "coordinates as the target name (e.g. chr1:1000-200000) to\n" "actual target coordinates.\n" "\n" "options:\n" " -tSizes=szfile - lift target side based on tName, using target sizes from\n" " this tab separated file.\n" " -qSizes=szfile - lift query side based on qName, using query sizes from\n" " this tab separated file.\n" "Must specify at least on of -tSizes or -qSize or both.\n" ); } static struct optionSpec options[] = { {"tSizes", OPTION_STRING}, {"qSizes", OPTION_STRING}, {NULL, 0}, }; -static struct hash *loadSizes(char *szFile) -/* load sizes into a hash */ -{ -struct hash *sizes = hashNew(0); -struct lineFile *lf = lineFileOpen(szFile, TRUE); -char *row[2]; -while (lineFileNextRowTab(lf, row, 2)) - hashAddInt(sizes, row[0], sqlSigned(row[1])); - -return sizes; -} - static boolean parseName(char *desc, char *name, int *start, int *end) /* parse and validate name into it's parts. Return FALSE if name doesn't * contain a subrange specification. */ { char *nameEnd = strchr(name, ':'); if (nameEnd == NULL) return FALSE; char *p = nameEnd+1; char *pp = strchr(p, '-'); if (pp == NULL) errAbort("invalid %s subrange: %s", desc, name); *pp = '\0'; *start = sqlUnsigned(p); *pp = '-'; pp++; *end = sqlUnsigned(pp); if (*end <= *start) errAbort("range zero or negative length in %s subrange: %s", desc, name); *nameEnd = '\0'; return TRUE; } static void liftSide(char *desc, struct hash *seqSizes, struct psl *psl, char *name, char strand, unsigned *seqSize, int *start, int *end, unsigned *starts) /* life one side of the alignment */ { int regStart, regEnd, i; if (parseName(desc, name, ®Start, ®End)) { *seqSize = hashIntVal(seqSizes, name); if (*end > *seqSize) errAbort("subrange %s:%d-%d extends past sequence end %ud", name, regStart, regEnd, *seqSize); *start += regStart; *end += regStart; if (strand == '-') reverseIntRange(®Start, ®End, *seqSize); for (i = 0; i < psl->blockCount; i++) starts[i] += regStart; } } static void liftIt(struct psl *psl, struct hash *qSizes, struct hash *tSizes) /* lift one subrange PSL based on tName */ { if (qSizes != NULL) liftSide("qName", qSizes, psl, psl->qName, pslQStrand(psl), &psl->qSize, &psl->qStart, &psl->qEnd, psl->qStarts); if (tSizes != NULL) liftSide("tName", tSizes, psl, psl->tName, pslTStrand(psl), &psl->tSize, &psl->tStart, &psl->tEnd, psl->tStarts); } static void pslLiftSubrangeBlat(char *inPsl, char *outPsl, char *qSizesFile, char *tSizesFile) /* pslLiftSubrangeBlat - lift PSLs from blat subrange alignments. */ { struct hash *qSizes = (qSizesFile != NULL) ? loadSizes(qSizesFile) : NULL; struct hash *tSizes = (tSizesFile != NULL) ? loadSizes(tSizesFile) : NULL; struct pslReader *inRd = pslReaderFile(inPsl, NULL); FILE *outFh = mustOpen(outPsl, "w"); struct psl *psl; while ((psl = pslReaderNext(inRd)) != NULL) { liftIt(psl, qSizes, tSizes); pslTabOut(psl, outFh); pslFree(&psl); } carefulClose(&outFh); pslReaderFree(&inRd); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); char *qSizesFile = optionVal("qSizes", NULL); char *tSizesFile = optionVal("tSizes", NULL); if ((qSizesFile == NULL) && (tSizesFile == NULL)) errAbort("must specify at least one of -tSizes or -qSizes"); pslLiftSubrangeBlat(argv[1], argv[2], qSizesFile, tSizesFile); return 0; }