fff0062e7c441f814c4aecfc74ac8ed60ae4eeb3 markd Wed Jun 5 03:41:37 2024 -0700 add option for overlapSelect statistics output to have a TSV header on output diff --git src/hg/utils/overlapSelect/overlapSelect.c src/hg/utils/overlapSelect/overlapSelect.c index e0d887f..dc45b60 100644 --- src/hg/utils/overlapSelect/overlapSelect.c +++ src/hg/utils/overlapSelect/overlapSelect.c @@ -33,30 +33,31 @@ {"excludeSelf", OPTION_BOOLEAN}, {"idMatch", OPTION_BOOLEAN}, {"dropped", OPTION_STRING}, {"overlapThreshold", OPTION_FLOAT}, {"overlapThresholdCeil", OPTION_FLOAT}, {"overlapSimilarity", OPTION_FLOAT}, {"overlapSimilarityCeil", OPTION_FLOAT}, {"overlapBases", OPTION_INT}, {"merge", OPTION_BOOLEAN}, {"mergeOutput", OPTION_BOOLEAN}, {"statsOutput", OPTION_BOOLEAN}, {"statsOutputAll", OPTION_BOOLEAN}, {"statsOutputBoth", OPTION_BOOLEAN}, {"idOutput", OPTION_BOOLEAN}, {"aggregate", OPTION_BOOLEAN}, + {"tsv", OPTION_BOOLEAN}, {NULL, 0} }; /* incompatible with aggregate */ static char *aggIncompatible[] = { "overlapSimilarity", "overlapSimilarityCeil", "overlapThresholdCeil", "overlapBases", "merge", "mergeOutput", "idMatch", NULL }; /* file format constants */ enum recordFmt { UNKNOWN_FMT, PSL_FMT, PSLQ_FMT, CHAIN_FMT, @@ -73,30 +74,31 @@ struct coordCols selectCoordCols; unsigned selectCaOpts = 0; unsigned inFmt = UNKNOWN_FMT; struct coordCols inCoordCols; unsigned inCaOpts = 0; unsigned selectOpts = 0; boolean useAggregate = FALSE; boolean nonOverlapping = FALSE; boolean mergeOutput = FALSE; boolean idOutput = FALSE; boolean statsOutput = FALSE; boolean outputAll = FALSE; boolean outputBoth = FALSE; +boolean tsvOutput = FALSE; struct overlapCriteria criteria = {0.0, 1.1, 0.0, 1.1, -1}; enum recordFmt parseFormatSpec(char *fmt) /* parse a format specification */ { if (sameString(fmt, "psl")) return PSL_FMT; if (sameString(fmt, "pslq")) return PSLQ_FMT; if (sameString(fmt, "chain")) return CHAIN_FMT; if (sameString(fmt, "chainq")) return CHAINQ_FMT; if (sameString(fmt, "genePred")) return GENEPRED_FMT; @@ -333,34 +335,35 @@ = createChromAnnReader(inFile, inFmt, inCaOpts, &inCoordCols); loadSelectTable(selectFile); FILE *outFh = mustOpen(outFile, "w"); FILE *dropFh = NULL; if (dropFile != NULL) dropFh = mustOpen(dropFile, "w"); if (idOutput) { if (useAggregate) fputs("#inId\n", outFh); else fputs("#inId\t" "selectId\n", outFh); } if (statsOutput) { + char *headerStart = tsvOutput ? "" : "#"; if (useAggregate) - fputs("#inId\t" "inOverlap\t" "inOverBases\t" "inBases\n", outFh); + fprintf(outFh, "%sinId\t" "inOverlap\t" "inOverBases\t" "inBases\n", headerStart); else - fputs("#inId\t" "selectId\t" "inOverlap\t" "selectOverlap\t" "overBases\t" "similarity\t" "inBases\t" "selectBases\n", outFh); + fprintf(outFh, "%sinId\t" "selectId\t" "inOverlap\t" "selectOverlap\t" "overBases\t" "similarity\t" "inBases\t" "selectBases\n", headerStart); } if (useAggregate) doAggregateOverlaps(inCar, outFh, dropFh); else doItemOverlaps(inCar, outFh, dropFh); inCar->carFree(&inCar); if (statsOutput && outputBoth) outputStatsSelNotUsed(outFh); carefulClose(&outFh); carefulClose(&dropFh); /* enable for memory analysis */ #if 0 @@ -484,30 +487,31 @@ if (outputBoth) outputAll = TRUE; if (mergeOutput) { if (nonOverlapping) errAbort("can't use -mergeOutput with -nonOverlapping"); if (useAggregate) errAbort("can't use -mergeOutput with -aggregate"); if ((selectFmt == CHAIN_FMT) || (selectFmt == CHAINQ_FMT) || (inFmt == CHAIN_FMT) || (inFmt == CHAINQ_FMT)) if (useAggregate) errAbort("can't use -mergeOutput with chains"); selectCaOpts |= chromAnnSaveLines; } dropFile = optionVal("dropped", NULL); +tsvOutput = optionExists("tsv"); /* check for options incompatible with aggregate mode */ if (useAggregate) { int i; for (i = 0; aggIncompatible[i] != NULL; i++) { if (optionExists(aggIncompatible[i])) errAbort("-%s is not allowed -aggregate", aggIncompatible[i]); } } overlapSelect(selectFile, inFile, outFile, dropFile); return 0; }