bea3a22e1454e8fee62a026ffcec8c57af4005be markd Sat Jun 6 13:46:13 2026 -0700 added -tsv option to blastToPsl and blastXmlToPsl to allow parsing scores with other programs diff --git src/hg/blastToPsl/blastXmlToPsl.c src/hg/blastToPsl/blastXmlToPsl.c index 0bd143800ba..e067c9133a2 100644 --- src/hg/blastToPsl/blastXmlToPsl.c +++ src/hg/blastToPsl/blastXmlToPsl.c @@ -9,60 +9,62 @@ #include "ncbiBlast.h" #include "pslBuild.h" void usage() /* Explain usage and exit. */ { errAbort( "blastXmlToPsl - convert blast XML output to PSLs\n" "usage:\n" " blastXmlToPsl [options] blastXml psl\n" "\n" "options:\n" " -scores=file - Write score information to this file. Format is:\n" " strands qName qStart qEnd tName tStart tEnd bitscore eVal qDef tDef\n" + " -tsv - Write score information with a TSV header.\n" " -verbose=n - n >= 3 prints each line of file after parsing.\n" " n >= 4 dumps the result of each query\n" " -eVal=n n is e-value threshold to filter results. Format can be either\n" " an integer, double or 1e-10. Default is no filter.\n" " -pslx - create PSLX output (includes sequences for blocks)\n" " -convertToNucCoords - convert protein to nucleic alignments to nucleic\n" " to nucleic coordinates\n" " -qName=src - define element used to obtain the qName. The following\n" " values are support:\n" " o query-ID - use contents of the <Iteration_query-ID> element if it\n" " exists, otherwise use <BlastOutput_query-ID>\n" " o query-def0 - use the first white-space separated word of the\n" " <Iteration_query-def> element if it exists, otherwise the first word\n" " of <BlastOutput_query-def>.\n" " Default is query-def0.\n" " -tName=src - define element used to obtain the tName. 
The following\n" " values are support:\n" " o Hit_id - use contents of the <Hit-id> element.\n" " o Hit_def0 - use the first white-space separated word of the\n" " <Hit_def> element.\n" " o Hit_accession - contents of the <Hit_accession> element.\n" " Default is Hit-def0.\n" " -forcePsiBlast - treat as output of PSI-BLAST. blast-2.2.16 and maybe\n" " others indentify psiblast as blastp." "\n" "Output only results of last round from PSI BLAST\n"); } static struct optionSpec options[] = { {"scores", OPTION_STRING}, + {"tsv", OPTION_BOOLEAN}, {"eVal", OPTION_DOUBLE}, {"pslx", OPTION_BOOLEAN}, {"convertToNucCoords", OPTION_BOOLEAN}, {"qName", OPTION_STRING}, {"tName", OPTION_STRING}, {"forcePsiBlast", OPTION_BOOLEAN}, {NULL, 0}, }; enum qNameSrc { qNameSrcQueryId, qNameSrcQueryDef0 }; enum tNameSrc { @@ -254,39 +256,39 @@ } return iterRec; } static void convertPsiBlast(struct ncbiBlastBlastOutput *outputRec, unsigned flags, FILE *pslFh, FILE *scoreFh) /* convert psi-blast */ { struct ncbiBlastBlastOutputIterations *itersRec; for (itersRec = outputRec->ncbiBlastBlastOutputIterations; itersRec != NULL; itersRec = itersRec->next) { struct ncbiBlastIteration *iterRec = findLastIterForQuery(itersRec->ncbiBlastIteration); processIterRec(outputRec, iterRec, flags, pslFh, scoreFh); } } -static void blastXmlToPsl(char *blastXmlFile, char *pslFile, char *scoreFile) +static void blastXmlToPsl(char *blastXmlFile, char *pslFile, char *scoreFile, boolean tsv) /* blastXmlToPsl - convert blast XML output to PSLs. 
*/ { struct xap *xap = xapNew(ncbiBlastStartHandler, ncbiBlastEndHandler, blastXmlFile); xapParseFile(xap, blastXmlFile); FILE *pslFh = mustOpen(pslFile, "w"); FILE *scoreFh = NULL; if (scoreFile != NULL) - scoreFh = pslBuildScoresOpen(scoreFile, TRUE); + scoreFh = pslBuildScoresOpen(scoreFile, TRUE, tsv); if (xap->topObject == NULL) errAbort("empty BLAST XML file: %s", blastXmlFile); char *expectType = "BlastOutput"; if (!sameString(xap->topType, expectType)) errAbort("expected top XML element of type \"%s\", got \"%s\"", expectType, xap->topType); struct ncbiBlastBlastOutput *outputRec = xap->topObject; unsigned flags = getFlags(outputRec); if (flags & psiblast) convertPsiBlast(outputRec, flags, pslFh, scoreFh); else convertOnePassBlast(outputRec, flags, pslFh, scoreFh); carefulClose(&scoreFh); @@ -312,20 +314,20 @@ else if (sameString(qNameSrcStr, "query-def0")) qNameSrc = qNameSrcQueryDef0; else errAbort("invalid value for -qName, expect on of: \"query-ID\", or \"query-def0\", got \"%s\"", qNameSrcStr); char *tNameSrcStr = optionVal("tName", "Hit_def0"); if (sameString(tNameSrcStr, "Hit_id")) tNameSrc = tNameSrcHitId; else if (sameString(tNameSrcStr, "Hit_def0")) tNameSrc = tNameSrcHitDef0; else if (sameString(tNameSrcStr, "Hit_accession")) tNameSrc = tNameSrcHitAccession; else errAbort("invalid value for -tName, expect on of: \"Hit_id\", \"Hit_def0\", or \"Hit_accession\", got \"%s\"", tNameSrcStr); -blastXmlToPsl(argv[1], argv[2], optionVal("scores", NULL)); +blastXmlToPsl(argv[1], argv[2], optionVal("scores", NULL), optionExists("tsv")); if (errCount > 0) errAbort("%d invalid PSLs created", errCount); return 0; }