051d8d750d9ee269ea2a26575a90a8591fb4cd84 markd Mon Jun 8 09:34:32 2026 -0700 added option to get accession with version from blast xml diff --git src/hg/blastToPsl/blastXmlToPsl.c src/hg/blastToPsl/blastXmlToPsl.c index e067c9133a2..cc186105a48 100644 --- src/hg/blastToPsl/blastXmlToPsl.c +++ src/hg/blastToPsl/blastXmlToPsl.c @@ -28,59 +28,62 @@ " an integer, double or 1e-10. Default is no filter.\n" " -pslx - create PSLX output (includes sequences for blocks)\n" " -convertToNucCoords - convert protein to nucleic alignments to nucleic\n" " to nucleic coordinates\n" " -qName=src - define element used to obtain the qName. The following\n" " values are support:\n" " o query-ID - use contents of the <Iteration_query-ID> element if it\n" " exists, otherwise use <BlastOutput_query-ID>\n" " o query-def0 - use the first white-space separated word of the\n" " <Iteration_query-def> element if it exists, otherwise the first word\n" " of <BlastOutput_query-def>.\n" " Default is query-def0.\n" " -tName=src - define element used to obtain the tName. The following\n" " values are support:\n" " o Hit_id - use contents of the <Hit-id> element.\n" + " o Hit_id_id - with an id like 'gb|CM102538.1|' pull out `CM102538.1'\n" " o Hit_def0 - use the first white-space separated word of the\n" " <Hit_def> element.\n" " o Hit_accession - contents of the <Hit_accession> element.\n" + " WARNING: this drops the version.\n" " Default is Hit-def0.\n" " -forcePsiBlast - treat as output of PSI-BLAST. blast-2.2.16 and maybe\n" " others indentify psiblast as blastp." "\n" "Output only results of last round from PSI BLAST\n"); } static struct optionSpec options[] = { {"scores", OPTION_STRING}, {"tsv", OPTION_BOOLEAN}, {"eVal", OPTION_DOUBLE}, {"pslx", OPTION_BOOLEAN}, {"convertToNucCoords", OPTION_BOOLEAN}, {"qName", OPTION_STRING}, {"tName", OPTION_STRING}, {"forcePsiBlast", OPTION_BOOLEAN}, {NULL, 0}, }; enum qNameSrc { qNameSrcQueryId, qNameSrcQueryDef0 }; enum tNameSrc { tNameSrcHitId, + tNameSrcHitIdId, tNameSrcHitDef0, tNameSrcHitAccession }; static double eVal = -1; /* default Expect value signifying no filtering */ static boolean pslxFmt = FALSE; /* output in pslx format */ static int errCount = 0; /* count of PSLs failing checks */ static boolean convertToNucCoords = FALSE; /* adjust query coordinates */ static boolean forcePsiBlast = FALSE; /* assume PSI-BLAST output */ static enum qNameSrc qNameSrc = qNameSrcQueryDef0; /* source of qName */ static enum tNameSrc tNameSrc = tNameSrcHitDef0; /* source of tName */ struct coords /* structure to return converted coordinates */ @@ -165,30 +168,42 @@ return buf->string; } static char *getTName(struct ncbiBlastHit *hitRec) /* obtain the tName give the requested source */ { static struct dyString *buf = NULL; if (buf == NULL) buf = dyStringNew(32); dyStringClear(buf); switch (tNameSrc) { case tNameSrcHitId: dyStringAppend(buf, hitRec->ncbiBlastHitId->text); break; + case tNameSrcHitIdId: + { + char *id = cloneString(hitRec->ncbiBlastHitId->text); + char *words[4]; + int n = chopByChar(id, '|', words, ArraySize(words)); + if (n >= 2) + dyStringAppend(buf, words[1]); + else + dyStringAppend(buf, hitRec->ncbiBlastHitId->text); + freeMem(id); + break; + } case tNameSrcHitDef0: appendFirstWord(buf, hitRec->ncbiBlastHitDef->text); break; case tNameSrcHitAccession: dyStringAppend(buf, hitRec->ncbiBlastHitAccession->text); break; } return buf->string; } static void processHspRec(struct ncbiBlastBlastOutput *outputRec, struct ncbiBlastIteration *iterRec, struct ncbiBlastHit *hitRec, struct ncbiBlastHsp *hspRec, unsigned flags, FILE *pslFh, FILE *scoreFh) /* process one HSP record, converting to a PSL */ { int queryLen = (iterRec->ncbiBlastIterationQueryLen != NULL) @@ -307,27 +322,29 @@ pslxFmt = optionExists("pslx"); convertToNucCoords = optionExists("convertToNucCoords"); forcePsiBlast = optionExists("forcePsiBlast"); char *qNameSrcStr = optionVal("qName", "query-def0"); if (sameString(qNameSrcStr, "query-ID")) qNameSrc = qNameSrcQueryId; else if (sameString(qNameSrcStr, "query-def0")) qNameSrc = qNameSrcQueryDef0; else errAbort("invalid value for -qName, expect on of: \"query-ID\", or \"query-def0\", got \"%s\"", qNameSrcStr); char *tNameSrcStr = optionVal("tName", "Hit_def0"); if (sameString(tNameSrcStr, "Hit_id")) tNameSrc = tNameSrcHitId; +else if (sameString(tNameSrcStr, "Hit_id_id")) + tNameSrc = tNameSrcHitIdId; else if (sameString(tNameSrcStr, "Hit_def0")) tNameSrc = tNameSrcHitDef0; else if (sameString(tNameSrcStr, "Hit_accession")) tNameSrc = tNameSrcHitAccession; else - errAbort("invalid value for -tName, expect on of: \"Hit_id\", \"Hit_def0\", or \"Hit_accession\", got \"%s\"", tNameSrcStr); + errAbort("invalid value for -tName, expect on of: \"Hit_id\", \"Hit_id_id\", \"Hit_def0\", or \"Hit_accession\", got \"%s\"", tNameSrcStr); blastXmlToPsl(argv[1], argv[2], optionVal("scores", NULL), optionExists("tsv")); if (errCount > 0) errAbort("%d invalid PSLs created", errCount); return 0; }