051d8d750d9ee269ea2a26575a90a8591fb4cd84
markd
  Mon Jun 8 09:34:32 2026 -0700
added option to get accession with version from blast xml

diff --git src/hg/blastToPsl/blastXmlToPsl.c src/hg/blastToPsl/blastXmlToPsl.c
index e067c9133a2..cc186105a48 100644
--- src/hg/blastToPsl/blastXmlToPsl.c
+++ src/hg/blastToPsl/blastXmlToPsl.c
@@ -28,59 +28,62 @@
   "          an integer, double or 1e-10. Default is no filter.\n"
   "  -pslx - create PSLX output (includes sequences for blocks)\n"
   "  -convertToNucCoords - convert protein to nucleic alignments to nucleic\n"
   "   to nucleic coordinates\n"
   "  -qName=src - define element used to obtain the qName.  The following\n"
   "   values are support:\n"
   "     o query-ID - use contents of the <Iteration_query-ID> element if it\n"
   "       exists, otherwise use <BlastOutput_query-ID>\n"
   "     o query-def0 - use the first white-space separated word of the\n"
   "       <Iteration_query-def> element if it exists, otherwise the first word\n"
   "       of <BlastOutput_query-def>.\n"
   "   Default is query-def0.\n"
   "  -tName=src - define element used to obtain the tName.  The following\n"
   "   values are support:\n"
   "     o Hit_id - use contents of the <Hit-id> element.\n"
+  "     o Hit_id_id - with an id like 'gb|CM102538.1|' pull out `CM102538.1'\n"
   "     o Hit_def0 - use the first white-space separated word of the\n"
   "       <Hit_def> element.\n"
   "     o Hit_accession - contents of the <Hit_accession> element.\n"
+  "        WARNING: this drops the version.\n"
   "   Default is Hit-def0.\n"
   "  -forcePsiBlast - treat as output of PSI-BLAST. blast-2.2.16 and maybe\n"
   "   others indentify psiblast as blastp."
   "\n"
   "Output only results of last round from PSI BLAST\n");
 }
 
 static struct optionSpec options[] = {
     {"scores", OPTION_STRING},
     {"tsv", OPTION_BOOLEAN},
     {"eVal", OPTION_DOUBLE},
     {"pslx", OPTION_BOOLEAN},
     {"convertToNucCoords", OPTION_BOOLEAN},
     {"qName", OPTION_STRING},
     {"tName", OPTION_STRING},
     {"forcePsiBlast", OPTION_BOOLEAN},
     {NULL, 0},
 };
 
 enum qNameSrc {
     qNameSrcQueryId,
     qNameSrcQueryDef0
 };
 
 enum tNameSrc {
     tNameSrcHitId,
+    tNameSrcHitIdId,  /* second '|'-separated field of the Hit id, see getTName() */
     tNameSrcHitDef0,
     tNameSrcHitAccession
 };
 
 
 static double eVal = -1; /* default Expect value signifying no filtering */
 static boolean pslxFmt = FALSE; /* output in pslx format */
 static int errCount = 0; /* count of  PSLs failing checks */
 static boolean convertToNucCoords = FALSE; /* adjust query coordinates */
 static boolean forcePsiBlast = FALSE; /* assume PSI-BLAST output  */
 static enum qNameSrc qNameSrc = qNameSrcQueryDef0;   /* source of qName */
 static enum tNameSrc tNameSrc = tNameSrcHitDef0;   /* source of tName */
 
 struct coords
 /* structure to return converted coordinates */
@@ -165,30 +168,42 @@
 return buf->string;
 }
 
 static char *getTName(struct ncbiBlastHit *hitRec)
 /* obtain the tName give the requested source */
 {
 static struct dyString *buf = NULL;
 if (buf == NULL)
     buf = dyStringNew(32);
 dyStringClear(buf);
 switch (tNameSrc)
     {
     case tNameSrcHitId:
         dyStringAppend(buf, hitRec->ncbiBlastHitId->text);
         break;
+    case tNameSrcHitIdId:  /* 2nd '|'-separated field of Hit id: 'gb|CM102538.1|' -> 'CM102538.1' */
+        {
+        char *id = cloneString(hitRec->ncbiBlastHitId->text);  /* chop a scratch copy, not the record's own text */
+        char *words[4];
+        int n = chopByChar(id, '|', words, ArraySize(words));
+        if ((n >= 2) && (words[1][0] != '\0'))  /* an empty 2nd field (e.g. 'gb||') must not yield an empty tName */
+            dyStringAppend(buf, words[1]);
+        else
+            dyStringAppend(buf, hitRec->ncbiBlastHitId->text);  /* no usable 2nd field: fall back to the whole Hit id */
+        freeMem(id);  /* released only after words[1] has been appended to buf */
+        break;
+        }
     case tNameSrcHitDef0:
         appendFirstWord(buf, hitRec->ncbiBlastHitDef->text);
         break;        
     case tNameSrcHitAccession:
         dyStringAppend(buf, hitRec->ncbiBlastHitAccession->text);
         break;
     }
 return buf->string;
 }
 
 static void processHspRec(struct ncbiBlastBlastOutput *outputRec, struct ncbiBlastIteration *iterRec, struct ncbiBlastHit *hitRec,
                           struct ncbiBlastHsp *hspRec, unsigned flags, FILE *pslFh, FILE *scoreFh)
 /* process one HSP record, converting to a PSL */
 {
 int queryLen = (iterRec->ncbiBlastIterationQueryLen != NULL) 
@@ -307,27 +322,29 @@
 pslxFmt = optionExists("pslx");
 convertToNucCoords = optionExists("convertToNucCoords");
 forcePsiBlast = optionExists("forcePsiBlast");
 
 char *qNameSrcStr = optionVal("qName", "query-def0");
 if (sameString(qNameSrcStr, "query-ID"))
     qNameSrc = qNameSrcQueryId;
 else if (sameString(qNameSrcStr, "query-def0"))
     qNameSrc = qNameSrcQueryDef0;
 else
     errAbort("invalid value for -qName, expect on of: \"query-ID\", or \"query-def0\", got \"%s\"", qNameSrcStr);
 
 char *tNameSrcStr = optionVal("tName", "Hit_def0");
 if (sameString(tNameSrcStr, "Hit_id"))
     tNameSrc = tNameSrcHitId;
+else if (sameString(tNameSrcStr, "Hit_id_id"))
+    tNameSrc = tNameSrcHitIdId;
 else if (sameString(tNameSrcStr, "Hit_def0"))
     tNameSrc = tNameSrcHitDef0;
 else if (sameString(tNameSrcStr, "Hit_accession"))
     tNameSrc = tNameSrcHitAccession;
 else
-    errAbort("invalid value for -tName, expect on of: \"Hit_id\",  \"Hit_def0\", or \"Hit_accession\", got \"%s\"", tNameSrcStr);
+    errAbort("invalid value for -tName, expect on of: \"Hit_id\", \"Hit_id_id\", \"Hit_def0\", or \"Hit_accession\", got \"%s\"", tNameSrcStr);
 
 blastXmlToPsl(argv[1], argv[2], optionVal("scores", NULL), optionExists("tsv"));
 if (errCount > 0)
     errAbort("%d invalid PSLs created", errCount);
 return 0;
 }