c31d6f7174025d44270f64f086b06df10b4196a6
hiram
  Sat Nov 22 17:14:37 2025 -0800
now correctly managing GenArk GCA and GCF assembly names refs #34370

diff --git src/hg/ratStuff/mafSplit/mafSplit.c src/hg/ratStuff/mafSplit/mafSplit.c
index 102d5cd20d4..d671d296afe 100644
--- src/hg/ratStuff/mafSplit/mafSplit.c
+++ src/hg/ratStuff/mafSplit/mafSplit.c
@@ -37,30 +37,33 @@
   "                     Instead of auto-incrementing an integer to determine\n"
   "                     output filename, use the target sequence name\n"
   "                     to tack onto outRoot.\n"
   "   -useHashedName=N  For use only with -byTarget.\n"
   "                     Instead of auto-incrementing an integer or requiring\n"
   "                     a unique number in the sequence name, use a hash\n"
   "                     function on the sequence name to compute an N-bit\n"
   "                     number.  This limits the max #filenames to 2^N and\n"
   "                     ensures that even if different subsets of sequences\n"
   "                     appear in different pairwise mafs, the split file\n"
   "                     names will be consistent (due to hash function).\n"
   "                     This option is useful when a \"scaffold-based\"\n"
   "                     assembly has more than one sequence name pattern,\n"
   "                     e.g. both chroms and scaffolds.\n"
   "\n"
+  "NOTE: as of November 2025 - can manage GenArk assembly names GCA_...\n"
+  "      and GCF_... with their .n extensions.  Can only work with\n"
+  "      such names that begin with GC."
   );
 }
 
 static struct optionSpec options[] = {
    {"byTarget", OPTION_BOOLEAN},
    {"outDirDepth", OPTION_INT},
    {"useSequenceName", OPTION_BOOLEAN},
    {"useFullSequenceName", OPTION_BOOLEAN},
    {"useHashedName", OPTION_INT},
    {NULL, 0},
 };
 
 /* Option variables */
 static boolean byTarget = FALSE;
 static int outDirDepth = 0;
@@ -164,31 +167,43 @@
 		    targetName, path);
 	    f = mustOpen(path, "a");
 	    }
 	else
 	    {
 	    int seqNum = (*pSeqNum)++;
 	    if (useSequenceName)
 		seqNum = numberFromName(targetName);
 	    else if (hashedNameBits > 0)
 		seqNum = numberFromHashedName(targetName, hashedNameBits);
 	    if (useFullSequenceName)
 		{
 		/* skip over db. prefix if any */
 		char *target = strchr(targetName,'.');
 		if (target)
+		    {
 		    ++target;
+		    /* if GenArk assembly, get to the next dot */
+		    if (startsWith("GC", targetName))
+		        {
+		        char *nextDot = strchr(target,'.');
+		        if (nextDot)
+			    {
+			    ++nextDot;
+			    target = nextDot;
+			    }
+		        }   /* else: no next dot, leave target where it is */
+		    }
 		else
 		    target = targetName;
 		path = mkOutPath(outRootDir, outRootFile, seqNum, target);
 		}
 	    else
 		path = mkOutPath(outRootDir, outRootFile, seqNum, NULL);
 	    verbose(3, "Opening path %s for writing and adding it to hash "
 		    "for %s\n", path, targetName);
 	    f = mustOpen(path, "w");
 	    fprintf(f, "##maf version=1 scoring=blastz\n");
 	    if (hashedNameBits > 0)
 		hashAdd(pathHash, path, path);
 	    else
 		hashAdd(pathHash, targetName, path);
 	    }
@@ -219,31 +234,41 @@
         hashAdd(splitHash, bed->chrom, cloneBed(bed));
         }
     else
         slAddTail(&bedList, cloneBed(bed));
     freeMem(bed);
     }
 return splitHash;
 }
 
 char *chromFromSrc(char *src)
 /* get chrom name from <db>.<chrom> */
 {
 char *p;
 if ((p = strchr(src, '.')) == NULL)
     errAbort("Can't find chrom in MAF component src: %s\n", src);
-return ++p;
+++p;	/* skip the dot to the word following */
+if (startsWith("GC", src))
+    {
+    char *nextDot = strchr(p,'.');
+    if (nextDot)
+        {
+        ++nextDot;	/* skip the dot to the word following */
+        p = nextDot;	/* new answer */
+        }
+    }   /* else: no next dot, leave p where it is */
+return p;
 }
 
 void splitMafFile(char *file, char *outDir, char *outPrefix, 
                         struct hash *splitHash)
 /* split file based on positions in hash */
 {
 char *chrom = NULL;
 char outFile[PATH_LEN];
 int ix = 0;
 FILE *f;
 boolean nextFile = TRUE;
 struct bed *bed, *bedList = NULL;
 int splitPos = 0;
 struct mafFile *mf = mafOpen(file);
 struct mafAli *maf = NULL;