b6f7fd1e76673d2fd96e46ede11a11f421e15675
hiram
  Sat Nov 22 17:13:04 2025 -0800
now correctly managing GenArk GCA and GCF assembly names refs #34370

diff --git src/hg/ratStuff/mafAddIRows/mafAddIRows.c src/hg/ratStuff/mafAddIRows/mafAddIRows.c
index 3875bf785d5..5c2b861d1eb 100644
--- src/hg/ratStuff/mafAddIRows/mafAddIRows.c
+++ src/hg/ratStuff/mafAddIRows/mafAddIRows.c
@@ -26,30 +26,33 @@
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "mafAddIRows - add 'i' rows to a maf\n"
   "usage:\n"
   "   mafAddIRows mafIn twoBitFile mafOut\n"
   "WARNING:  requires a maf with only a single target sequence\n"
   "options:\n"
   "   -nBeds=listOfBedFiles\n"
   "       reads in list of bed files, one per species, with N locations\n"
   "   -addN\n"
   "       adds rows of N's into maf blocks (rather than just annotating them)\n"
   "   -addDash\n"
   "       adds rows of -'s into maf blocks (rather than just annotating them)\n"
+  "NOTE: as of November 2025 - can manage GenArk assembly names GCA_...\n"
+  "      and GCF_... with their .n extensions.  Can only work with\n"
+  "      such names that begin with GC."
   );
 }
 
 static struct optionSpec options[] = {
    {"nBeds", OPTION_STRING},
    {"addN", OPTION_BOOLEAN},
    {"addDash", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 struct bedHead
 {
     struct bed *list;
 };
 
@@ -76,80 +79,100 @@
     struct cBlock cb;
     struct mafComp *mc;
 };
 
 struct strandHead
 {
     struct strandHead *next;
     char strand;
     char *name;
     char *qName;
     int qSize;
     char *species;
     struct linkBlock *links;
 };
 
+static char *chromFromSrc(char *src)
+/* Locate the '.' separating the assembly name from the sequence name in
+ * a MAF component src of the form <db>.<chrom>.  Aborts if src has no '.'.
+ * When src begins with "GC" (GenArk GCA_/GCF_ name) the first '.' starts
+ * the name's .n extension, so the following '.', if there is one, is
+ * returned instead.  The result points into src, at the separator. */
+{
+char *firstDot = strchr(src, '.');
+
+if (firstDot == NULL)
+    errAbort("Can't find chrom in MAF component src: %s\n", src);
+if (!startsWith("GC", src))
+    return firstDot;
+
+/* GenArk name: prefer the dot after the .n extension when present */
+char *secondDot = strchr(firstDot + 1, '.');
+
+return (secondDot != NULL) ? secondDot : firstDot;
+}
+
 struct mafAli *readMafs(struct mafFile *mf)
 {
 struct mafAli *maf;
 char buffer[2048];
 char buffer2[2048];
 struct strandHead *strandHead;
 struct mafAli *mafList = NULL;
 char *ourChrom = NULL;
 
 while((maf = mafNext(mf)) != NULL)
     {
     struct mafComp *mc, *masterMc = maf->components;
     char *species = buffer;
     char *chrom;
 
     if (ourChrom == NULL)
 	ourChrom = masterMc->src;
     else
 	{
 	if (differentString(masterMc->src, ourChrom))
 	    errAbort("ERROR: mafAddIrows requires maf have only one target sequence.\n"
 		"Use mafSplit -byTarget -useFullSequenceName to split maf");
 	}
 
     strcpy(species, masterMc->src);
-    chrom = strchr(species,'.');
+    chrom = chromFromSrc(species);
     if (chrom)
 	*chrom++ = 0;
     else
 	errAbort("reference species has no chrom name\n");
 
     if (masterSpecies == NULL)
 	{
 	masterSpecies = cloneString(species);
 	masterChrom = cloneString(chrom);
 	//printf("master %s %s\n",masterSpecies,masterChrom);
 	}
     else
 	{
 	if (!sameString(masterSpecies, species))
 	    errAbort("first species (%s) not master species (%s)\n",species,masterSpecies);
 	}
 
     for(mc= masterMc->next; mc; mc = mc->next)
 	{
 	struct linkBlock *linkBlock;
 	struct subSpecies *subSpecies = NULL;
 
 	strcpy(species, mc->src);
-	chrom = strchr(species,'.');
+        chrom = chromFromSrc(species);
 	*chrom++ = 0;
 
 	if ((subSpecies = hashFindVal(speciesHash, species)) == NULL)
 	    {
 	    //printf("new species %s\n",species);
 	    AllocVar(subSpecies);
 	    subSpecies->name = cloneString(species);
 	    subSpecies->hash = newHash(6);
 	    subSpecies->blockStatus.strand = '+';
 	    subSpecies->blockStatus.masterStart = masterMc->start;
 	    slAddHead(&speciesList, subSpecies);
 	    hashAdd(speciesHash, species, subSpecies);
 	    }
 	subSpecies->blockStatus.masterEnd = masterMc->start + masterMc->size ;
 	sprintf(buffer2, "%s%c%s", masterChrom,mc->strand,chrom);
@@ -351,66 +374,64 @@
     {
     struct mafComp *mc = NULL, *masterMc, *lastMc = NULL;
     struct mafAli *newMaf = NULL;
     struct blockStatus *blockStatus;
 
     nextMaf = maf->next;
 
     masterMc=maf->components;
     if (masterMc->start > lastEnd)
 	{
 	struct subSpecies *species;
 
 	for(species = speciesList; species; species = species->next)
 	    {
 	    mc = NULL;
-//	    printf("looking at %s\n",species->name);
 	    blockStatus = &species->blockStatus;
 	    if (blockStatus->mc)
 		{
-//	    printf("should match at %s\n",blockStatus->mc->src);
 		switch (blockStatus->mc->rightStatus)
 		    {
 		    case MAF_MISSING_STATUS:
 		    //printf("missing right\n");
 		    case MAF_NEW_NESTED_STATUS:
 		    case MAF_MAYBE_NEW_NESTED_STATUS:
 		    case MAF_CONTIG_STATUS:
 		    case MAF_TANDEM_STATUS:
 		    case MAF_INSERT_STATUS:
 			AllocVar(mc);
 			mc->rightStatus = mc->leftStatus = blockStatus->mc->rightStatus;
 			mc->rightLen = mc->leftLen = blockStatus->mc->rightLen;
 			mc->src = blockStatus->mc->src;
 			mc->srcSize = blockStatus->mc->srcSize;
 			mc->strand = blockStatus->mc->strand;
 			mc->start = blockStatus->mc->start + blockStatus->mc->size;
 			if (lastMc == NULL)
 			    {
 			    struct mafComp *miniMasterMc = NULL;
 			    char *seqName;
 			    struct dnaSeq *seq;
 
 			    AllocVar(miniMasterMc);
 			    miniMasterMc->next = mc;
 			    miniMasterMc->strand = '+';
 			    miniMasterMc->srcSize = masterMc->srcSize;
 			    miniMasterMc->src = masterMc->src;
 			    miniMasterMc->start = lastEnd;
 			    miniMasterMc->size =  masterMc->start - lastEnd;
 
-			    if ((seqName = strchr(miniMasterMc->src, '.')) != NULL)
+			    if ((seqName = chromFromSrc(miniMasterMc->src)) != NULL)
 				seqName++;
 			    else
 			    	seqName = miniMasterMc->src;
 
 //			    printf("hole filled from %d to %d\n",lastEnd, masterMc->start);
 			    seq = twoBitReadSeqFrag(twoBit, seqName, lastEnd, masterMc->start);
 			    miniMasterMc->text = seq->dna;
 
 			    AllocVar(newMaf);
 			    newMaf->textSize = maf->textSize;
 			    newMaf->components = miniMasterMc;
 			    newMaf->next = maf;
 			    if (prevMaf)
 				prevMaf->next = newMaf;
 			    else
@@ -502,31 +523,30 @@
 			    mc->srcSize = 200000;
 			    mc->size = maf->textSize;
 			    mc->text = needMem(mc->size + 1);
 			    memset(mc->text, 'N', mc->size);
 			    }
 			else if (addDash)
 			    {
 				mc->size = masterMc->size;
 			    mc->text = needMem(mc->size + 1);
 			    if (mc->size == 0)
 				errAbort("bad dash add");
 			    memset(mc->text, '-', mc->size);
 			    mc->text[mc->size] = 0;
 			    mc->size = 0;
 			    }
-			    
 			break;
 		    default:
 			break;
 		    }
 		}
 	    }
 	if (mc)
 	    {
 	    blockStatus->mc = mc;
 	    }
 	}
     }
 }
 
 struct hash *readBed(char *fileName)
@@ -541,33 +561,30 @@
 
 while (lineFileRow(lf, row))
     {
     hel = hashLookup(hash, row[0]);
     if ((lastHel) && (hel != lastHel))
 	{
 	assert(bedHead != NULL);
 	slReverse(&bedHead->list);
 	}
 
     if (hel == NULL)
        {
 	char *ptr;
 
 	AllocVar(bedHead);
-	if ((ptr = strchr(row[0], '.')) != NULL)
-	    ptr++;
-	else 
 	ptr = row[0];
 	hel = hashAdd(hash, ptr, bedHead);
 	}
     bedHead = hel->val;
     AllocVar(bed);
     bed->chrom = hel->name;
     bed->chromStart = lineFileNeedNum(lf, row, 1);
     bed->chromEnd = lineFileNeedNum(lf, row, 2);
     if (bed->chromStart > bed->chromEnd)
         errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);
     slAddHead(&bedHead->list, (struct bed *)bed);
     lastHel = hel;
     }
 
 if (bedHead != NULL)