ae63ce55185746c5fabb2ead8cc20215faada36d
max
Mon Nov 3 08:03:14 2025 -0800
adding ancient dna track, refs #36592

diff --git src/hg/makeDb/hgLoadMaf/hgLoadMafSummary.c src/hg/makeDb/hgLoadMaf/hgLoadMafSummary.c
index 8f31bd9e10c..9e0355fa181 100644
--- src/hg/makeDb/hgLoadMaf/hgLoadMafSummary.c
+++ src/hg/makeDb/hgLoadMaf/hgLoadMafSummary.c
@@ -105,87 +105,99 @@
 }
 
 struct mafComp *mafMaster(struct mafAli *maf, struct mafFile *mf, char *fileName)
 /* Get master component from maf. Error abort if no master component */
 {
 struct mafComp *mcMaster = mafMayFindCompPrefix(maf, database, ".");
 if (mcMaster == NULL)
     {
     errAbort("Couldn't find %s. sequence line %d of %s\n", database, mf->lf->lineIx, fileName);
     }
 return mcMaster;
 }
 
-char *mafSplitSrcGetChrom(char *src)
+char *mafSplitSrcGetChrom(char *src, char* database)
 /* src can be in format chrom, db|chrom or db.chrom: split string on separator and return pointer to chrom.
- * Can even handle a db with a single dot in it and the db.chrom format, but only if db has a single dot (=NCBI format)
- * The side effect of this function is that src contains only the db, not the chrom anymore.
+ * The db part of src can also have dots in it (e.g. GCF_1234.3), but only if the 'database' argument is not
+ * NULL and src starts with 'database' followed by a dot; any other src is split on its first dot.
+ * Changes 'src': the side effect of this function is that src contains only the db, not the chrom anymore
+ * (a src without any separator is left unchanged and is itself returned as the chrom).
  * */
 {
 char *pipe = strchr(src, '|');
 // pipe found? It's the new format, db|chrom
 if (pipe)
     {
     *pipe = '\0';
     return pipe+1;
     }
 
 char *dot1 = strchr(src, '.');
 if (!dot1)
     return src; // if there are no dots, assume the name is the chrom
-else
-    {
-    // the most common format: db.chrom
-    char* chrom = dot1 + 1;
-    *dot1 = '\0';
-    return chrom;
-    }
+
+if (database)
+    {
+    // if 'database' is not NULL we can resolve a situation like GCF_1234.3.CJS12323.4 because we know that
+    // GCF_1234.3 is the db part: split right after the database prefix instead of on the first dot
+    size_t dbLen = strlen(database);
+    if (dbLen > 0 && strncmp(src, database, dbLen) == 0 && src[dbLen] == '.')
+        {
+        src[dbLen] = '\0';
+        return src + dbLen + 1;
+        }
+    }
+
+// database is NULL or is not the prefix of src (e.g. a non-reference species): split on the first dot
+char *chrom = dot1 + 1;
+*dot1 = '\0';
+return chrom;
 }
 
 long processMaf(struct mafAli *maf, struct hash *componentHash, FILE *f, struct mafFile *mf, char *fileName)
 /* Compute scores for each pairwise component in the maf and output to .tab file */
 {
 struct mafComp *mc = NULL, *nextMc = NULL;
 struct mafSummary *ms, *msPending;
 struct mafAli pairMaf;
 long componentCount = 0;
 struct mafComp *mcMaster = mafMaster(maf, mf, fileName);
 struct mafComp *oldMasterNext = mcMaster->next;
 char *chrom;
 char src[256];
 
 strcpy(src, mcMaster->src);
-chrom = mafSplitSrcGetChrom(src);
+chrom = mafSplitSrcGetChrom(src, database);
 for (mc = maf->components; mc != NULL; mc = nextMc)
     {
     nextMc = mc->next;
     if (sameString(mcMaster->src, mc->src) || mc->size == 0)
         continue;
 
     /* create maf summary for this alignment component */
     AllocVar(ms);
     ms->chrom = cloneString(chrom);
     /* both MAF and BED format define chromStart as 0-based */
     ms->chromStart = mcMaster->start;
     /* BED chromEnd is start+size */
     ms->chromEnd = mcMaster->start + mcMaster->size;
     ms->src = cloneString(mc->src);
-    mafSplitSrcGetChrom(ms->src);
+    mafSplitSrcGetChrom(ms->src, database);
 
     /* construct pairwise maf for scoring */
     ZeroVar(&pairMaf);
     pairMaf.textSize = maf->textSize;
     pairMaf.components = mcMaster;
     mcMaster->next = mc;
     mc->next = NULL;
     ms->score = scorePairwise(&pairMaf);
     ms->leftStatus[0] = mc->leftStatus;
     ms->rightStatus[0] = mc->rightStatus;
 
     /* restore component links to allow memory recovery */
     mcMaster->next = oldMasterNext;
     mc->next = nextMc;