68f95eec388eb3895787fc885fa01fe7ee3447ac angie Thu Aug 26 11:36:26 2021 -0700 Recently usher started prepending node_ to the numeric internal node names stored in the protobuf, so node names in usher outputs are now node_<number> instead of just <number>. However, the protobuf itself still has the plain numeric names, so it may be necessary to strip the node_ prefix when looking up bigTree nodes. diff --git src/hg/hgPhyloPlace/vcfFromFasta.c src/hg/hgPhyloPlace/vcfFromFasta.c index 3275373..2b2e578 100644 --- src/hg/hgPhyloPlace/vcfFromFasta.c +++ src/hg/hgPhyloPlace/vcfFromFasta.c @@ -109,35 +109,34 @@ for (i = 0; i < seq->size; i++) if (seq->dna[i] != 'n' && isIupacAmbiguous(seq->dna[i])) ambigCount++; if (passes) { if (hashLookup(uniqNames, seq->name)) { struct dyString *dy = dyStringCreate("Sequence name '%s' has already been used; " "ignoring subsequent usage " "(%d bases, %d N's, %d ambiguous).", seq->name, seq->size, nCountTotal, ambigCount); slPairAdd(retFailedSeqs, dyStringCannibalize(&dy), seq); } else { - if (isAllDigits(seq->name) || hashLookup(treeNames, seq->name)) + if (isInternalNodeName(seq->name, 0) || hashLookup(treeNames, seq->name)) { - // Internal nodes of tree have numeric IDs, so usher may reject numeric name - // as a conflict. usher will definitely reject a sequence name already in the tree. - // Add a prefix so usher won't reject the sequence. + // usher will reject any sequence whose name is already in the tree, even a + // numeric internal node name. Add a prefix so usher won't reject sequence. char newName[strlen(seq->name)+32]; safef(newName, sizeof newName, "uploaded_%s", seq->name); freeMem(seq->name); seq->name = cloneString(newName); } struct seqInfo *si; AllocVar(si); si->seq = seq; si->nCountStart = nCountStart; si->nCountMiddle = nCountMiddle; si->nCountEnd = nCountEnd; si->ambigCount = ambigCount; hashAdd(uniqNames, seq->name, NULL); slAddHead(&filteredSeqs, si); }