0e66591e44d669ef83fa016069b1ceb283c01800
angie
  Thu Jul 15 12:40:43 2021 -0700
Support more flexible ID search: accessions without dot-versions and isolate names.  When loading FASTA, detect sequence names that are numeric or already in the tree and add a prefix to prevent usher from rejecting the sequences.

diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h
index 64391d4..db22d6f 100644
--- src/hg/hgPhyloPlace/phyloPlace.h
+++ src/hg/hgPhyloPlace/phyloPlace.h
@@ -136,30 +136,31 @@
     char *origLab;      // Originating lab
     char *subLab;       // Submitting lab
     char *region;       // Continent on which sample was collected
     };
 
 struct geneInfo
 /* Information sufficient to determine whether a genome change causes a coding change. */
     {
     struct geneInfo *next;
     struct psl *psl;        // Alignment of transcript to genome
     struct dnaSeq *txSeq;   // Transcript sequence
     };
 
 struct tempName *vcfFromFasta(struct lineFile *lf, char *db, struct dnaSeq *refGenome,
                               boolean *informativeBases, struct slName **maskSites,
+                              struct hash *treeNames,
                               struct slName **retSampleIds, struct seqInfo **retSeqInfo,
                               struct slPair **retFailedSeqs, struct slPair **retFailedPsls,
                               int *pStartTime);
 /* Read in FASTA from lf and make sure each item has a reasonable size and not too high
  * percentage of N's.  Align to reference, extract SNVs from alignment, and save as VCF
  * with sample genotype columns. */
 
 struct usherResults *runUsher(char *usherPath, char *usherAssignmentsPath, char *vcfFile,
                               int subtreeSize, struct slName *userSampleIds,
                               struct hash *condensedNodes, int *pStartTime);
 /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and
  * subtrees to trash files, return list of slRef to struct tempName for the trash files
  * and parse other results out of stderr output. */
 
 struct usherResults *runMatUtilsExtractSubtrees(char *matUtilsPath, char *protobufPath,