b0039dec371d7665e25e2e39b0e79d3e33ba50a1 angie Tue Nov 28 13:54:07 2023 -0800 Fix amino acid translation to support transcripts that have UTR portions and skip noncoding transcripts. diff --git src/hg/hgPhyloPlace/phyloPlace.h src/hg/hgPhyloPlace/phyloPlace.h index 001dedf..326531a 100644 --- src/hg/hgPhyloPlace/phyloPlace.h +++ src/hg/hgPhyloPlace/phyloPlace.h @@ -145,30 +145,31 @@ char *subLab; // Submitting lab char *region; // Continent on which sample was collected char *nCladeUsher; // Nextstrain clade according to annotated tree char *lineageUsher; // Pango lineage according to annotated tree char *authors; // Sequence submitters/authors char *pubs; // PubMed ID numbers of publications associated with sequences char *nLineage; // Nextstrain letter-dot-numbers lineage assigned by nextclade }; struct geneInfo /* Information sufficient to determine whether a genome change causes a coding change. */ { struct geneInfo *next; struct psl *psl; // Alignment of transcript to genome struct dnaSeq *txSeq; // Transcript sequence + struct genbankCds *cds; // CDS (for those few pathogens that have transcript UTRs) }; struct tempName *vcfFromFasta(struct lineFile *lf, char *db, struct dnaSeq *refGenome, struct slName **maskSites, struct hash *treeNames, struct slName **retSampleIds, struct seqInfo **retSeqInfo, struct slPair **retFailedSeqs, struct slPair **retFailedPsls, int *pStartTime); /* Read in FASTA from lf and make sure each item has a reasonable size and not too high * percentage of N's. Align to reference, extract SNVs from alignment, and save as VCF * with sample genotype columns. */ struct usherResults *runUsher(char *db, char *usherPath, char *usherAssignmentsPath, char *vcfFile, int subtreeSize, struct slName **pUserSampleIds, struct treeChoices *treeChoices, int *pStartTime); /* Open a pipe from Yatish Turakhia's usher program, save resulting big trees and