205d43b94283eb56c77a64f5e86e2a77295ab750
angie
Mon Mar 12 10:38:40 2018 -0700
hgVai: Add options for HGVS output from Gencode Basic/Comp tracks (not just RefSeq). Use variantProjector instead of gpFx to do predictions for genePreds by converting to PSL+CDS.
refs #21142
The use of variantProjector resulted in significant changes to hg/lib/tests results:
pgSnpKgDbToGpFx.txt: right-shifting, 'Z' -> '*'
vepIndelTrimOut.txt:
* right-shifting
* per-allele consequences even for variants where the allele doesn't affect the consequence,
e.g. intron. This is a drawback; it would be good to filter these after the fact to
reduce them to a single consequence.
* right-shifting intron -> splice_region for gene LSP1
* right-shifting coding_sequence_variant into just 3_prime_UTR_variant for ORJ81
* recognition of genome missing a base for EI24 (so insC restores frame and restores
missing codon from genome-compensating transcripts)
* exon numbers changing due to not counting too-short introns
diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index 4d4f90b..77e41c2 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -990,31 +990,33 @@
"RefSeq status of each transcript.
");
puts("");
}
printf("
",
somethingIsVisible ? "none" : "block");
puts("No transcript status data are available for the selected gene track.");
puts("
");
puts("
");
endCollapsibleSection();
}
static boolean canDoHgvsOut(char *geneTrack)
/* Return TRUE if we're able to make HGVS output terms for transcripts in geneTrack. */
{
-return sameString(geneTrack, "refGene") || startsWith("ncbiRefSeq", geneTrack);
+return (sameString(geneTrack, "refGene") || startsWith("ncbiRefSeq", geneTrack) ||
+ startsWith("wgEncodeGencodeBasic", geneTrack) ||
+ startsWith("wgEncodeGencodeComp", geneTrack));
}
static void selectHgvsOut(char *geneTrack)
/* Offer HGVS output choices if RefSeq Genes are selected */
{
startCollapsibleSection("hgvsOut", "HGVS variant nomenclature", TRUE);
printf("The Human Genome Variation Society (HGVS) "
"has established a "
"sequence variant nomenclature, "
"an international standard used to report variation in "
"genomic, transcript and protein sequences.
\n");
boolean hgvsOk = canDoHgvsOut(geneTrack);
printf("", hgvsOk ? "block" : "none");
cartMakeCheckBox(cart, "hgva_hgvsG", FALSE);
printf("Include HGVS genomic (g.) terms in output
\n");
@@ -1022,32 +1024,32 @@
printf("Include HGVS coding (c.) terms if applicable, otherwise noncoding (n.) terms, in output"
"
\n");
cartMakeCheckBox(cart, "hgva_hgvsP", FALSE);
printf("Include HGVS protein (p.) terms (if applicable) in output
\n");
cartMakeCheckBox(cart, "hgva_hgvsPAddParens", FALSE);
printf("When including HGVS protein (p.) terms, add parentheses around changes to emphasize "
"that they are predictions
\n");
cartMakeCheckBox(cart, "hgva_hgvsBreakDelIns", FALSE);
printf("For variants that involve both a deletion and insertion, "
"including multi-nucleotide variants, "
"include the deleted sequence (e.g. show \"delAGinsTT\" instead of only \"delinsTT\")"
"
\n");
puts("
");
printf("",
hgvsOk ? "none" : "block");
-printf("Select RefSeq Genes in the \"Select Genes\" section above "
- "in order to make options appear.\n");
+printf("Select RefSeq Genes or the latest official GENCODE release "
+ "in the \"Select Genes\" section above in order to make options appear.\n");
puts("
");
puts("
");
endCollapsibleSection();
}
boolean isHg19RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */
{
//#*** NEED METADATA
return (sameString("wgEncodeRegDnaseClusteredV3", tdb->table) ||
sameString("wgEncodeRegTfbsClusteredV3", tdb->table));
}
boolean isHg38RegulatoryTrack(struct trackDb *tdb, void *filterData)
/* For now, just look for a couple specific tracks by tableName. */