69087d3a65af31c39337085920e99e5b2db13082
galt
  Fri Jun 17 15:05:28 2022 -0700
Ran the dbSNP pipeline designed by Angie for dbSNP v155. It produces huge bigBed output, and I found and fixed a problem encountered in bedToBigBed. I also tweaked dbSnpJsonToTab to deal with some dbSNP data having multiple study subversions, by ignoring the old datasets and using only the latest one. Added a track description page, which has lots of content and counts to update. dbsnp155 is ready for QA on hgwdev. refs #rm27751

diff --git src/hg/makeDb/trackDb/human/trackDb.ra src/hg/makeDb/trackDb/human/trackDb.ra
index 0f566fb..ded35a9 100644
--- src/hg/makeDb/trackDb/human/trackDb.ra
+++ src/hg/makeDb/trackDb/human/trackDb.ra
@@ -1741,30 +1741,152 @@
     parent dbSnp153Composite
     view errs
     shortLabel Mapping Errors
     visibility dense
 
         track dbSnp153BadCoords
         parent dbSnp153ViewErrs off
         subGroups view=errs
         shortLabel Map Err dbSnp(153)
         longLabel Mappings with Inconsistent Coordinates from dbSNP 153
         bigDataUrl /gbdb/$D/snp/dbSnp153BadCoords.bb
         type bigBed 4
         color 100,100,100
         priority 5
 
+track dbSnp155Composite
+compositeTrack on
+shortLabel dbSNP 155
+longLabel Short Genetic Variants from dbSNP release 155
+type bed 3
+group varRep
+visibility pack
+url https://www.ncbi.nlm.nih.gov/snp/$$
+urlLabel dbSNP: 
+subGroup1 view Views variants=Variants errs=Mapping_Errors
+priority 0.8
+
+    track dbSnp155ViewVariants
+    view variants
+    parent dbSnp155Composite
+    shortLabel Variants
+    visibility dense
+    type bigDbSnp
+    detailsTabUrls _dataOffset=/gbdb/hgFixed/dbSnp/dbSnp155Details.tab.gz
+    freqSourceOrder 1000Genomes,dbGaP_PopFreq,TOPMED,KOREAN,SGDP_PRJ,Qatari,NorthernSweden,Siberian,TWINSUK,TOMMO,ALSPAC,GENOME_DK,GnomAD,GoNL,Estonian,Vietnamese,Korea1K,HapMap,PRJEB36033,HGDP_Stanford,Daghestan,PAGE_STUDY,Chileans,MGP,PRJEB37584,GoESP,ExAC,GnomAD_exomes,FINRISK,PharmGKB,PRJEB37766
+    classFilterValues snv,mnv,ins,del,delins,identity
+    classFilterType multipleListOr
+    showCfg on
+    ucscNotesFilterValues \
+        altIsAmbiguous|Alternate allele contains IUPAC ambiguous base(s),\
+        classMismatch|Variant class/type is inconsistent with allele sizes,\
+        clinvar|Present in ClinVar,\
+        clinvarBenign|ClinVar significance of benign and/or likely benign,\
+        clinvarConflicting|ClinVar includes both benign and pathogenic reports,\
+        clinvarPathogenic|ClinVar significance of pathogenic and/or likely pathogenic,\
+        clusterError|Overlaps a variant with the same type/class and position,\
+        commonAll|MAF >= 1% in all projects that report frequencies,\
+        commonSome|MAF >= 1% in at least one project that reports frequencies,\
+        diffMajor|Different projects report different major alleles,\
+        freqIncomplete|Frequency reported with incomplete allele data,\
+        freqIsAmbiguous|Frequency reported for allele with IUPAC ambiguous base(s),\
+        freqNotMapped|Frequency reported on different assembly but not mapped by dbSNP,\
+        freqNotRefAlt|Reference genome allele is not major allele in at least one project,\
+        multiMap|Variant is placed in more than one genomic position,\
+        otherMapErr|Another mapping of this variant has illegal coords (indel mapping error?),\
+        overlapDiffClass|Variant overlaps other variant(s) of different type/class,\
+        overlapSameClass|Variant overlaps other variant(s) of same type/class but different position,\
+        rareAll|MAF < 1% in all projects that report frequencies (or no frequency data),\
+        rareSome|MAF < 1% in at least one project that reports frequencies,\
+        refIsAmbiguous|Reference genome allele contains IUPAC ambiguous base(s),\
+        refIsMinor|Reference genome allele is minor allele in at least one project that reports frequencies,\
+        refIsRare|Reference genome allele frequency is <1% in at least one project,\
+        refIsSingleton|Reference genome frequency is 0 in all projects that report frequencies,\
+        refMismatch|Reference allele mismatches reference genome sequence,\
+        revStrand|Variant maps to opposite strand relative to dbSNP's preferred top-level placement
+#'
+    ucscNotesFilterType multipleListOr
+    maxFuncImpactFilterLabel Greatest functional impact on gene
+    maxFuncImpactFilterValues 0|(not annotated),\
+        0865|frameshift,\
+        1587|stop_gained,\
+        1574|splice_acceptor_variant,\
+        1575|splice_donor_variant,\
+        1821|inframe_insertion,\
+        1583|missense_variant,\
+        1590|terminator_codon_variant,\
+        1819|synonymous_variant,\
+        1580|coding_sequence_variant,\
+        1623|5_prime_UTR_variant,\
+        1624|3_prime_UTR_variant,\
+        1619|nc_transcript_variant,\
+        2153|genic_upstream_transcript_variant,\
+        1986|upstream_transcript_variant,\
+        2152|genic_downstream_transcript_variant,\
+        1987|downstream_transcript_variant,\
+        1627|intron_variant
+    maxFuncImpactFilterType multipleListOr
+
+        track dbSnp155Common
+        parent dbSnp155ViewVariants on
+        subGroups view=variants
+        shortLabel Common dbSNP(155)
+        longLabel Common (1000 Genomes Phase 3 MAF >= 1%) Short Genetic Variants from dbSNP Release 155
+        bigDataUrl /gbdb/$D/snp/dbSnp155Common.bb
+        priority 1
+
+        track dbSnp155ClinVar
+        parent dbSnp155ViewVariants off
+        subGroups view=variants
+        shortLabel ClinVar dbSNP(155)
+        longLabel Short Genetic Variants from dbSNP Release 155 Included in ClinVar
+        bigDataUrl /gbdb/$D/snp/dbSnp155ClinVar.bb
+        priority 2
+
+        track dbSnp155Mult
+        parent dbSnp155ViewVariants off
+        subGroups view=variants
+        shortLabel Mult. dbSNP(155)
+        longLabel Short Genetic Variants from dbSNP Release 155 that Map to Multiple Genomic Loci
+        bigDataUrl /gbdb/$D/snp/dbSnp155Mult.bb
+        priority 3
+
+        track dbSnp155
+        parent dbSnp155ViewVariants off
+        subGroups view=variants
+        shortLabel All dbSNP(155)
+        longLabel All Short Genetic Variants from dbSNP Release 155
+        bigDataUrl /gbdb/$D/snp/dbSnp155.bb
+        maxWindowToDraw 1000000
+        priority 4
+
+    track dbSnp155ViewErrs
+    parent dbSnp155Composite
+    view errs
+    shortLabel Mapping Errors
+    visibility dense
+
+        track dbSnp155BadCoords
+        parent dbSnp155ViewErrs off
+        subGroups view=errs
+        shortLabel Map Err dbSnp(155)
+        longLabel Mappings with Inconsistent Coordinates from dbSNP 155
+        bigDataUrl /gbdb/$D/snp/dbSnp155BadCoords.bb
+        type bigBed 4
+        color 100,100,100
+        priority 5
+
 track hgdpGeo
 shortLabel HGDP Allele Freq
 longLabel Human Genome Diversity Project SNP Population Allele Frequencies
 group varRep
 visibility hide
 url http://hgdp.uchicago.edu/cgi-bin/gbrowse/HGDP/?name=$$
 urlLabel HGDP Selection Browser:
 type bed 4 +
 
 track hgdpFst
 shortLabel HGDP Smoothd FST
 longLabel Human Genome Diversity Project Smoothed Relative FST (Fixation Index)
 group varRep
 visibility hide
 chromosomes chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,
@@ -5392,30 +5514,70 @@
 searchMethod exact
 searchType bigBed
 #semiShortCircuit 1
 termRegex rs[0-9]+
 searchPriority 12.9453
 padding 100
 
 searchTable dbSnp153BadCoords
 searchMethod exact
 searchType bigBed
 #semiShortCircuit 1
 termRegex rs[0-9]+
 searchPriority 12.9454
 padding 100
 
+searchTable dbSnp155Common
+searchMethod exact
+searchType bigBed
+#semiShortCircuit 1
+termRegex rs[0-9]+
+searchPriority 12.9450
+padding 100
+
+searchTable dbSnp155ClinVar
+searchMethod exact
+searchType bigBed
+#semiShortCircuit 1
+termRegex rs[0-9]+
+searchPriority 12.9451
+padding 100
+
+searchTable dbSnp155
+searchMethod exact
+searchType bigBed
+semiShortCircuit 1
+termRegex rs[0-9]+
+searchPriority 12.9452
+padding 100
+
+searchTable dbSnp155Mult
+searchMethod exact
+searchType bigBed
+#semiShortCircuit 1
+termRegex rs[0-9]+
+searchPriority 12.9453
+padding 100
+
+searchTable dbSnp155BadCoords
+searchMethod exact
+searchType bigBed
+#semiShortCircuit 1
+termRegex rs[0-9]+
+searchPriority 12.9454
+padding 100
+
 include cloneEnd.trackDb.ra
 
 track spMut override
 bigDataUrl /gbdb/$D/uniprot/unipMut.bb
 
 track sgpGene override
 longLabel SGP Gene Predictions Using Mouse/$Organism Homology
 
 track uniprot override
 hideEmptySubtracks off
 
 searchTable cnvDevDelayCase
 semiShortCircuit 1
 termRegex nssv[0-9]+
 searchType bed