dc7ac233e8848cee5087dca606f4d638ca8a3be1
lrnassar
  Tue Apr 14 11:58:42 2026 -0700
Update InSiGHT scripts per v2.0.0 CSpec: fix PMS2 BS1 threshold, add PMS2 PVS1 Moderate region. refs #36582

diff --git src/hg/makeDb/doc/InSiGHT.txt src/hg/makeDb/doc/InSiGHT.txt
index 5e826d5b3bf..1f49a3e32fd 100644
--- src/hg/makeDb/doc/InSiGHT.txt
+++ src/hg/makeDb/doc/InSiGHT.txt
@@ -1,156 +1,156 @@
 #RM#36582
 
 # InSiGHT VCEP Track Hub
 # International Society for Gastrointestinal Hereditary Tumours (InSiGHT)
 # Variant Curation Expert Panel (VCEP)
 # Lynch syndrome mismatch repair genes: MLH1, MSH2, MSH6, PMS2
 # CSpec v2.0.0
 # Assemblies: hg38 and hg19
 
 # Working directory for all track data, plus the per-track subdirectories the steps below cd into
 mkdir -p /hive/users/lrnassar/insightHub/{clinDomains,pvs1,afFrequencies,hciPriors,functionalAssays,lovdVars}
 
 # Build scripts are located here (path only, not a command to run):
 #   ~/kent/src/hg/makeDb/scripts/insight/
 
 # Quick link for github:
 # https://github.com/ucscGenomeBrowser/kent/tree/master/src/hg/makeDb/scripts/insight
 
 # Hub structure:
 #   /hive/users/lrnassar/insightHub/
 #     hub.txt, genomes.txt
 #     hg38/trackDb.txt, hg19/trackDb.txt
 #     insight.html (shared description page)
 #     clinDomains/   - Clinical Domains track data
 #     pvs1/          - PVS1 Regions track data
 #     afFrequencies/ - Allele Frequencies track data
 #     hciPriors/     - HCI Priors track data
 #     functionalAssays/ - Functional Assays track data
 #     lovdVars/      - InSiGHT Curated Variants track data
 
 # Canonical transcripts used across all tracks:
 #   MLH1: NM_000249.4 (chr3, + strand)
 #   MSH2: NM_000251.3 (chr2, + strand)
 #   MSH6: NM_000179.3 (chr2, + strand)
 #   PMS2: NM_000535.7 (chr7, - strand)
 
 ##############################################################################
 # Track 1: Clinical Domains (PM1)
 ##############################################################################
 
 # Clinically relevant protein domains for the 4 MMR genes.
 # Domain definitions are hardcoded in the script from the InSiGHT VCEP specs.
 # Generates bigBed 9+4 files for hg38 and hg19.
 
 cd /hive/users/lrnassar/insightHub/clinDomains
 python3 ~/kent/src/hg/makeDb/scripts/insight/insightClinDomains.py
 
 # Output: InSiGHTclinDomainsHg38.bb, InSiGHTclinDomainsHg19.bb
 
 ##############################################################################
 # Track 2: PVS1 Regions
 ##############################################################################
 
 # PVS1 decision tree regions based on NMD predictions and critical functional
 # regions. Gene-specific codon boundaries from the InSiGHT VCEP specs:
 #   MLH1: NMD <=684, CritRegion 685-753, FuncUnknown 754-756, n.a. >756
 #   MSH2: NMD <=861, CritRegion 862-891, FuncUnknown 892-934, n.a. >934
 #   MSH6: NMD <=1317, CritRegion 1318-1341, FuncUnknown 1342-1360, n.a. >1360
-#   PMS2: NMD <=798, n.a. >798
+#   PMS2: NMD <=798, FuncUnknown 799-862, n.a. >862
 # Generates bigBed 9+3 files for hg38 and hg19.
 
 cd /hive/users/lrnassar/insightHub/pvs1
 python3 ~/kent/src/hg/makeDb/scripts/insight/insightPVS1.py
 
 # Output: InSiGHTPVS1Hg38.bb, InSiGHTPVS1Hg19.bb
 
 ##############################################################################
 # Track 3: Allele Frequencies (BA1/BS1/PM2)
 ##############################################################################
 
 # ACMG allele frequency classifications from gnomAD v4.1 exomes.
 # Gene-specific thresholds from the InSiGHT VCEP specs.
 # Requires access to gnomAD v4.1 bigBed files in /gbdb/hg38/gnomAD/v4.1/exomes/
 # Generates bigBed 9+3 files for hg38 and hg19 (hg19 via liftOver).
 
 cd /hive/users/lrnassar/insightHub/afFrequencies
 python3 ~/kent/src/hg/makeDb/scripts/insight/insightAFfrequencies.py
 
 # Output: InSiGHTAFHg38.bb, InSiGHTAFHg19.bb
 
 ##############################################################################
 # Track 4: HCI Priors (PP3/BP4)
 ##############################################################################
 
 # HCI prior probability predictions for missense variants.
 # Source data: LOVD database exports (tab-delimited files downloaded manually
 # from the LOVD shared database for each gene's priors table).
 # Requires LOVD priors files in the hciPriors/ directory:
 #   LOVD_MLH1_priors_*.txt
 #   LOVD_MSH2_priors_*.txt
 #   LOVD_MSH6_priors_*.txt
 #   LOVD_PMS2_priors_*.txt
 # Thresholds: PP3_moderate >0.81, PP3_supporting 0.68-0.81, BP4_supporting <0.11
 # Generates bigBed 9+5 files for hg38 and hg19.
 
 cd /hive/users/lrnassar/insightHub/hciPriors
 python3 ~/kent/src/hg/makeDb/scripts/insight/insightHCIPriors.py
 
 # Output: InSiGHTHCIPriorsHg38.bb, InSiGHTHCIPriorsHg19.bb
 
 ##############################################################################
 # Track 5: Functional Assays (PS3/BS3)
 ##############################################################################
 
 # Functional assay evidence from 4 publications:
 #   Drost et al. 2018 (PMID:30504929) - 74 MLH1/MSH2 variants, CIMRA assay
 #   Drost et al. 2020 (PMID:31965077) - 87 MSH6 variants, CIMRA assay
 #   Jia et al. 2021 (PMID:33357406)  - 16,749 MSH2 variants, deep mutational scan
 #   Rath et al. 2022 (PMID:36054288) - 26 MLH1 variants, cell-based assay
 #
 # Requires supplementary data files in the functionalAssays/ directory:
 #   drost2020_supplement.docx (Drost 2020 S1/S3/S5 tables)
 #   mmc2.xlsx (Jia 2021 TableS4/S5)
 #   (Drost 2018 and Rath 2022 data are hardcoded from their supplements)
 #
 # Also requires openpyxl: pip install openpyxl
 # Generates bigBed 9+7 files for hg38 and hg19.
 
 cd /hive/users/lrnassar/insightHub/functionalAssays
 python3 ~/kent/src/hg/makeDb/scripts/insight/insightFunctionalAssays.py
 
 # Output: insightFunctionalAssaysHg38.bb, insightFunctionalAssaysHg19.bb
 
 ##############################################################################
 # Track 6: InSiGHT Curated Variants (from ClinVar)
 ##############################################################################
 
 # InSiGHT VCEP expert panel classifications fetched from ClinVar API.
 # Queries ClinVar for variants submitted by InSiGHT on MLH1, MSH2, MSH6, PMS2.
 # No local data files needed -- fetches directly from NCBI E-utilities.
 # This is the track that should be periodically rebuilt (ClinVar updates monthly).
 # Generates bigBed 9+7 files for hg38 and hg19.
 
 cd /hive/users/lrnassar/insightHub/lovdVars
 python3 ~/kent/src/hg/makeDb/scripts/insight/buildInsightClinVar.py
 
 # Output: insightClinVarHg38.bb, insightClinVarHg19.bb
 
 ##############################################################################
 # Hub deployment
 ##############################################################################
 
 # The hub is served from:
 #   https://hgwdev-lrnassar.gi.ucsc.edu/~lrnassar/track_hubs/insightHub/hub.txt
 #
 # The public_html symlink points to the working directory:
 #   /cluster/home/lrnassar/public_html/track_hubs/insightHub -> /hive/users/lrnassar/insightHub
 #
 # To rebuild all tracks from scratch (one subshell per track, so a failed script never leaves the shell in the wrong directory):
 cd /hive/users/lrnassar/insightHub
 (cd clinDomains && python3 ~/kent/src/hg/makeDb/scripts/insight/insightClinDomains.py)
 (cd pvs1 && python3 ~/kent/src/hg/makeDb/scripts/insight/insightPVS1.py)
 (cd afFrequencies && python3 ~/kent/src/hg/makeDb/scripts/insight/insightAFfrequencies.py)
 (cd hciPriors && python3 ~/kent/src/hg/makeDb/scripts/insight/insightHCIPriors.py)
 (cd functionalAssays && python3 ~/kent/src/hg/makeDb/scripts/insight/insightFunctionalAssays.py)
 (cd lovdVars && python3 ~/kent/src/hg/makeDb/scripts/insight/buildInsightClinVar.py)