8efbcad3f2816dec50b5671a4445d2e6943f7f91 max Mon Apr 13 07:57:41 2026 -0700 Use light gray for monomorphic strVar loci (het=0), distinct from no-data gray, refs #36652 Add a separate color for loci where heterozygosity is exactly 0 (single allele observed) across all four strVar subtracks: light gray (200,200,200). This distinguishes them from the existing medium gray (128,128,128) used when no allele frequency data is available. Previously het=0 was lumped into the dark blue "nearly monomorphic" bin. Also expand the itemRgb field description in all four .as files to list the full color scheme. Co-Authored-By: Claude Opus 4.6 (1M context) diff --git src/hg/makeDb/scripts/viennaVntr/viennaVntrToBed.py src/hg/makeDb/scripts/viennaVntr/viennaVntrToBed.py index 5a859f5c52d..8f3bf107513 100644 --- src/hg/makeDb/scripts/viennaVntr/viennaVntrToBed.py +++ src/hg/makeDb/scripts/viennaVntr/viennaVntrToBed.py @@ -1,126 +1,129 @@ #!/usr/bin/env python3 """ Convert Vienna ONT 1000 Genomes VAMOS VNTR summary TSV to BED9+ for bigBed. Input: vamos-summary.tsv from the Vienna ONT project. Output: BED to stdout. Usage: python3 viennaVntrToBed.py vamos-summary.tsv | sort -k1,1 -k2,2n > viennaVntr.bed """ import sys HET_BINS = [ (0.1, "0,0,180"), # het < 0.1: dark blue (nearly monomorphic) (0.3, "70,130,230"), # het 0.1-0.3: medium blue (low diversity) (0.5, "180,130,200"), # het 0.3-0.5: light purple (moderate diversity) (0.7, "230,100,80"), # het 0.5-0.7: salmon (high diversity) (999, "180,0,0"), # het >= 0.7: dark red (very high diversity) ] -NO_DATA_COLOR = "128,128,128" # gray when no het data +NO_DATA_COLOR = "128,128,128" # medium gray when no het data +MONOMORPHIC_COLOR = "200,200,200" # light gray when het == 0 (fixed allele) def het_to_color(het): """Map heterozygosity value to an RGB color string.""" if het < 0: return NO_DATA_COLOR + if het == 0: + return MONOMORPHIC_COLOR for threshold, color in HET_BINS: if het < threshold: return color return HET_BINS[-1][1] def load_het(het_path): """Load het values from TSV (chrom, pos_1based, het).""" het_map = {} with open(het_path) as f: for line in f: chrom, pos, het = line.rstrip("\n").split("\t") het_map[(chrom, int(pos))] = float(het) return het_map def main(): if len(sys.argv) < 3: print("Usage: viennaVntrToBed.py vamos-summary.tsv het.tsv", file=sys.stderr) sys.exit(1) print("Loading het values...", file=sys.stderr) het_map = load_het(sys.argv[2]) print(f" Loaded {len(het_map)} het values", file=sys.stderr) count = 0 with open(sys.argv[1]) as f: header = None for line in f: if line.startswith("#"): header = line.strip().lstrip("#").split("\t") continue parts = line.strip().split("\t") if len(parts) < 20: continue def safe_int(s, default=0): try: v = float(s) if v != v: # NaN check return default return int(v) except (ValueError, OverflowError): return default def safe_float(s, default="0"): try: v = float(s) if v != v: # NaN return default return s except ValueError: return default chrom = parts[0] start = int(parts[1]) - 1 # 1-based to 0-based end = int(parts[2]) num_haps = safe_int(parts[3]) ru_len_avg = float(parts[4]) if parts[4] != "NaN" else 0.0 num_unique = safe_int(parts[6]) max_rus = safe_int(parts[7]) min_rus = safe_int(parts[8]) median_rus = safe_float(parts[9]) pct25_rus = safe_float(parts[12]) pct75_rus = safe_float(parts[13]) unique_num_rus = safe_int(parts[16]) max_bps = safe_int(parts[17]) min_bps = safe_int(parts[18]) median_bps = safe_float(parts[19]) # Lookup het (key is chrom, 1-based pos from VCF) het = het_map.get((chrom, start + 1), -1.0) color = het_to_color(het) score = max(0, int(het * 1000)) if het >= 0 else 0 # Name: motif size and median repeat count name = f"{int(round(ru_len_avg))}bp x{median_rus}" fields = [ chrom, str(start), str(end), name, str(score), ".", str(start), str(end), color, str(num_haps), f"{ru_len_avg:.1f}", str(num_unique), str(max_rus), str(min_rus), median_rus, pct25_rus, pct75_rus, str(unique_num_rus), str(max_bps), str(min_bps), median_bps, str(het), ] print("\t".join(fields)) count += 1 print(f"Wrote {count:,} loci", file=sys.stderr) if __name__ == "__main__": main()