ddb85ced5e8b6127a233b5cda5fcb1fbe2260578 max Wed Mar 25 04:22:06 2026 -0700 Add detailsScript trackDb mechanism for JS visualizations on bigBed details pages Changing based on feedback from Jonathan, Chris and Brian after group discussion. Refactored existing Claude-generated code, moving functions into libraries. This is the first use of ES6 modules in the kent js code. In 2026, this should be acceptable? New trackDb syntax: detailsScript.. The C code (bigBedClick.c) collects these settings, exports field values as JSON (bedDetails object), and dynamically imports hgc..js as an ES6 module. Fields used by detailsScript are shown in the HTML table with empty values, filled by JavaScript. Includes hgc.histogram.js module for drawing SVG bar chart histograms from logfmt-encoded data (space-separated key=value pairs). Applied to both the trexplorer and webstr tracks in the strVar supertrack. Also adds jsonWriteJsonElement() helper to jsonWrite.c for writing parsed jsonElement trees into a jsonWrite stream. max, refs #36652 Co-Authored-By: Claude Opus 4.6 (1M context) diff --git src/hg/makeDb/scripts/webstr/webstrToBed.py src/hg/makeDb/scripts/webstr/webstrToBed.py index 1562887b37b..3e8068fc0ec 100644 --- src/hg/makeDb/scripts/webstr/webstrToBed.py +++ src/hg/makeDb/scripts/webstr/webstrToBed.py @@ -1,126 +1,125 @@ #!/usr/bin/env python3 """Convert WebSTR CSV data to BED9+ format for bigBed conversion. Reads hg38_repeats_withlinks.csv.gz and hg38_afreqs.csv.gz from the input directory and writes a tab-separated BED file to stdout. Usage: webstrToBed.py > webstr.bed """ import csv import gzip import sys from collections import defaultdict PERIOD_COLORS = { 1: "255,0,0", # mono: red 2: "0,0,255", # di: blue 3: "0,128,0", # tri: green 4: "255,165,0", # tetra: orange 5: "128,0,128", # penta: purple 6: "70,130,180", # hexa: steel blue } DEFAULT_COLOR = "128,128,128" # gray for period > 6 def truncateMotif(motif, maxLen=25): """Truncate motif to maxLen characters with '..' in the middle.""" if len(motif) <= maxLen: return motif keepLen = maxLen - 2 leftLen = (keepLen + 1) // 2 rightLen = keepLen - leftLen return motif[:leftLen] + ".." + motif[-rightLen:] COHORT_ORDER = ["AFR", "AMR", "EAS", "EUR", "SAS"] COHORT_MAP = { "1000 Genomes AFR": "AFR", "1000 Genomes AMR": "AMR", "1000 Genomes EAS": "EAS", "1000 Genomes EUR": "EUR", "1000 Genomes SAS": "SAS", } def loadAlleleFreqs(inDir): """Load allele frequency data, grouped by repeatid and cohort.""" freqs = defaultdict(lambda: {c: {"alleles": [], "freqs": [], "n": 0} for c in COHORT_ORDER}) path = f"{inDir}/hg38_afreqs.csv.gz" with gzip.open(path, "rt") as f: reader = csv.reader(f) header = next(reader) # skip header for row in reader: cohort_raw, allele, freq, n, repeatid = row cohort = COHORT_MAP.get(cohort_raw) if cohort is None: continue entry = freqs[repeatid][cohort] entry["alleles"].append(allele) entry["freqs"].append(freq) entry["n"] = int(n) return freqs def main(): if len(sys.argv) != 2: print(__doc__, file=sys.stderr) sys.exit(1) inDir = sys.argv[1] print("Loading allele frequencies...", file=sys.stderr) afreqs = loadAlleleFreqs(inDir) print(f" Loaded frequencies for {len(afreqs)} repeats", file=sys.stderr) print("Processing repeats...", file=sys.stderr) repeatsPath = f"{inDir}/hg38_repeats_withlinks.csv.gz" count = 0 with gzip.open(repeatsPath, "rt") as f: reader = csv.reader(f) header = next(reader) # skip header for row in reader: repeatid, panel, chrom, motif, start, end, period, numcopies, _webstr_link = row period_int = int(period) color = PERIOD_COLORS.get(period_int, DEFAULT_COLOR) # Source coordinates are 1-based; convert start to 0-based for BED start = str(int(start) - 1) # BED9 fields fields = [ chrom, start, end, truncateMotif(motif) + "x" + numcopies, # name "0", # score ".", # strand start, # thickStart end, # thickEnd color, # itemRgb motif, period, numcopies, ] - # Allele frequency fields for each cohort + # Allele frequency fields for each cohort (logfmt: allele=freq pairs) af = afreqs.get(repeatid) for cohort in COHORT_ORDER: if af and af[cohort]["alleles"]: entry = af[cohort] - fields.append(",".join(entry["alleles"])) - fields.append(",".join(entry["freqs"])) + pairs = [a + "=" + f for a, f in zip(entry["alleles"], entry["freqs"])] + fields.append(" ".join(pairs)) fields.append(str(entry["n"])) else: - fields.append("") fields.append("") fields.append("0") print("\t".join(fields)) count += 1 if count % 500000 == 0: print(f" Processed {count} repeats...", file=sys.stderr) print(f"Done. Wrote {count} records.", file=sys.stderr) if __name__ == "__main__": main()