# Provenance: src/hg/makeDb/scripts/webstr/webstrToBed.py as changed by commit
# d2b7a0b56d985859c5deed015119edce22449b91 (max, Wed Mar 18 03:45:53 2026 -0700):
#   "Change webstr track name to motifxCopyCount format to match trexplorer,
#   refs #36652"
#   Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
# Only the two diff hunks of that commit were available, so the definitions
# that are cut off by hunk boundaries are kept as reference comments at the
# bottom instead of being guessed at.

import csv
import gzip
import sys
from collections import defaultdict

# itemRgb colour for each repeat period (motif length in bp).
PERIOD_COLORS = {
    1: "255,0,0",       # mono: red
    2: "0,0,255",       # di: blue
    3: "0,128,0",       # tri: green
    4: "255,165,0",     # tetra: orange
    5: "128,0,128",     # penta: purple
    6: "70,130,180",    # hexa: steel blue
}
DEFAULT_COLOR = "128,128,128"  # gray for period > 6


def truncateMotif(motif, maxLen=25):
    """Shorten motif to at most maxLen characters, eliding the middle with '..'.

    Args:
        motif: The repeat motif string.
        maxLen: Maximum length of the returned string.

    Returns:
        motif unchanged when it already fits. Otherwise the leading and
        trailing characters of motif joined by '..', with the extra character
        going to the left side when the remaining budget is odd. When maxLen
        is smaller than the two-character '..' marker, the motif is simply
        cut to its first maxLen characters (empty for maxLen <= 0).
    """
    if len(motif) <= maxLen:
        return motif
    if maxLen < 2:
        # No room for the '..' marker; a plain prefix keeps the length bound.
        return motif[:max(maxLen, 0)]
    keepLen = maxLen - 2
    leftLen = (keepLen + 1) // 2
    rightLen = keepLen - leftLen
    # Index the tail from the front: motif[-rightLen:] with rightLen == 0 is
    # motif[0:], i.e. the whole string rather than an empty tail.
    return motif[:leftLen] + ".." + motif[len(motif) - rightLen:]


COHORT_ORDER = ["AFR", "AMR", "EAS", "EUR", "SAS"]
COHORT_MAP = {
    "1000 Genomes AFR": "AFR",
    "1000 Genomes AMR": "AMR",
    "1000 Genomes EAS": "EAS",
    "1000 Genomes EUR": "EUR",
    "1000 Genomes SAS": "SAS",
}


# ---------------------------------------------------------------------------
# Truncated diff context, kept verbatim for reference.  Both fragments are cut
# off by hunk boundaries, and the nesting of the second one is not visible.
#
# First hunk (@@ -11,30 +11,41 @@) ends inside loadAlleleFreqs:
#
#   def loadAlleleFreqs(inDir):
#       """Load allele frequency data, grouped by repeatid and cohort."""
#       freqs = defaultdict(lambda: {c: {"alleles": [], "freqs": [], "n": 0} for c in COHORT_ORDER})
#       path = f"{inDir}/hg38_afreqs.csv.gz"
#       with gzip.open(path, "rt") as f:
#           reader = csv.reader(f)
#           ...
#
# Second hunk (@@ -68,31 +79,31 @@), inside a function whose header is not
# shown; this is where the track name changes to the motifxCopyCount format:
#
#   reader = csv.reader(f)
#   header = next(reader)  # skip header
#   for row in reader:
#       repeatid, panel, chrom, motif, start, end, period, numcopies, _webstr_link = row
#       period_int = int(period)
#       color = PERIOD_COLORS.get(period_int, DEFAULT_COLOR)
#       # Source coordinates are 1-based; convert start to 0-based for BED
#       start = str(int(start) - 1)
#       # BED9 fields
#       fields = [
#           chrom, start, end,
#  -        repeatid,  # name
#  +        truncateMotif(motif) + "x" + numcopies,  # name
#           "0",  # score
#           ".",  # strand
#           start,  # thickStart
#           end,  # thickEnd
#           color,  # itemRgb
#           motif,
#           period,
#           numcopies,
#       ]
#       # Allele frequency fields for each cohort
#       af = afreqs.get(repeatid)
#       for cohort in COHORT_ORDER:
#           if af and af[cohort]["alleles"]:
#               entry = af[cohort]
#               ...
# ---------------------------------------------------------------------------