src/hg/makeDb/scripts/webstr/webstrToBed.py d2b7a0b56d985859c5deed015119edce22449b91

d2b7a0b56d985859c5deed015119edce22449b91
max
  Wed Mar 18 03:45:53 2026 -0700
Change webstr track name to motifxCopyCount format to match trexplorer, refs #36652

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

diff --git src/hg/makeDb/scripts/webstr/webstrToBed.py src/hg/makeDb/scripts/webstr/webstrToBed.py
index a1da209378a..1562887b37b 100644
--- src/hg/makeDb/scripts/webstr/webstrToBed.py
+++ src/hg/makeDb/scripts/webstr/webstrToBed.py
@@ -11,30 +11,41 @@
 import csv
 import gzip
 import sys
 from collections import defaultdict
 
 PERIOD_COLORS = {
     1: "255,0,0",       # mono: red
     2: "0,0,255",       # di: blue
     3: "0,128,0",       # tri: green
     4: "255,165,0",     # tetra: orange
     5: "128,0,128",     # penta: purple
     6: "70,130,180",    # hexa: steel blue
 }
 DEFAULT_COLOR = "128,128,128"  # gray for period > 6
 
+
+def truncateMotif(motif, maxLen=25):
+    """Shorten motif to maxLen chars (maxLen >= 2 assumed) by replacing its middle with '..'."""
+    if len(motif) <= maxLen:
+        return motif
+    keepLen = max(maxLen - 2, 0)  # motif chars kept besides the '..' marker; never negative
+    leftLen = (keepLen + 1) // 2  # the left side gets the extra char when keepLen is odd
+    rightLen = keepLen - leftLen
+    return motif[:leftLen] + ".." + motif[len(motif) - rightLen:]  # motif[-0:] would be the whole motif
+
+
 COHORT_ORDER = ["AFR", "AMR", "EAS", "EUR", "SAS"]
 COHORT_MAP = {
     "1000 Genomes AFR": "AFR",
     "1000 Genomes AMR": "AMR",
     "1000 Genomes EAS": "EAS",
     "1000 Genomes EUR": "EUR",
     "1000 Genomes SAS": "SAS",
 }
 
 def loadAlleleFreqs(inDir):
     """Load allele frequency data, grouped by repeatid and cohort."""
     freqs = defaultdict(lambda: {c: {"alleles": [], "freqs": [], "n": 0} for c in COHORT_ORDER})
     path = f"{inDir}/hg38_afreqs.csv.gz"
     with gzip.open(path, "rt") as f:
         reader = csv.reader(f)
@@ -68,31 +79,31 @@
         reader = csv.reader(f)
         header = next(reader)  # skip header
         for row in reader:
             repeatid, panel, chrom, motif, start, end, period, numcopies, _webstr_link = row
             period_int = int(period)
             color = PERIOD_COLORS.get(period_int, DEFAULT_COLOR)
 
             # Source coordinates are 1-based; convert start to 0-based for BED
             start = str(int(start) - 1)
 
             # BED9 fields
             fields = [
                 chrom,
                 start,
                 end,
-                repeatid,       # name
+                truncateMotif(motif) + "x" + numcopies,  # name
                 "0",            # score
                 ".",            # strand
                 start,          # thickStart
                 end,            # thickEnd
                 color,          # itemRgb
                 motif,
                 period,
                 numcopies,
             ]
 
             # Allele frequency fields for each cohort
             af = afreqs.get(repeatid)
             for cohort in COHORT_ORDER:
                 if af and af[cohort]["alleles"]:
                     entry = af[cohort]