bac95a147f49cd331052e597006e04b3deee40fc
max
  Wed Apr 22 10:43:20 2026 -0700
lrSv/srSv: human-readable SV type filter labels, script cleanups

Add human-readable labels to the supertrack-level svType filter on
both the lrSv and srSv supertracks using the "CODE|CODE (Long name)"
filterValues syntax: DEL -> "DEL (Deletion)", INS -> "INS (Insertion)",
etc. Labels keep the short code up front so users can match what
hgTracks shows next to each feature.

Also sweep in the in-progress converter/as-file cleanups under
scripts/lrSv/ and scripts/srSv/ (introduction of lrSvCommon.py
helpers, consistent insLen / svLen / AC column naming, tightened
field-description text) that had been piling up, unstaged, in the
working tree.

refs #36258

diff --git src/hg/makeDb/scripts/lrSv/lrSvVcfToBed.py src/hg/makeDb/scripts/lrSv/lrSvVcfToBed.py
index c50c3741e27..754dec4aa9b 100644
--- src/hg/makeDb/scripts/lrSv/lrSvVcfToBed.py
+++ src/hg/makeDb/scripts/lrSv/lrSvVcfToBed.py
@@ -1,25 +1,29 @@
 #!/usr/bin/env python3
 """Convert a SURVIVOR-merged SV VCF (site-only) to BED9+ for bigBed.
 
 Usage:
     lrSvVcfToBed.py input.vcf.gz output.bed
 """
 
 import gzip
+import os
 import sys
 
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from lrSvCommon import svName, normalizeSvType, insLenFor
+
 # Colors by SV type (R,G,B)
 SV_COLORS = {
     "DEL": "200,0,0",      # red
     "INS": "0,0,200",      # blue
     "DUP": "0,160,0",      # green
     "INV": "230,140,0",    # orange
     "TRA": "140,0,200",    # purple
 }
 
 def parseInfo(infoStr):
     """Parse INFO field into a dict."""
     d = {}
     for item in infoStr.split(";"):
         if "=" in item:
             k, v = item.split("=", 1)
@@ -40,87 +44,101 @@
     if len(sys.argv) != 3:
         print(__doc__, file=sys.stderr)
         sys.exit(1)
 
     inFile, outFile = sys.argv[1], sys.argv[2]
     opener = gzip.open if inFile.endswith(".gz") else open
 
     with opener(inFile, "rt") as fIn, open(outFile, "w") as fOut:
         for line in fIn:
             if line.startswith("#"):
                 continue
 
             fields = line.rstrip("\n").split("\t")
             chrom = fields[0]
             pos = int(fields[1])
-            name = fields[2]
             qual = fields[5]
             info = parseInfo(fields[7])
 
-            svType = info.get("SVTYPE", ".")
+            svTypeRaw = info.get("SVTYPE", ".")
+            svType = normalizeSvType(svTypeRaw)
             end = int(info.get("END", pos))
-            svLen = int(float(info.get("SVLEN", "0")))
+            svLenRaw = int(float(info.get("SVLEN", "0")))
             af = float(info.get("AF", "0"))
             supp = int(info.get("SUPP", "0"))
             ciPos = info.get("CIPOS", "0,0")
             ciEnd = info.get("CIEND", "0,0")
             chr2 = info.get("CHR2", ".")
             strands = info.get("STRANDS", "+-")
             suppVec = info.get("SUPP_VEC", "")
 
             # BED is 0-based half-open
             chromStart = pos - 1
 
             # For INS, END == POS so the item has zero width; expand by 1 bp
             chromEnd = end
             if chromEnd <= chromStart:
                 chromEnd = chromStart + 1
 
             # Score: map QUAL to 0-1000
             try:
                 score = min(int(round(float(qual) * 2)), 1000)
             except ValueError:
                 score = 0
 
             # Strand from first character of STRANDS field
             strand = strands[0] if strands and strands[0] in "+-" else "."
 
-            # Absolute SV length
-            absSvLen = abs(svLen)
-
             color = SV_COLORS.get(svType, "100,100,100")
 
             # sampleList from SUPP_VEC
             sampleList = suppVecToList(suppVec)
 
             # end2 for TRA; empty for non-TRA so skipEmptyFields hides them
             end2 = str(end) if svType == "TRA" else ""
             chr2Out = chr2 if svType == "TRA" else ""
 
             # For TRA, chromEnd is the position on chr1 side, not chr2
             if svType == "TRA":
                 chromEnd = chromStart + 1
 
+            # svLen: length on reference
+            svLen = chromEnd - chromStart
+            # insLen: abs(SVLEN) for INS/MEI; 0 for every other type (TRA included)
+            if svType in ("INS", "MEI"):
+                insLen = abs(svLenRaw)
+            else:
+                insLen = 0
+
+            # AC: SURVIVOR input has no AC field, so use supp*2 as an approximation
+            # (SUPP is the number of samples carrying the SV; 2*SUPP is a proxy for diploid AC)
+            ac = supp * 2
+
+            featLen = insLen if svType in ("INS", "MEI") else svLen
+            name = svName(svType, featLen, ac)
+
             row = [
                 chrom,
                 str(chromStart),
                 str(chromEnd),
                 name,
                 str(score),
                 strand,
                 str(chromStart),   # thickStart
                 str(chromEnd),     # thickEnd
                 color,
                 svType,
-                str(absSvLen),
+                str(svLen),
+                str(insLen),
+                str(ac),
                 f"{af:.6f}",
                 str(supp),
                 ciPos,
                 ciEnd,
                 chr2Out,
                 end2,
                 sampleList,
             ]
             fOut.write("\t".join(row) + "\n")
 
 if __name__ == "__main__":
     main()