src/hg/makeDb/scripts/spliceAI/spliceAI.py 2ef50d75a8709d41ed0dd29eb872146a6a9c0152

2ef50d75a8709d41ed0dd29eb872146a6a9c0152
lrnassar
  Fri Aug 23 14:14:09 2024 -0700
Updating SpliceAI track colors due to feedback, refs #34336

diff --git src/hg/makeDb/scripts/spliceAI/spliceAI.py src/hg/makeDb/scripts/spliceAI/spliceAI.py
index efe714d..215ab0a 100644
--- src/hg/makeDb/scripts/spliceAI/spliceAI.py
+++ src/hg/makeDb/scripts/spliceAI/spliceAI.py
@@ -1,53 +1,64 @@
 import sys, csv, gzip
 import subprocess
 
 allFiles = {'hg19MaskedIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.indel.hg19.vcf.gz',\
            'hg38MaskedIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.indel.hg38.vcf.gz',\
            'hg19MaskedSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.snv.hg19.vcf.gz',\
            'hg38MaskedSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.snv.hg38.vcf.gz',\
            'hg19RawIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.indel.hg19.vcf.gz',\
            'hg38RawIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.indel.hg38.vcf.gz',\
            'hg19RawSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.snv.hg19.vcf.gz',\
            'hg38RawSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.snv.hg38.vcf.gz'}
 
+def colorByScore(score):
+    """Return 'R,G,B' for a score: <=0.1 blue, <0.2 grey, else orange; NaN raises ValueError."""
+    if score != score:  # true only for NaN, which fits none of the ranges below
+        raise ValueError('cannot choose a color for a NaN score')
+    if score <= 0.1:
+        return '0,0,255'  # blue
+    # strictly between the two thresholds is grey, 0.2 and above is orange
+    return '128,128,128' if score < 0.2 else '255,128,0'
+        
+
 def processAndMakeBedFile(dbsAndMasking,filePath):
     with gzip.open(filePath, 'rt') as f:
         with open('/hive/data/outside/spliceAi/'+dbsAndMasking+'.bed', 'w', newline='', encoding='utf-8') as outfile1:
             AIwriter = csv.writer(outfile1, delimiter='\t')
             atypes = {'acceptor_gain' : '255,0,0',
               'acceptor_loss' : '255,128,0',
               'donor_gain' : '0,0,255',
               'donor_loss' : '212,0,255'}
             for line in f:
                 if line.startswith('#'):
                     continue
                 [chrom, pos, id, ref, alt, qual, filter, info] = line.strip().split('\t')
                 startpos = int(pos) -1
                 # match scores with positions
                 name = info.split('|')[1]
                 scores = [float(s) for s in info.split('|')[2:6]]
                 positions = [int(s) for s in info.split('|')[6:10]]
                 # Iterate over the zipped data
                 for atype, score, position in zip(atypes.keys(), scores, positions):
                     # Check if the score is greater than or equal to 0.02
                     if score >= 0.02:
                         # make clear if position is upstream or downstream
                         if position > 0:
                             position = '+' + str(position)
+                        color = colorByScore(score)  # color comes from the score alone; the RGB values in atypes are no longer written out
                   #      print(f"Type: {atype}, Score: {score}, Position: {position}")
-                        AIwriter.writerow(['chr'+chrom, startpos, startpos+1, ref+'>'+alt, 0, '+', startpos, startpos, atypes[atype], score, atype, position, name])
+                        AIwriter.writerow(['chr'+chrom, startpos, startpos+1, ref+'>'+alt, 0, '+', startpos, startpos, color, score, atype, position, name])
 
 def bash(cmd):
     """Run the cmd in bash subprocess"""
     try:
         rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
                                        stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
         bashStdoutt = rawBashOutput.stdout
     except subprocess.CalledProcessError as e:
         raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
     return(bashStdoutt)
 
 for file in allFiles:
     processAndMakeBedFile(file,allFiles[file])
     
 for track in allFiles: