# Commit 2ef50d75a8709d41ed0dd29eb872146a6a9c0152 (lrnassar, Fri Aug 23 14:14:09 2024 -0700)
# Updating SpliceAI track colors due to feedback, refs #34336
# File: src/hg/makeDb/scripts/spliceAI/spliceAI.py (index efe714d..215ab0a), shown
# here in its post-patch state.
"""Convert the SpliceAI score VCFs into tab-separated BED files, one per input."""
import csv
import gzip
import os
import subprocess
import sys

# Output-file stem -> gzipped SpliceAI VCF to read.
allFiles = {
    'hg19MaskedIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.indel.hg19.vcf.gz',
    'hg38MaskedIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.indel.hg38.vcf.gz',
    'hg19MaskedSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.snv.hg19.vcf.gz',
    'hg38MaskedSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.masked.snv.hg38.vcf.gz',
    'hg19RawIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.indel.hg19.vcf.gz',
    'hg38RawIndel': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.indel.hg38.vcf.gz',
    'hg19RawSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.snv.hg19.vcf.gz',
    'hg38RawSnv': '/hive/data/outside/spliceAi/scoresFromIllumina/spliceai_scores.raw.snv.hg38.vcf.gz',
}


def colorByScore(score):
    """Return the 'R,G,B' item color string for a score.

    score <= 0.1       -> blue   ('0,0,255')
    0.1 < score < 0.2  -> grey   ('128,128,128')
    score >= 0.2       -> orange ('255,128,0')

    Raises:
        ValueError: if score compares false against every threshold (NaN).
    """
    if score <= 0.1:
        return '0,0,255'  # blue
    if score < 0.2:
        return '128,128,128'  # grey
    if score >= 0.2:
        return '255,128,0'  # orange
    # Only reachable when every comparison is False, i.e. score is NaN.
    raise ValueError('cannot pick a color for score {!r}'.format(score))


def processAndMakeBedFile(dbsAndMasking, filePath, outDir='/hive/data/outside/spliceAi'):
    """Write <outDir>/<dbsAndMasking>.bed from the gzipped VCF at filePath.

    Lines starting with '#' are skipped. Every other line must have exactly
    eight tab-separated columns; the last one (INFO) is split on '|', with
    field 1 used as the name, fields 2-5 as the four scores and fields 6-9
    as the four matching positions. One BED row is written for each score
    that is >= 0.02.

    Args:
        dbsAndMasking: stem of the output file name (a key of allFiles).
        filePath: path of the gzipped VCF to read.
        outDir: directory that receives the .bed file.
    """
    # Order matters: it pairs each type with INFO fields 2-5 (scores) and
    # 6-9 (positions) through zip() below.
    atypes = ('acceptor_gain', 'acceptor_loss', 'donor_gain', 'donor_loss')
    outPath = os.path.join(outDir, dbsAndMasking + '.bed')
    with gzip.open(filePath, 'rt') as f, \
            open(outPath, 'w', newline='', encoding='utf-8') as outfile1:
        # NOTE(review): csv.writer's default line terminator is '\r\n' and
        # newline='' disables translation, so rows end in CRLF as before.
        AIwriter = csv.writer(outfile1, delimiter='\t')
        for line in f:
            if line.startswith('#'):
                continue
            chrom, pos, _id, ref, alt, _qual, _filter, info = line.strip().split('\t')
            startpos = int(pos) - 1
            infoFields = info.split('|')
            name = infoFields[1]
            scores = [float(s) for s in infoFields[2:6]]
            positions = [int(s) for s in infoFields[6:10]]
            for atype, score, position in zip(atypes, scores, positions):
                if score < 0.02:
                    continue
                # Make clear whether the position is upstream or downstream.
                if position > 0:
                    position = '+' + str(position)
                color = colorByScore(score)
                AIwriter.writerow(['chr' + chrom, startpos, startpos + 1, ref + '>' + alt,
                                   0, '+', startpos, startpos, color, score, atype,
                                   position, name])


def bash(cmd):
    """Run cmd in a shell subprocess and return its output as text.

    stderr is merged into stdout, so the returned string holds both.

    Raises:
        RuntimeError: if the command exits with a non-zero status; the
            original CalledProcessError is attached as the cause.
    """
    try:
        rawBashOutput = subprocess.run(cmd, check=True, shell=True,
                                       stdout=subprocess.PIPE, universal_newlines=True,
                                       stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) from e
    return rawBashOutput.stdout


def main():
    """Build one BED file for every entry of allFiles."""
    for dbsAndMasking, filePath in allFiles.items():
        processAndMakeBedFile(dbsAndMasking, filePath)
    # NOTE(review): the visible source ends with the header of a second loop,
    # `for track in allFiles:`, whose body lies beyond this view. It is not
    # reconstructed here; re-attach that loop at this point when merging.


if __name__ == '__main__':
    main()