c3e1d16778b3af004003425907b23ae7cdb94844 lrnassar Tue Jun 18 10:33:15 2024 -0700 Removing carrots from name, feedback from Anna. diff --git src/hg/makeDb/scripts/enigma/BRCAsplicing.py src/hg/makeDb/scripts/enigma/BRCAsplicing.py index e0fcf02..3ef06d5 100644 --- src/hg/makeDb/scripts/enigma/BRCAsplicing.py +++ src/hg/makeDb/scripts/enigma/BRCAsplicing.py @@ -1,164 +1,172 @@ import re import subprocess def bash(cmd): """Run the cmd in bash subprocess""" try: rawBashOutput = subprocess.run(cmd, check=True, shell=True,\ stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT) bashStdoutt = rawBashOutput.stdout except subprocess.CalledProcessError as e: raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) return(bashStdoutt) rawFilePath = "/hive/data/inside/enigmaTracksData/Anna_CSpec_BRCA12ACMG-Rules-Specifications_V1.1Table-4_2023-11-22.txt" def assignRGBcolor(lineToCheck): if "DEL" in lineToCheck[4]: itemRgb = '180,53,53' #Red elif "DUP" in lineToCheck[4]: itemRgb = '117,136,214' #Blue elif "RNA" in lineToCheck[5]: itemRgb = '146,64,190' #Black else: itemRgb = '0,0,0' return(itemRgb) +def checkExtraCarrotInPosition(position): + if position[len(position)-1] == ">": + correctPosition = position[0:len(position)-1] + else: + correctPosition = position + return(correctPosition) + rawFile = open(rawFilePath,'r') outputBedFile = open("/hive/data/inside/enigmaTracksData/outputBedFile.bed",'w', encoding='latin-1') for line in rawFile: line = line.rstrip("\n").split("\t") if line[0].startswith("Gene") or line[0]== "": continue elif line == "": continue else: # print(line) itemRgb = assignRGBcolor(line) NMacc = line[1] if line[0] == "BRCA1": strand = "-" chrom = "chr17" else: strand = "+" chrom = "chr13" if len(line[3].split("c")) > 2: firstPos=line[3].split(".")[1].split("-c")[0] secondPos=line[3].split("-c.")[1] queryPosition = bash("curl https://hgwdev.gi.ucsc.edu/cgi-bin/hgSearch?search="+NMacc+"%3Ac"+firstPos) for resultsLine in queryPosition.split("\n"): if resultsLine.startswith("<script"): if strand == "-": chromEnd = str(int(resultsLine.split("position=")[1].split("-")[0].split(":")[1])+5) elif strand == "+": chromStart = str(int(resultsLine.split("position=")[1].split("-")[0].split(":")[1])+4) queryPosition = bash("curl https://hgwdev.gi.ucsc.edu/cgi-bin/hgSearch?search="+NMacc+"%3Ac"+secondPos) for resultsLine in queryPosition.split("\n"): if resultsLine.startswith("<script"): if strand == "-": chromStart = str(int(resultsLine.split("position=")[1].split("-")[0].split(":")[1])+4) elif strand == "+": chromEnd = str(int(resultsLine.split("position=")[1].split("-")[0].split(":")[1])+5) else: p = re.compile('[0-9]') if "-" in line[3]: #c.-20-1G> if len(line[3].split("-")) > 2: pos = "-"+line[3].split("-")[1] adjustment = p.findall(line[3].split("-")[2]) adjustment = "-"+adjustment[0] elif line[3].startswith("c.-"): #c.-20+2T> pos = "-"+line[3].split("-")[1].split("+")[0] adjustment = p.findall(line[3].split("+")[1]) adjustment = adjustment[0] else: #c.20-2T> pos = line[3].split("c.")[1].split("-")[0] adjustment = p.findall(line[3].split("-")[1]) adjustment = "-"+adjustment[0] elif "+" in line[3]: #c.475+1G> pos = line[3].split("c.")[1].split("+")[0] adjustment = p.findall(line[3].split("+")[1])[0] else: #c.1 pos = line[3].split('c.')[1] adjustment = 0 queryPosition = bash("curl https://hgwdev.gi.ucsc.edu/cgi-bin/hgSearch?search="+NMacc+"%3Ac"+pos) for resultsLine in queryPosition.split("\n"): if resultsLine.startswith("<script"): position = int(resultsLine.split("position=")[1].split("-")[0].split(":")[1])+5 if strand == "-": position = int(position)-int(adjustment) elif strand == "+": position = int(position)+int(adjustment) chromStart = str(position-1) chromEnd = str(position) varType = line[4].replace('"','') ACMGcode = line[5].replace('"','') observation = line[6].replace("?","delta").replace('"','') + correctedPosition = checkExtraCarrotInPosition(line[3]) _mouseOver = "<b>Transcript:</b> "+line[1]+"<br><b>Exon:</b> "+line[2]+\ - "<br><b>Position:</b> "+line[3]+"<br><b>Var Type:</b> "+varType+\ + "<br><b>Position:</b> "+correctedPosition+"<br><b>Var Type:</b> "+varType+\ "<br><b>ACMG Code: </b>"+ACMGcode - outputBedFile.write(chrom+"\t"+chromStart+"\t"+chromEnd+"\t"+line[3]+"\t0\t"+\ + outputBedFile.write(chrom+"\t"+chromStart+"\t"+chromEnd+"\t"+correctedPosition+"\t0\t"+\ strand+"\t"+chromStart+"\t"+chromEnd+"\t"+itemRgb+"\t"+\ "\t".join(line[:4])+"\t"+varType+"\t"+\ ACMGcode+"\t"+observation+\ "\t"+line[7]+"\t"+_mouseOver+"\n") rawFile.close() outputBedFile.close() bash("bedSort /hive/data/inside/enigmaTracksData/outputBedFile.bed \ /hive/data/inside/enigmaTracksData/outputBedFile.bed") startOfAsFile="""table BRCAsplicing "BRCA1 and BRCA2 variant codes according to PVS1 decision trees (ENIGMA specifications version 1.1.0)" ( string chrom; "Reference sequence chromosome or scaffold" uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" string name; "Var location" uint score; "Not used, all 0" char[1] strand; "- or +" uint thickStart; "Same as chromStart" uint thickEnd; "Same as chromEnd" uint reserved; "RGB value (use R,G,B string in input file)" """ line1 = bash("head -1 "+rawFilePath) line1 = line1.rstrip("\n").split("\t") name = [] for i in range(8): asFileAddition = " lstring extraField"+str(i)+';\t"'+line1[i]+'"\n' startOfAsFile = startOfAsFile+asFileAddition startOfAsFile = startOfAsFile+" string _mouseOver;"+'\t"'+'Field only used as mouseOver'+'"\n' asFileOutput = open("/hive/data/inside/enigmaTracksData/BRCAsplicing.as","w") for line in startOfAsFile.split("\n"): if "_mouseOver" in line: asFileOutput.write(line+"\n )") else: asFileOutput.write(line+"\n") asFileOutput.close() bash("bedToBigBed -as=/hive/data/inside/enigmaTracksData/BRCAsplicing.as -type=bed9+9 -tab \ /hive/data/inside/enigmaTracksData/outputBedFile.bed /cluster/data/hg38/chrom.sizes \ /hive/data/inside/enigmaTracksData/BRCAsplicingHg38.bb") bash("liftOver -bedPlus=9 -tab /hive/data/inside/enigmaTracksData/outputBedFile.bed \ /hive/data/genomes/hg38/bed/liftOver/hg38ToHg19.over.chain.gz \ /hive/data/inside/enigmaTracksData/outputBedFileHg19.bed /hive/data/inside/enigmaTracksData/unmapped.bed") bash("bedToBigBed -as=/hive/data/inside/enigmaTracksData/BRCAsplicing.as -type=bed9+9 -tab \ /hive/data/inside/enigmaTracksData/outputBedFileHg19.bed /cluster/data/hg19/chrom.sizes \ /hive/data/inside/enigmaTracksData/BRCAsplicingHg19.bb") bash("ln -sf /hive/data/inside/enigmaTracksData/BRCAsplicingHg38.bb /gbdb/hg38/bbi/enigma/BRCAsplicing.bb") bash("ln -sf /hive/data/inside/enigmaTracksData/BRCAsplicingHg19.bb /gbdb/hg19/bbi/enigma/BRCAsplicing.bb")