57105e3e6e8c590537fedcc559cb0fac2812d56d
lrnassar
  Thu Feb 22 14:33:24 2024 -0800
Staging the ENIGMA tracks, refs #32919

diff --git src/hg/makeDb/scripts/enigma/BRCAclinDomains.py src/hg/makeDb/scripts/enigma/BRCAclinDomains.py
new file mode 100644
index 0000000..dc222ab
--- /dev/null
+++ src/hg/makeDb/scripts/enigma/BRCAclinDomains.py
@@ -0,0 +1,66 @@
+import subprocess
+
+def bash(cmd):
+    """Execute cmd through the shell; return its stdout with stderr merged in.
+    Raises RuntimeError carrying the exit code and output on non-zero exit."""
+    try:
+        completed = subprocess.run(cmd, shell=True, check=True, universal_newlines=True,
+                                   stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as err:
+        raise RuntimeError("command '{}' return with error (code {}): {}".format(err.cmd, err.returncode, err.output))
+    return completed.stdout
+
+# Hard-coded hg19 BED9+4 rows, one per BRCA1/BRCA2 protein domain.
+# NOTE(review): "BRTC repeats" is presumably a typo for "BRCT repeats" -- confirm before changing track data.
+outputText="""chr17\t41256276\t41276109\tRING\t0\t.\t41256276\t41276109\t255,0,0\tBRCA1\tNM_007294.4\t2-101\t<b>Domain: </b>RING</br><b>Transcript: </b>NM_007294.4</br><b>Amino acid loc:</b> 2-101
+chr17\t41234506\t41242975\tCoiled-coil\t0\t.\t41234506\t41242975\t255,0,0\tBRCA1\tNM_007294.4\t1391-1424\t<b>Domain: </b>Coiled-coil</br><b>Transcript: </b>NM_007294.4</br><b>Amino acid loc:</b> 1391-1424
+chr17\t41197716\t41222983\tBRTC repeats\t0\t.\t41197716\t41222983\t255,0,0\tBRCA1\tNM_007294.4\t1650-1857\t<b>Domain: </b>BRTC repeats</br><b>Transcript: </b>NM_007294.4</br><b>Amino acid loc:</b> 1650-1857
+chr13\t32890627\t32893266\tPALB2 BD\t0\t.\t32890627\t32893266\t255,0,0\tBRCA2\tNM_000059.4\t10-40\t<b>Domain: </b>PALB2 BD</br><b>Transcript: </b>NM_000059.4</br><b>Amino acid loc:</b> 10-40
+chr13\t32930572\t32971091\tDNA BD\t0\t.\t32930572\t32971091\t255,0,0\tBRCA2\tNM_000059.4\t2481-3186\t<b>Domain: </b>DNA BD</br><b>Transcript: </b>NM_000059.4</br><b>Amino acid loc:</b> 2481-3186
+"""
+
+# Write the rows; the with-block guarantees the file is flushed and closed
+# before bedSort reads it.
+with open("/hive/data/inside/enigmaTracksData/outputBedFile.bed",'w') as outputBedFile:
+    outputBedFile.write(outputText)
+
+# Sort the BED file in place (input and output paths are the same).
+bash("bedSort /hive/data/inside/enigmaTracksData/outputBedFile.bed \
+/hive/data/inside/enigmaTracksData/outputBedFile.bed")
+
+# autoSql schema for the bed9+4 rows; passed to bedToBigBed through -as= below.
+# NOTE(review): name is described as "HGVS Nucleotide" but the rows hold domain names -- confirm.
+startOfAsFile="""table BRCAclinDomains
+"BRCA1 and BRCA2 ENIGMA clinically relevant protein domains (ENIGMA specifications version 1.1.0)"
+   (
+   string chrom;       "Reference sequence chromosome or scaffold"
+   uint   chromStart;  "Start position in chromosome"
+   uint   chromEnd;    "End position in chromosome"
+   string name;        "HGVS Nucleotide"
+   uint score;         "Not used, all 0"
+   char[1] strand;     "Not used, all ."
+   uint thickStart;    "Same as chromStart"
+   uint thickEnd;      "Same as chromEnd"
+   uint reserved;      "RGB value (use R,G,B string in input file)"
+   string geneSymbol;  "Gene symbol"
+   string NMaccession; "NCBI NM isoform accession"
+   string AAlocation;  "Amino acid location of domain"
+   string _mouseOver;  "Field only used as mouseOver"
+   )"""
+with open("/hive/data/inside/enigmaTracksData/BRCAclinDomains.as","w") as asFileOutput:
+    asFileOutput.write(startOfAsFile)
+
+# Build the hg19 bigBed, lift the sorted BED over to hg38, then build the hg38 bigBed.
+bash("bedToBigBed -as=/hive/data/inside/enigmaTracksData/BRCAclinDomains.as -type=bed9+4 -tab "
+     "/hive/data/inside/enigmaTracksData/outputBedFile.bed /cluster/data/hg19/chrom.sizes "
+     "/hive/data/inside/enigmaTracksData/BRCAclinDomainsHg19.bb")
+bash("liftOver -bedPlus=9 -tab /hive/data/inside/enigmaTracksData/outputBedFile.bed "
+     "/hive/data/genomes/hg19/bed/liftOver/hg19ToHg38.over.chain.gz "
+     "/hive/data/inside/enigmaTracksData/outputBedFileHg38.bed /hive/data/inside/enigmaTracksData/unmapped.bed")
+bash("bedToBigBed -as=/hive/data/inside/enigmaTracksData/BRCAclinDomains.as -type=bed9+4 -tab "
+     "/hive/data/inside/enigmaTracksData/outputBedFileHg38.bed /cluster/data/hg38/chrom.sizes "
+     "/hive/data/inside/enigmaTracksData/BRCAclinDomainsHg38.bb")
+
+# Force-create the /gbdb symlinks to both bigBeds (hg38 first, then hg19).
+for assemblyNum in ("38", "19"):
+    bash("ln -sf /hive/data/inside/enigmaTracksData/BRCAclinDomainsHg{0}.bb /gbdb/hg{0}/bbi/enigma/BRCAclinDomains.bb".format(assemblyNum))