#!/usr/bin/env python3
# Origin: commit f94190412d16ea558ace1ec9ea175db39aaa104b
#   Author: max, Fri Sep 30 08:40:43 2022 -0700
#   Message: panelApp otto job, refs #25568
#   File: src/hg/utils/otto/panelApp/updatePanelApp (new file, mode 100755, index 0000000..3e12fcb)
"""PanelApp otto job.

Downloads the PanelApp tables through the genes, tandRep and cnv modules,
writes each one to a sorted BED file, converts it to bigBed with
bedToBigBed, stores a dated copy in the hgdownload archive directory and
finally renames the freshly built .bb.tmp files to their final names.
"""
from datetime import date
import os
import shutil
import subprocess

# the script uses relative pathnames, so make sure we're always in the right directory
os.chdir("/hive/data/outside/otto/panelApp")

# The code came in three files from Beagan with a lot of code duplication
# One per subtrack
# This is why it's still split over three separate Python files
import genes
import tandRep
import cnv

# assemblies and subtracks handled by this job
DBS = ["hg19", "hg38"]
SUBTRACKS = ["genes", "tandRep", "cnv"]


def _runCmd(args):
    """Run an external command given as an argument list.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    The command is deliberately not wrapped in an assert statement: asserts
    are removed under "python -O", which would skip the command altogether.
    """
    subprocess.run(args, check=True)


def getArchDir(db):
    """Return the hgwdev archive directory for db and today's date.

    The directory is created if it does not exist yet.
    """
    dateStr = date.today().strftime("%Y-%m-%d")
    archDir = "/usr/local/apache/htdocs-hgdownload/goldenPath/archive/%s/panelApp/%s" % (db, dateStr)
    # exist_ok avoids the check-then-create race of isdir() followed by makedirs()
    os.makedirs(archDir, exist_ok=True)
    return archDir


def writeBb(hg19Table, hg38Table, subTrack):
    """Sort the pandas tables, write them to BED and convert them to bigBed.

    hg19Table / hg38Table: pandas tables for the two assemblies; either one
        may be None, in which case that assembly is skipped.
    subTrack: name of the subtrack; used for the output file names and for
        the autoSql file <subTrack>.as.

    Writes current/<db>/<subTrack>.bed.tmp and current/<db>/<subTrack>.bb.tmp
    and copies the bigBed to the archive directory as <subTrack>.bb.
    The tables are sorted in place.
    Raises subprocess.CalledProcessError if bedToBigBed fails.
    """
    tables = {"hg19": hg19Table, "hg38": hg38Table}

    for db in DBS:
        # called before the None check, as before, so that the dated archive
        # directory gets created for both assemblies
        archDir = getArchDir(db)

        bedFname = "current/%s/%s.bed.tmp" % (db, subTrack)
        bbFname = "current/%s/%s.bb.tmp" % (db, subTrack)

        # for cnvs, one of the arguments can be None
        pdTable = tables[db]
        if pdTable is None:
            continue

        pdTable.sort_values(by=['chrom', 'chromStart'], ascending=[True, True], inplace=True)
        pdTable.to_csv(bedFname, sep='\t', index=False, header=False)

        asFname = subTrack + ".as"

        # -extraIndex=geneName
        _runCmd([
            "bedToBigBed",
            "-tab",
            "-as=%s" % asFname,
            "-type=bed9+26",
            bedFname,
            "/hive/data/genomes/%s/chrom.sizes" % db,
            bbFname,
        ])

        # put a copy into the archive
        archBbFname = archDir + "/%s.bb" % subTrack
        shutil.copyfile(bbFname, archBbFname)


def updateGbdbSymlinks():
    """Update the symlinks in /gbdb.

    Not really necessary but kept this code just in case.
    Raises subprocess.CalledProcessError if ln fails.
    """
    curDir = os.getcwd()
    for db in DBS:
        for subTrack in SUBTRACKS:
            # NOTE(review): this also links hg19/cnv.bb, a file that flipFiles()
            # never creates, so that link is dangling - confirm whether it
            # should be skipped here as well
            target = "%s/current/%s/%s.bb" % (curDir, db, subTrack)
            linkName = "/gbdb/%s/panelApp/%s.bb" % (db, subTrack)
            _runCmd(["ln", "-sf", target, linkName])


def flipFiles():
    """Rename the .tmp files to the final filenames."""
    for db in DBS:
        for subTrack in SUBTRACKS:
            if db == "hg19" and subTrack == "cnv":
                # no cnvs for hg19 yet
                continue
            oldFname = "current/%s/%s.bb.tmp" % (db, subTrack)
            newFname = "current/%s/%s.bb" % (db, subTrack)
            # os.replace overwrites an existing destination file
            os.replace(oldFname, newFname)


def main():
    """Create the 2 x three BED files, convert each to bigBed and update the archive."""
    hg19Bed, hg38Bed = genes.downloadGenes()
    writeBb(hg19Bed, hg38Bed, "genes")

    hg19Bed, hg38Bed = tandRep.downloadTandReps()
    writeBb(hg19Bed, hg38Bed, "tandRep")

    hg38Bed = cnv.downloadCnvs()
    # no hg19 CNV data yet from PanelApp
    writeBb(None, hg38Bed, "cnv")

    # only flip once all conversions succeeded, so a failure part-way
    # through leaves the previous .bb files untouched
    flipFiles()
    updateGbdbSymlinks()

    print("PanelApp otto update: OK")


if __name__ == "__main__":
    main()