#!/usr/bin/env python3
# Post-patch contents of src/hg/utils/otto/panelApp/updatePanelApp, from commit
# da377fdc8a33c352274795c5817d8d117e03ae10 (max, Tue Oct 4 03:30:08 2022 -0700):
# "more panelApp fixes, partially found by QA, refs #25568"

from datetime import date
import os
import shutil
import subprocess

# the script uses relative pathnames, so make sure we're always in the right directory
os.chdir("/hive/data/outside/otto/panelApp")

# The code came in three files from Beagan with a lot of code duplication
# One per subtrack
# This is why it's still split over three separate Python files
import genes
import tandRep
import cnv

# Subtracks that exist for each assembly, in processing order.
# There are no CNVs for hg19 yet, so "cnv" is only listed for hg38. Keeping this
# in a single table stops flipFiles() and updateGbdbSymlinks() from drifting apart.
SUBTRACKS_BY_DB = {
    "hg19": ("genes", "tandRep"),
    "hg38": ("genes", "tandRep", "cnv"),
}


def _runCmd(args):
    """Run an external command given as an argument list, without a shell.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    This replaces `assert(os.system(cmd)==0)`: an assert statement is removed
    completely under `python -O`, which would have skipped the command itself.
    """
    subprocess.run(args, check=True)


def getArchDir(db):
    """Return the hgwdev archive directory for db and today's date.

    The directory is created if it does not exist yet.
    """
    dateStr = date.today().strftime("%Y-%m-%d")
    archDir = "/usr/local/apache/htdocs-hgdownload/goldenPath/archive/%s/panelApp/%s" % (db, dateStr)
    # exist_ok avoids the check-then-create race of isdir() followed by makedirs()
    os.makedirs(archDir, exist_ok=True)
    return archDir


def writeBb(hg19Table, hg38Table, subTrack):
    """Sort the pandas tables, write them to BED and convert to bigBed.

    hg19Table / hg38Table: pandas tables with 'chrom' and 'chromStart' columns,
        or None when there is no data for that assembly (used for cnvs).
    subTrack: subtrack name; used for the output file names and for the
        autoSql file <subTrack>.as.

    Output goes to current/<db>/<subTrack>.bed.tmp and .bb.tmp; the .bb.tmp
    files are renamed later by flipFiles(). A copy of the bigBed is placed in
    the archive directory. The input tables are not modified.
    """
    for db, pdTable in (("hg19", hg19Table), ("hg38", hg38Table)):
        # for cnvs, one of the arguments can be None
        if pdTable is None:
            continue

        bedFname = "current/%s/%s.bed.tmp" % (db, subTrack)
        bbFname = "current/%s/%s.bb.tmp" % (db, subTrack)

        # sort a copy rather than in place, so the caller's table is left untouched
        sortedTable = pdTable.sort_values(by=['chrom', 'chromStart'], ascending=(True, True))
        sortedTable.to_csv(bedFname, sep='\t', index=False, header=False)

        asFname = subTrack + ".as"
        chromSizes = "/hive/data/genomes/%s/chrom.sizes" % db
        # -extraIndex=geneName
        _runCmd(["bedToBigBed", "-tab", "-as=%s" % asFname, "-type=bed9+26",
                 bedFname, chromSizes, bbFname])

        # put a copy into the archive
        archBbFname = getArchDir(db) + "/%s.bb" % subTrack
        shutil.copyfile(bbFname, archBbFname)


def updateGbdbSymlinks():
    """Update the symlinks in /gbdb.

    Not really necessary but kept this code just in case.
    """
    # the original used the shell's `pwd`; both name the directory set by os.chdir() above
    baseDir = os.getcwd()
    for db, subTracks in SUBTRACKS_BY_DB.items():
        for subTrack in subTracks:
            target = "%s/current/%s/%s.bb" % (baseDir, db, subTrack)
            linkName = "/gbdb/%s/panelApp/%s.bb" % (db, subTrack)
            _runCmd(["ln", "-sf", target, linkName])


def flipFiles():
    """Rename the .tmp bigBed files to their final filenames."""
    for db, subTracks in SUBTRACKS_BY_DB.items():
        for subTrack in subTracks:
            oldFname = "current/%s/%s.bb.tmp" % (db, subTrack)
            newFname = "current/%s/%s.bb" % (db, subTrack)
            os.replace(oldFname, newFname)


def main():
    """Create the 2 x three BED files, convert each to bigBed and update the archive."""
    hg19Bed, hg38Bed = genes.downloadGenes()
    writeBb(hg19Bed, hg38Bed, "genes")

    hg19Bed, hg38Bed = tandRep.downloadTandReps()
    writeBb(hg19Bed, hg38Bed, "tandRep")

    hg38Bed = cnv.downloadCnvs()
    # no hg19 CNV data yet from PanelApp
    writeBb(None, hg38Bed, "cnv")

    # only flip once every subtrack was built without error
    flipFiles()
    updateGbdbSymlinks()

    print("PanelApp otto update: OK")


if __name__ == "__main__":
    main()