0f3ca3eaf5792df01b7c600a5428d2d0b2809fcd max Fri Sep 20 13:18:01 2024 -0700 Revert "more features to hubtools: search in both parent and subdirs, better docs" This reverts commit 05e67c59a20a5d00b810a981aef3b00c5bef82e1. diff --git src/hg/makeDb/cadd/caddToWig.py src/hg/makeDb/cadd/caddToWig.py index 99bf4a1..0eb21ab 100644 --- src/hg/makeDb/cadd/caddToWig.py +++ src/hg/makeDb/cadd/caddToWig.py @@ -1,64 +1,63 @@ -import sys import subprocess def inputLineChunk(): " yield all values at consecutive positions as a tuple (chrom, pos, list of (nucl, phred value)) " ## CADD GRCh37-v1.6 (c) University of Washington, Hudson-Alpha Institute for Biotechnology and Berlin Institute of Health 2013-2019. All rights reserved. #Chrom Pos Ref Alt RawScore PHRED #1 10001 T A 0.088260 4.066 values = [] firstChrom = None firstPos = None lastChrom = None lastPos = None #for line in gzip.open("whole_genome_SNVs.tsv.gz"): - for line in subprocess.Popen(['zcat', sys.argv[1]], stdout=subprocess.PIPE, encoding="ascii").stdout: + for line in subprocess.Popen(['zcat', "whole_genome_SNVs.tsv.gz"], stdout=subprocess.PIPE, encoding="ascii").stdout: if line.startswith("#"): continue row = line.rstrip("\n").split("\t") chrom, pos, ref, alt, raw, phred = row chrom = "chr"+chrom pos = int(row[1]) phred = float(phred) if firstPos is None: firstChrom = chrom firstPos = pos if chrom!=lastChrom or pos-lastPos > 1 and firstChrom is not None: yield firstChrom, firstPos, values firstPos, firstChrom = None, None values = [] if pos != lastPos: values.append([0.0,0.0,0.0,0.0]) nuclIdx = "ACGT".find(alt) values[-1][nuclIdx] = phred lastPos = pos lastChrom = chrom # last line of file yield firstChrom, firstPos, values outFhs = { "A" : open("a.wig", "w"), "C" : open("c.wig", "w"), "T" : open("t.wig", "w"), "G" : open("g.wig", "w") } for chrom, pos, nuclValues in inputLineChunk(): if len(nuclValues)==0: continue for ofh in outFhs.values(): ofh.write("fixedStep chrom=%s span=1 step=1 start=%d\n" % (chrom, pos)) for nuclVals in nuclValues: for nucIdx in (0,1,2,3): alt = "ACGT"[nucIdx] ofh = outFhs[alt] phred = nuclVals[nucIdx] ofh.write(str(phred)) ofh.write("\n")