e9cd3eef710ba3141d58ef119006b7d2327e5b7f markd Wed Dec 9 13:52:01 2020 -0800 fixed accidently backout of master changes in the last merge diff --git src/hg/makeDb/gnomad/gnomadVcfBedToBigBed src/hg/makeDb/gnomad/gnomadVcfBedToBigBed new file mode 100755 index 0000000..39e34b7 --- /dev/null +++ src/hg/makeDb/gnomad/gnomadVcfBedToBigBed @@ -0,0 +1,329 @@ +#!/cluster/software/bin/python3 + +""" +Helper script to do some gnomAD specific stuff to the vcfToBed output + +NOTE: This script is dependent on the format of the VEP INFO field +in a particular VCF file. Use the -v option to pass the right version +options to this script, and if necessary, add a new one. + +Example format for the v2.1.1 version of gnomAD: +##INFO= +""" + +import sys, argparse +from collections import defaultdict,namedtuple + +# which version of gnomAD for parsing VEP string +versions = ["v2.1.1", "v3.1"] + +# the number of fields in the VEP string (depends on version): +# how to count: +# bcftools view -h in.vcf.gz | grep "^##INFO= 13 else ref + name += "/" + name += alt[:10]+"..." 
# NOTE(review): the line this block replaces opened with the tail of a
# definition whose beginning is not visible in this excerpt. That fragment is
# kept verbatim (diff "+" markers included) on the next comment line instead
# of being reconstructed by guesswork:
# if len(alt) > 13 else alt + name += ")" + unshiftedStart = unshiftLeftPad(int(bed8Fields[1]), ref, alt) + pLoFCuration = getLofCuration(lofDict, version, bed8Fields[0], str(unshiftedStart), ref, alt) + savedPreFields = bed8Fields[:3] + [name] + bed8Fields[4:] + [color] + firstExtra + outfh.write("\t".join(savedPreFields + [rsId] + [gene] + [annot] + \ + consList + hgvscList + hgvspList + pLoFList + pLoFFlags + pLoFCuration + savedPostFields + [str(unshiftedStart)]) + "\n")


def parseLofFile(fpath):
    """Make a struct of the different loss of function flags for a curated variant.

    The first line of the tab-separated file at ``fpath`` is taken as the
    header. Every following non-blank line becomes one entry of the returned
    dict, keyed by the line's first column; the value is a dict mapping each
    header name to the matching field of that line.

    Short rows are padded with "" up to the header length: ``line.strip()``
    also strips trailing tabs, so a row whose last columns are empty splits
    into fewer fields than the header and would otherwise raise IndexError.
    Fields beyond the header length are ignored. An empty file yields ``{}``.
    """
    ret = {}
    with open(fpath) as fh:
        # The default keeps an empty file from raising StopIteration.
        lofHeader = next(fh, "").strip().split("\t")
        for line in fh:
            stripped = line.strip()
            if not stripped:
                # A blank line carries no key and no values.
                continue
            lofDetails = stripped.split("\t")
            # Restore trailing empty columns removed by strip(); a negative
            # count multiplies to an empty list, so long rows are unaffected.
            lofDetails += [""] * (len(lofHeader) - len(lofDetails))
            # zip() stops at the header length, dropping any surplus fields.
            ret[lofDetails[0]] = dict(zip(lofHeader, lofDetails))
    return ret


def main():
    """Parse the command line, load the optional LoF table and run the conversion.

    Reads ``lofFilePath``, ``infile``, ``outfile`` and ``version`` from the
    object returned by parseCommandLine(). The literal names "stdin" and
    "stdout" select the standard streams instead of files on disk. Both
    handles are closed when the conversion finishes or raises.
    """
    args = parseCommandLine()
    lofDict = parseLofFile(args.lofFilePath) if args.lofFilePath else {}
    infh = sys.stdin if args.infile == "stdin" else open(args.infile)
    try:
        outfh = sys.stdout if args.outfile == "stdout" else open(args.outfile, "w")
        try:
            gnomadVcfBedToBigBed(infh, outfh, args.version, lofDict)
        finally:
            # Runs on failure too, so buffered output is flushed and released.
            outfh.close()
    finally:
        infh.close()


if __name__ == "__main__":
    main()