5afa332cc12c75f441ffaf7fa2d9a219ec1f219b chmalee Fri Nov 13 10:22:20 2020 -0800 Remove the superfluous fields 'Reference Seq' and 'Variant Seq' from the genome in a bottle sv track, refs #24349 diff --git src/hg/utils/otto/dbVar/processNstd175.py src/hg/utils/otto/dbVar/processNstd175.py index ba46e3e..f7bca9c 100755 --- src/hg/utils/otto/dbVar/processNstd175.py +++ src/hg/utils/otto/dbVar/processNstd175.py @@ -1,29 +1,29 @@ #!/usr/bin/env python3 """ Read GVF lines from stdin and write bed9+ lines to stdout """ import os,sys,re import argparse bed9Fields = ["chrom", "chromStart", "chromEnd", "name", "score", "strand", "thickStart", "thickEnd", "itemRgb"] # the following list of fields may not exist for every record in the input file -extraFieldList = ["Size", "Variant Type", "Variant Region", "Link to dbVar", "Sample Name", "Sampleset Name", "Phenotype", "Variant Seq", "Reference Seq"] +extraFieldList = ["Size", "Variant Type", "Variant Region", "Link to dbVar", "Sample Name", "Sampleset Name", "Phenotype"] bedLines = {} chromLift = {} def setupCommandLine(): parser = argparse.ArgumentParser(description="Read GVF lines from infile and transform to bed9+", add_help=True, usage = "%(prog)s [options]") parser.add_argument("infile", action="store", default=None, help="Input GVF file from which to read input, use 'stdin' to read from default stdin") parser.add_argument("liftFile", action="store", default=None, help="liftUp file for converting chrom names to UCSC style names") args = parser.parse_args() return args def parseLiftFile(fname): global chromLift with open(fname) as f: @@ -65,31 +65,31 @@ for field in fields: try: if type(bed[field]) is list: finalBed.append(", ".join(bed[field])) else: finalBed.append(str(bed[field])) except KeyError: # some of the extra fields won't exist for every record finalBed.append("") print("\t".join(finalBed)) def processExtraFields(extraFields): """Special processing of the GVF extra fields""" ret = {} for key in extraFields: val = extraFields[key] - if key == "ID" or key == "Name": + if key == "ID" or key == "Name" or key == "Variant Seq" or key == "Reference Seq": continue elif key == "Dbxref": splitxrefs = val.split(',') if len(splitxrefs) > 1: sys.stderr.write("Error: more dbXref fields this release:\n") sys.stderr.write("%s\n" % (extraFields)) sys.stderr.write("stopping\n") sys.exit(1) else: val = "https://" + val[4:] ret["Link to dbVar"] = val elif key == "Parent": ret["Variant Region"] = val else: ret[key] = val