56e310552abf565c1bce2c7a58705e951e3bbf8f max Tue Sep 11 06:43:48 2018 -0700 I don't know what happened here. I committed the completely wrong file, sorry. Reverting it now. Revert "updating phegenToBed, refs #21052" This reverts commit 6985ec2b43b98f50d26ff7b3f72cefce3f6d5eba. diff --git src/utils/phegenToBed src/utils/phegenToBed index 3e77416..df4ce95 100755 --- src/utils/phegenToBed +++ src/utils/phegenToBed @@ -25,81 +25,67 @@ logging.basicConfig(level=logging.INFO) return args, options # ----------- main -------------- def main(): args, options = parseArgs() filename = args[0] snpBed = args[1] # Trait SNP rs Context Gene Gene ID Gene 2 Gene ID 2 Chromosome Location P-Value Source PubMed Analysis ID Study ID Study Name bySnp = defaultdict(list) for line in open(filename): if line.startswith("#"): continue row = line.rstrip("\n").split("\t") - - if row[-2]!="": - row[-2] = "phs%06d" % int(row[-2]) - if row[2]!="": - row[2] = "rs"+row[2] - _, trait, snpId, context, gene, geneId, gene2, geneId2, chrom, start, pVal, source, pmid, analysisId, studyId, studyName = row #if source=="NHGRI": # remove GWAS catalog for now? #continue # need phsxxxxxx for linkout to NCBI + if row[-2]!="": + row[-2] = "phs%06d" % int(row[-2]) extraCount = len(row) bySnp[snpId].append(row) bySnp = dict(bySnp) for line in open(snpBed): line = line.rstrip("\n") snpId = line.rstrip("\n").split('\t')[3] chrom, start, end, name = line.split("\t") # has chrom, start, end, name bed = [chrom, start, end, name] - rows = bySnp.get(snpId, None) + rows = bySnp.get(snpId.replace("rs",""), None) if rows==None: continue bed.append(str(len(rows))) bed.append(".") bed.append(start) bed.append(end) bed.append("0") bedCount = len(bed) extras = [] for i in range(1, extraCount): extras.append( [] ) - hasGwasCatalog = False for row in rows: for i in range(1, len(row)): #print row, i, len(row) extras[i-1].append(row[i].replace(",", ";")) - _, trait, snpId, context, gene, geneId, gene2, geneId2, chrom, start, pVal, source, pmid, analysisId, studyId, studyName = row - - if source=="NHGRI": - hasGwasCatalog = True extras = [", ".join(x) for x in extras] bed.extend(extras) # fixup a few things bed[10] = "rs"+bed[10] # use trait as name, shorten traitShort = bed[9].split(",")[0].split(';')[0].replace(" Disorders", "").replace(" Tests", "") if len(traitShort) > 17: traitShort = traitShort[:16]+".." bed[3] = traitShort - if hasGwasCatalog: - bed.append(snpId) - else: - bed.append("") - print "\t".join(bed) main()