0122974795a3f2b9261ad4cccda2b0245c5a7731
max
  Mon May 23 03:48:36 2022 -0700
updating natural selection track on wuhCor1, refs #29478

diff --git src/hg/makeDb/scripts/pondToBed src/hg/makeDb/scripts/pondToBed
new file mode 100755
index 0000000..0a00a7f
--- /dev/null
+++ src/hg/makeDb/scripts/pondToBed
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import logging, sys, optparse
+from collections import defaultdict
+from os.path import join, basename, dirname, isfile
+import datetime
+from datetime import date
+
+# ==== functions =====
+    
+def parseArgs():
+    " setup logging, parse command line arguments and options. -h shows auto-generated help page "
+    parser = optparse.OptionParser("usage: %prog [options] filename")
+
+    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
+    #parser.add_option("-f", "--file", dest="file", action="store", help="run on file") 
+    #parser.add_option("", "--test", dest="test", action="store_true", help="do something") 
+    (options, args) = parser.parse_args()
+
+    if args==[]:
+        parser.print_help()
+        exit(1)
+
+    if options.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.getLogger().setLevel(logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+        logging.getLogger().setLevel(logging.INFO)
+
+    return args, options
+# ----------- main --------------
+def main():
+    args, options = parseArgs()
+
+    filename = args[0]
+    maxDays = int(args[1])
+    #if options.test:
+        #logging.debug("test is set")
+        #f = open(options.file, "r")
+    #gene	site	seqs	from	to	p	alpha	beta	T_int	T_total
+    #ORF3a	43	351	2019-12-01	2020-02-29	0.0263366	12.2782	0	0.121592	0.314228
+    genes = [
+     ["leader",265],
+     ["nsp2",805],
+     ["nsp3",2719],
+     ["nsp4",8554],
+     ["3C",10054],
+     ["nsp6",10972],
+     ["nsp7",11842],
+     ["nsp8",12091],
+     ["nsp9",12685],
+     ["nsp10",13024],
+     ["RdRp",13441],
+     ["helicase",16236],
+     ["exonuclease",18039],
+     ["endornase",19620],
+     ["methyltransferase",20658],
+     ["S",21562],
+     ["ORF3a",25392],
+     ["E",26244],
+     ["M",26523],
+     ["ORF6",27201],
+     ["ORF7a",27393],
+     ["ORF8",27893],
+     ["N",28273],
+    ]
+    geneToPos = dict(genes)
+
+    posSelFh = open("pos.bed", "w")
+    negSelFh = open("neg.bed", "w")
+
+    for line in open(filename):
+        if line.startswith("gene"):
+            continue
+        row = line.rstrip("\n").split("\t")
+        gene, site, seqs, fromDateStr, toDateStr, p, alpha, beta, tint, ttotal = row
+        nucPos = geneToPos[gene]
+        site = int(site)
+        start = nucPos+(site*3)
+        chrom = "NC_045512v2"
+        alpha = float(alpha)
+        beta = float(beta)
+        p = float(p)
+        fromDate = date.fromisoformat(fromDateStr)
+        toDate = date.fromisoformat(toDateStr)
+
+        delta = datetime.date.today() - fromDate
+        if delta.days > maxDays:
+            #print("too long ago: %s" % fromDate)
+            continue
+        if p > 0.01:
+            continue
+        # dn=alpha, ds=beta
+        # dn < ds = pos
+        # ds < dn = neg
+        if alpha < beta:
+            ofh = posSelFh
+        if beta < alpha:
+            ofh = negSelFh
+
+        bed = (chrom, str(start), str(start+3), "", "0", ".", str(start), str(start+3), str(seqs), str(alpha), str(beta), str(p))
+        ofh.write("\t".join(bed))
+        ofh.write("\n")
+
+    print("wrote pos.bed and neg.bed")
+
+main()
+