557415034ca7f233f8f4b5be7201261136ed80ae max Wed Sep 21 16:13:31 2016 -0700 small fix to two crispr-related utils diff --git src/utils/doLocusName src/utils/doLocusName index aef5fe9..8dee4d8 100755 --- src/utils/doLocusName +++ src/utils/doLocusName @@ -37,36 +37,43 @@ print "Could not run command %s" % cmd sys.exit(1) return ret def doLocusName(db, geneTableName): " create and fill the locusName table for db " tempFh = tempfile.NamedTemporaryFile() for row in runSql(db, "SELECT * from ensemblToGeneName"): tempFh.write("\t".join(row)+"\n") tempFh.flush() tempFname = tempFh.name chromSizesFname = "/cluster/data/%s/chrom.sizes" % db outTmp = "locusName.tmp.bed" - cmd = "hgsql %(db)s -NB -e 'SELECT * from %(geneTableName)s'| cut -f2- |" # get genePred - "genePredToBed stdin stdout | bedToExons stdin stdout | " # break into exons - "tabRepl %(tempFname)s 3 /dev/stdin | sort -u | " # replace transcript with symbol, skip dupes - "bedSort stdin stdout | bedOverlapMerge /dev/stdin /dev/stdout |" # merge adjacent exons - "bedBetween stdin /dev/stdout -a -s %(chromSizesFname)s |" # annotate regions - "bedSort stdin %(outTmp)s" % locals() - runCmd(cmd) + cmdParts = [] + # get genePred + cmdParts.append("hgsql %(db)s -NB -e 'SELECT * from %(geneTableName)s'| cut -f2- |") + # break genes into exons + cmdParts.append("genePredToBed stdin stdout | bedToExons stdin stdout | ") + # replace transcript with symbol, skip dupes + cmdParts.append("tabRepl %(tempFname)s 3 /dev/stdin | sort -u | ") + # merge adjacent exons + cmdParts.append("bedSort stdin stdout | bedOverlapMerge /dev/stdin /dev/stdout |") + # annotate regions + cmdParts.append("bedBetween stdin /dev/stdout -a -s %(chromSizesFname)s -m 100 |") + # and sort + cmdParts.append("bedSort stdin %(outTmp)s") + runCmd(("".join(cmdParts)) % locals()) cmd = "hgLoadBed %s locusName %s" % (db, outTmp) runCmd(cmd) os.remove(outTmp) # ----------- main -------------- def main(): args, options = parseArgs() db, geneTable = args doLocusName(db, geneTable) main()