bb3efd1b30b2fe47ce45c3c336f8982b5c7a7463 max Thu Feb 18 06:32:41 2021 -0800 tiny change to uniprot pipeline, no redmine diff --git src/hg/utils/otto/uniprot/doUniprot src/hg/utils/otto/uniprot/doUniprot index 1c59145..ebdadc6 100755 --- src/hg/utils/otto/uniprot/doUniprot +++ src/hg/utils/otto/uniprot/doUniprot @@ -148,30 +148,31 @@ taxIdDbs = getTaxIdDbs() print(("Current TaxId<->database assignment: %s" % str(taxIdDbs))) allDbs = [] for taxId, dbs in taxIdDbs: allDbs.extend(dbs) print(("To make the uniProt trackDbs for all DBs, run this in kent/src/hg/makeDb/trackDb: make alpha DBS='%s'" % " ".join(allDbs))) taxIdStr = ",".join([str(x) for (x,y) in taxIdDbs]) # just the taxIds themselves print(("to just convert the XML files (debugging?), run this: uniprotToTab %s %s %s" % (options.uniprotDir, taxIdStr, options.tabDir))) sys.exit(1) if len(args)==0 and not options.mapQa: print("To actually run the pipeline, you need to specify the argument 'run'.") parser.print_help() print("To actually run the pipeline, you need to specify the argument 'run'.") + sys.exit(0) if options.debug: consLevel = logging.DEBUG else: consLevel = logging.INFO # '' is the root logger logger = logging.getLogger('') logger.setLevel(logging.DEBUG) #By default, logs all messages # log to console ch = logging.StreamHandler() #StreamHandler logs to console ch.setLevel(consLevel) ch_format = logging.Formatter('%(asctime)s - %(message)s') ch.setFormatter(ch_format) @@ -1113,30 +1114,33 @@ "-minAli=0.99 -nohead | pslProtCnv > {out} ".format(db=db, inf=fullFaFname, out=mapFname) run(cmd) def updateUniprot(args, onlyDbs, taxIdDbs, options): uprotDir = options.uniprotDir tmpDir = join(uprotDir, "download.tmp") tabDir = options.tabDir mapDir = options.mapDir bigBedDir = options.bigBedDir faDir = options.faDir doTrembl = not options.skipTrembl relFname = join(tabDir, "version.txt") if not options.skipDownload and not options.skipParse: + if not isdir(tmpDir): + os.makedirs(tmpDir) + localFname = join(uprotDir, "uniprot_sprot.xml.gz") if not isNewer(upUrl, localFname): logging.info("files at %s are not newer than file in %s, nothing to do. Specify -l to skip this check." % (upUrl, localFname)) sys.exit(0) # use lftp to update uniprot and download only changed files cmd = 'lftp ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/ -e "lcd %s && mirror . -P 3 --use-pget-n=4 --exclude-glob *.dat.gz && exit"' % tmpDir run(cmd) # make sure that download interruptions don't mess up the files in the target dir logging.info("Moving files from %s to %s" % (tmpDir, uprotDir)) file_names = os.listdir(tmpDir) for file_name in file_names: shutil.move(os.path.join(tmpDir, file_name), uprotDir)