d1e2aafa5f6b2e2acb4069f4ed51e9db37934ffe max Wed Feb 26 06:59:11 2025 -0800 trying to fix hubtools behavior when specifying -i, email from Chris, no redmine

diff --git src/utils/hubtools/hubtools src/utils/hubtools/hubtools
index de74fcdd06c..7ace50d5b2e 100755
--- src/utils/hubtools/hubtools
+++ src/utils/hubtools/hubtools
@@ -1,22 +1,22 @@
 #!/usr/bin/env python3
 
 import logging, sys, optparse, os, json, subprocess, shutil, string, glob, tempfile, re
 import shlex
 from pathlib import Path
 from collections import defaultdict, OrderedDict
-from os.path import join, basename, dirname, isfile, relpath, abspath, splitext, isdir
+from os.path import join, basename, dirname, isfile, relpath, abspath, splitext, isdir, normpath
 #import pyyaml   # not loaded here, so it's not a hard requirement, is lazy loaded in parseMetaYaml()
 
 # ==== functions =====
 # allowed file types by hubtools up
 # copied from the javascript file hgHubConnect.js
 fileTypeExtensions = {
     "bigBed": [ ".bb", ".bigbed" ],
     "bam": [ ".bam" ],
     "vcf": [ ".vcf" ],
     "vcfTabix": [ ".vcf.gz", "vcf.bgz" ],
     "bigWig": [ ".bw", ".bigwig" ],
     "hic": [ ".hic" ],
     "cram": [ ".cram" ],
     "bigBarChart": [ ".bigbarchart" ],
     "bigGenePred": [ ".bgp", ".biggenepred" ],
@@ -700,95 +700,99 @@ def cacheWrite(uploadCache, fname):
     logging.debug("Writing "+fname)
     json.dump(uploadCache, open(fname, "w"), indent=4)
 
 def getFileType(fbase):
     " return the file type defined in the hubspace system, given a base file name "
     ret = "NA"
     for fileType, fileExts in fileTypeExtensions.items():
         for fileExt in fileExts:
             if fbase.endswith(fileExt):
                 ret = fileType
                 break
         if ret!="NA":
             break
 
-    logging.debug("file type for %s is %s" % (fbase, fileType))
+    logging.debug("file type for %s is %s" % (fbase, ret))
     return ret
 
 def uploadFiles(tdbDir, hubName):
     "upload files to hubspace. Server name and token can come from ~/.hubtools.conf "
     try:
         from tusclient import client
     except ModuleNotFoundError:
         installModule("tuspy")
         from tusclient import client
 
     serverUrl = cfgOption("tusUrl", "https://hubspace.gi.ucsc.edu/files")
 
     cookies = {}
     cookieNameUser = cfgOption("wiki.userNameCookie", "wikidb_mw1_UserName")
     cookieNameId = cfgOption("wiki.loggedInCookie", "wikidb_mw1_UserID")
 
     apiKey = cfgOption("apiKey")
     if apiKey is None:
         errAbort("To upload files, the file ~/.hubtools.conf must contain a line like apiKey='xxx').\n"
                 "Go to https://genome.ucsc.edu/cgi-bin/hgHubConnect#dev to create a new apiKey. Then run \n"
                 "    echo 'apiKey=\"xxxx\"' >> ~/.hubtools.conf \n"
                 "and run the 'hubtools up' command again.")
 
     logging.info(f"TUS server URL: {serverUrl}")
     my_client = client.TusClient(serverUrl)
 
     cacheFname = join(tdbDir, ".hubtools.files.json")
     uploadCache = cacheLoad(cacheFname)
 
-    for rootDir, dirs, files in os.walk(tdbDir):
+    logging.debug("trackDb directory is %s" % tdbDir)
+    for rootDir, _, files in os.walk(tdbDir):
         for fbase in files:
             if fbase.startswith("."):
                 continue
-            fpath = join(rootDir, fbase)
-            localPath = join(tdbDir, fpath)
+            localPath = normpath(join(rootDir, fbase))
+            logging.debug("localPath: %s" % localPath)
             localMtime = os.stat(localPath).st_mtime
 
             # skip files that have not changed their mtime since last upload
-            if fpath in uploadCache:
+            if localPath in uploadCache:
                 cacheMtime = uploadCache[fpath]["mtime"]
                 if localMtime == cacheMtime:
                     logging.info("%s: file mtime unchanged, not uploading again" % localPath)
                     continue
                 else:
                     logging.debug("file %s: mtime is %f, cache mtime is %f, need to re-upload" %
-                        (fpath, localMtime, cacheMtime))
+                        (localPath, localMtime, cacheMtime))
             else:
-                logging.debug("file %s not in upload cache" % fpath)
+                logging.debug("file %s not in upload cache" % localPath)
 
             fileType = getFileType(fbase)
 
-            parentDir = join(hubName, rootDir)
+            fileAbsPath = abspath(localPath)
+            remoteRelPath = relpath(fileAbsPath, tdbDir)
+            remoteDir = dirname(remoteRelPath)
+
             meta = {
                 "apiKey" : apiKey,
-                "parentDir" : parentDir,
+                "parentDir" : remoteDir,
                 "genome":"NA",
                 "fileName" : fbase,
                 "hubName" : hubName,
                 "hubtools" : "true",
                 "fileType": fileType,
                 "lastModified" : str(int(localMtime)),
             }
 
-            logging.info(f"Uploading {fpath}, meta {meta}")
+            logging.info(f"Uploading {localPath}, meta {meta}")
 
             uploader = my_client.uploader(localPath, metadata=meta)
             uploader.upload()
 
             # note that this file was uploaded
             cache = {}
             mtime = os.stat(localPath).st_mtime
             cache["mtime"] = mtime
             cache["size"] = os.stat(localPath).st_size
             uploadCache[fpath] = cache
 
     cacheWrite(uploadCache, cacheFname)
 
 def iterRaStanzas(fname):
     " parse an ra-style (trackDb) file and yield dictionaries "
     data = dict()
@@ -1203,31 +1207,31 @@
     """ find files under dirName and create a trackDb.txt for them"""
     cmd = args[0]
 
     inDir = "."
     if options.inDir:
         inDir = options.inDir
 
     outDir = "."
     if options.outDir:
         outDir = options.outDir
 
     if cmd=="up":
         if len(args)<2:
             errAbort("The 'up' command requires one argument, the name of the hub. You can specify any name, "
-                    "ideally a short, meaningful string, e.g. atacseq, muscle-rna or yamamoto2022.")
+                    "ideally a short, meaningful string, e.g. atacseq, muscle-rna or yamamoto2022. Avoid special characters.")
         hubName = args[1]
         uploadFiles(inDir, hubName)
         return
 
     tdbDir = inDir
     if options.outDir:
         tdbDir = options.outDir
 
     if cmd=="jbrowse":
         importJbrowse(args[1], args[2], tdbDir)
     elif cmd == "tab":
         raToTab(args[1])
     elif cmd == "make":
         db = args[1]
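Note on the upload-loop hunk: the patch removes the fpath variable, but two unchanged context lines, cacheMtime = uploadCache[fpath]["mtime"] and uploadCache[fpath] = cache, still reference it and would raise a NameError for any file that is cached or freshly uploaded; localPath is presumably the intended cache key, since the new code also looks files up with "if localPath in uploadCache". The sketch below is not part of the patch: walkUploads is a hypothetical helper, written only to illustrate how the loop appears intended to work after this change, with the cache keyed on the normalized local path and the remote parent directory taken relative to the -i input directory (the hub name is no longer prefixed, presumably because it is already sent separately in the meta dict).

    import os
    from os.path import join, normpath, abspath, relpath, dirname

    def walkUploads(tdbDir, uploadCache):
        " yield (localPath, remoteDir) for files under tdbDir that still need uploading "
        for rootDir, _, files in os.walk(tdbDir):
            for fbase in files:
                if fbase.startswith("."):
                    continue
                # same normalization as the patch: one canonical key per local file
                localPath = normpath(join(rootDir, fbase))
                localMtime = os.stat(localPath).st_mtime
                cached = uploadCache.get(localPath)
                if cached is not None and cached["mtime"] == localMtime:
                    continue   # unchanged since the last upload
                # remote parent dir = file location relative to the -i input directory
                remoteDir = dirname(relpath(abspath(localPath), tdbDir))
                yield localPath, remoteDir

    # usage sketch: the caller uploads each file and then records it under the
    # same key (localPath, not the removed fpath variable):
    #   for localPath, remoteDir in walkUploads(inDir, uploadCache):
    #       ...upload localPath with parentDir=remoteDir...
    #       st = os.stat(localPath)
    #       uploadCache[localPath] = {"mtime": st.st_mtime, "size": st.st_size}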