b37c3e0908d07a7fe62e8925f1ace13a5ccf91f1 max Fri May 16 02:17:49 2025 -0700 hubtools fix to avoid uploading a file that has already been uploaded diff --git src/utils/hubtools/hubtools src/utils/hubtools/hubtools index 437751cd712..4fd573ffdf6 100755 --- src/utils/hubtools/hubtools +++ src/utils/hubtools/hubtools @@ -741,55 +741,55 @@ cacheFname = join(tdbDir, ".hubtools.files.json") uploadCache = cacheLoad(cacheFname) logging.debug("trackDb directory is %s" % tdbDir) for rootDir, _, files in os.walk(tdbDir): for fbase in files: if fbase.startswith("."): continue localPath = normpath(join(rootDir, fbase)) logging.debug("localPath: %s" % localPath) localMtime = os.stat(localPath).st_mtime # skip files that have not changed their mtime since last upload if localPath in uploadCache: - cacheMtime = uploadCache[fpath]["mtime"] + cacheMtime = uploadCache[localPath]["mtime"] if localMtime == cacheMtime: logging.info("%s: file mtime unchanged, not uploading again" % localPath) continue else: logging.debug("file %s: mtime is %f, cache mtime is %f, need to re-upload" % (localPath, localMtime, cacheMtime)) else: logging.debug("file %s not in upload cache" % localPath) fileType = getFileType(fbase) fileAbsPath = abspath(localPath) remoteRelPath = relpath(fileAbsPath, tdbDir) remoteDir = dirname(remoteRelPath) meta = { "apiKey" : apiKey, "parentDir" : remoteDir, "genome":"NA", "fileName" : fbase, "hubName" : hubName, "hubtools" : "true", "fileType": fileType, - "lastModified" : str(int(localMtime)), + "lastModified" : str(int(localMtime)*1000), } logging.info(f"Uploading {localPath}, meta {meta}") uploader = my_client.uploader(localPath, metadata=meta) uploader.upload() # note that this file was uploaded cache = {} mtime = os.stat(localPath).st_mtime cache["mtime"] = mtime cache["size"] = os.stat(localPath).st_size uploadCache[localPath] = cache cacheWrite(uploadCache, cacheFname)