dd626c7ec6ba839b0f51d3ef0f30f91bc5f23d80 bwick Wed Oct 29 15:10:48 2025 -0700 Adding in new code to also copy config files from cb-submit directory diff --git ucsc/cbMove ucsc/cbMove index 898f6af..453ef23 100755 --- ucsc/cbMove +++ ucsc/cbMove @@ -21,74 +21,139 @@ with open(file_path, newline='') as f: for line in f: stripped = line.strip() if stripped: file_list.append(stripped) return file_list def is_file_in_time_range(path, start_time=None, end_time=None): mtime = datetime.fromtimestamp(os.path.getmtime(path)) if start_time and mtime < start_time: return False if end_time and mtime > end_time: return False return True -def cb_push_to_orig(file, cb_dir, subdir="orig"): +def cb_push_to_orig(file, cb_dir, subdir=None): + + if subdir: + output_dir = os.path.join("/hive/data/inside/cells/datasets", cb_dir, "orig", subdir) + display_path = f"{cb_dir}/orig/{subdir}" + else: + output_dir = os.path.join("/hive/data/inside/cells/datasets", cb_dir, "orig") + display_path = f"{cb_dir}/orig" + file_ext = os.path.splitext(file)[1].lstrip(".") - target_dir = os.path.join("/hive/data/inside/cells/datasets", cb_dir, subdir) - actual_file = None + fname= None file_path = None if file_ext == "info": try: with open(file) as f: metadata = json.load(f) - actual_file = metadata["MetaData"]["filename"] + fname = metadata["MetaData"]["filename"] except Exception as e: print(f"[ERROR] Failed to read or parse {file}: {e}", file=sys.stderr) return 1 file_base = os.path.splitext(os.path.basename(file))[0] file_path = os.path.join(os.path.dirname(file), file_base) else: - actual_file = os.path.basename(file) + fname = os.path.basename(file) file_path = os.path.abspath(file) - dest_path = os.path.join(target_dir, actual_file) + dest_path = os.path.join(output_dir, fname) - os.makedirs(target_dir, exist_ok=True) + os.makedirs(output_dir, exist_ok=True) try: - print(f"Copying {actual_file} to {cb_dir}/{subdir} directory...") + print(f"Copying {fname} to {display_path} directory...") 
shutil.copy(file_path, dest_path) print("Successfully copied!") return 0 except Exception as e: print(f"[ERROR] Copy failed: {e}", file=sys.stderr) return 1 +def submit_need_to_copy(submit_dir, output_dir): + """ + Return True if any file/dir under submit_dir is missing in output_dir. + No hashes, just existence. + """ + for root, dirs, files in os.walk(submit_dir): + rel = os.path.relpath(root, submit_dir) + dst_root = os.path.join(output_dir, rel) if rel != "." else output_dir + # any directory missing? + for d in dirs: + if not os.path.exists(os.path.join(dst_root, d)): + return True + # any file missing? + for f in files: + if not os.path.exists(os.path.join(dst_root, f)): + return True + return False + +def copy_submit_dir(cb_dir, subdir=None, force=False): + """ + Recursively copy all files and directories from: + /hive/data/inside/cells/submit/<cb_dir>/ + into: + /hive/data/inside/cells/datasets/<cb_dir>/<subdir>/ + + Default: copy once (skip if files already copied over). + force=True (use with --submit): re-copy by merging/overwriting. + """ + submit_dir = os.path.join("/hive/data/inside/cells/submit", cb_dir) + + if subdir: + output_dir = os.path.join("/hive/data/inside/cells/datasets", cb_dir, "orig", subdir) + display_path = f"{cb_dir}/orig/{subdir}" + else: + output_dir = os.path.join("/hive/data/inside/cells/datasets", cb_dir, "orig") + display_path = f"{cb_dir}/orig" + + if not os.path.isdir(submit_dir): + print(f"[INFO] No directory found for {cb_dir} in submit directory, skipping.") + return 0 + + os.makedirs(output_dir, exist_ok=True) + + # Skip if destination already contains files and not forcing + if not force and not submit_need_to_copy(submit_dir, output_dir): + print(f"[SKIP] Project information files already copied over from submit directory in {output_dir}. 
Use --submit to refresh.") + return 0 + + try: + print(f"Copying submit directory contents:\n {submit_dir}\n→ {display_path} ...") + shutil.copytree(submit_dir, output_dir, dirs_exist_ok=True) + print(f"[OK] Files for {cb_dir} successfully copied from submit directory.") + return 0 + except Exception as e: + print(f"[ERROR] Failed to copy files from submit directory: {e}", file=sys.stderr) + return 1 + def main(): parser = argparse.ArgumentParser( description="Move uploaded files to Cell Browser dataset 'orig' directory.", formatter_class=argparse.HelpFormatter) parser.add_argument("-d", "--dir", required=True, help="Input dataset directory name (e.g. cortex-dev).") parser.add_argument("-l", "--list", help="Input file list containing .info files (one per line) to move over.") parser.add_argument("-f", "--file", help="Path to a single .info file to move over.") - parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to (default: orig). Helpful if you need to version or add updated files (e.g. orig/update-MM-DD-YY or orig/v1_MM-DD-YY)") + parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to inside the 'orig' directory. Helpful if you need to version or add updated files (e.g. 
orig/update-MM-DD-YY or orig/v1_MM-DD-YY)") parser.add_argument("--date", type=parse_timestamp, help="Only move files modified on/after this date (e.g., 2025-06-23)") parser.add_argument("--time", type=parse_timestamp, help="Only move files modified on/after this time (e.g., 12:00)") parser.add_argument("--min", type=int, help="Limit to last N minutes") parser.add_argument("-a", "--all", action="store_true", help="List all .info files in the current directory matching the dataset name") + parser.add_argument("--submit", action="store_true", help="Re-copy files from submit directory") args = parser.parse_args() if args.all: results = [] for f in os.listdir('.'): if f.endswith('.info'): try: with open(f) as info_f: metadata = json.load(info_f) dataset_name = metadata.get("MetaData", {}).get("dataset", "") actual_file = metadata.get("MetaData", {}).get("filename", "") if dataset_name == args.dir: mtime = os.path.getmtime(f) results.append((f, actual_file, mtime)) @@ -117,47 +182,52 @@ file_list = get_file_list_from_table(args.list) elif args.file: file_list = [args.file] else: for f in os.listdir('.'): if f.endswith('.info'): try: with open(f) as info_f: metadata = json.load(info_f) dataset_name = metadata.get("MetaData", {}).get("dataset", "") if dataset_name == args.dir: file_list.append(f) except Exception as e: print(f"[SKIP] Failed to parse {f}: {e}") + orig_subdir = None if not args.subdir or args.subdir == "orig" else args.subdir + for f in file_list: if not os.path.exists(f): print(f"[SKIP] File not found: {f}") continue if is_file_in_time_range(f, start_time, end_time): - cb_push_to_orig(f, args.dir, subdir=args.subdir) + cb_push_to_orig(f, args.dir, subdir=orig_subdir) else: try: with open(f) as info_f: metadata = json.load(info_f) actual_file = metadata.get("MetaData", {}).get("filename", "") print(f"[SKIP] {f} → {actual_file} outside time range.") except Exception: print(f"[SKIP] {f} outside time range.") + copy_submit_dir(args.dir, subdir=orig_subdir, 
force=args.submit) + if __name__ == "__main__": if len(sys.argv) == 1: parser = argparse.ArgumentParser( description="Move uploaded files to Cell Browser dataset 'orig' directory.", formatter_class=argparse.HelpFormatter) parser.add_argument("-d", "--dir", required=True, help="Input dataset directory name (e.g. cortex-dev).") parser.add_argument("-l", "--list", help="Input file list containing .info files (one per line) to move over.") parser.add_argument("-f", "--file", help="Path to a single .info file to move over.") - parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to (default: orig). Helpful if you need to version or add updated files (e.g. orig/update-MM-DD-YY or orig/v1_MM-DD-YY)") + parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to inside the 'orig' directory. Helpful if you need to version or add updated files (e.g. orig/update-MM-DD-YY or orig/v1_MM-DD-YY)") parser.add_argument("--date", type=parse_timestamp, help="Only move files modified on/after this date (e.g., 2025-06-23)") parser.add_argument("--time", type=parse_timestamp, help="Only move files modified on/after this time (e.g., 12:00)") parser.add_argument("--min", type=int, help="Limit to last N minutes") parser.add_argument("-a", "--all", action="store_true", help="List all .info files in the current directory matching the dataset name") + parser.add_argument("--submit", action="store_true", help="Re-copy files from submit directory") parser.print_help() sys.exit(0) main()