#!/usr/bin/env python3
"""Copy uploaded files into a Cell Browser dataset directory.

Each upload is described by a JSON ``.info`` file whose ``MetaData`` object
carries the target ``dataset`` and the original ``filename``.  The uploaded
payload itself sits next to the ``.info`` file under the same name minus the
``.info`` extension.  Files can be selected explicitly (``--file``/``--list``)
or discovered in the current directory, and optionally filtered by
modification time (``--min``, ``--time``, ``--date``).
"""
# Reconstructed from commit 3ca204528035c798f2ac4d46958721df4659310f
# (bwick, Tue Jul 1 12:35:17 2025 -0700): "Small change, changed time format
# to allow HH:MM".

import argparse
import os
import sys
import json
import shutil
import csv
from datetime import datetime, timedelta

# Root directory under which every dataset keeps its own subdirectories.
DATASETS_ROOT = "/hive/data/inside/cells/datasets"

# Formats that carry a calendar date; tried in order.
_DATE_FORMATS = ("%Y-%m-%d %H:%M", "%Y-%m-%d")
# Clock-only format; has no date component of its own.
_TIME_FORMAT = "%H:%M"


def parse_timestamp(s):
    """Convert a command-line date/time string into a ``datetime``.

    Accepted forms:
      * ``HH:MM``            -- that time *today*
      * ``YYYY-MM-DD HH:MM`` -- exact date and time
      * ``YYYY-MM-DD``       -- midnight at the start of that date

    Raises:
        argparse.ArgumentTypeError: if ``s`` matches none of the forms.
    """
    try:
        clock = datetime.strptime(s, _TIME_FORMAT)
    except ValueError:
        pass
    else:
        # strptime fills missing fields from 1900-01-01, which would make the
        # resulting cutoff precede every real file; anchor it to today.
        return datetime.combine(datetime.now().date(), clock.time())

    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    raise argparse.ArgumentTypeError(f"Invalid date/time format: {s}")


def get_file_list_from_table(file_path):
    """Return the non-blank lines of ``file_path`` with whitespace stripped."""
    with open(file_path, newline='') as f:
        return [entry for entry in (line.strip() for line in f) if entry]


def is_file_in_time_range(path, start_time=None, end_time=None):
    """Tell whether ``path``'s mtime lies within ``[start_time, end_time]``.

    Either bound may be ``None`` to leave that side open.  Both bounds are
    inclusive and compared against the local-time modification timestamp.
    """
    mtime = datetime.fromtimestamp(os.path.getmtime(path))
    if start_time is not None and mtime < start_time:
        return False
    if end_time is not None and mtime > end_time:
        return False
    return True


def cb_push_to_orig(file, cb_dir, subdir="orig", root=DATASETS_ROOT):
    """Copy one upload into ``<root>/<cb_dir>/<subdir>/``.

    For a ``.info`` file the destination name is read from its
    ``MetaData.filename`` entry and the payload copied is the sibling file
    named like ``file`` without the ``.info`` extension.  Any other file is
    copied as-is under its own base name.

    Args:
        file: path to a ``.info`` descriptor or to a plain file.
        cb_dir: dataset directory name below ``root``.
        subdir: subdirectory of the dataset to copy into.
        root: base directory holding all datasets.

    Returns:
        0 on success, 1 if the descriptor could not be read or the copy
        failed (the error is reported on stderr).
    """
    file_ext = os.path.splitext(file)[1].lstrip(".")
    target_dir = os.path.join(root, cb_dir, subdir)

    if file_ext == "info":
        try:
            with open(file) as f:
                metadata = json.load(f)
            actual_file = metadata["MetaData"]["filename"]
        except Exception as e:
            print(f"[ERROR] Failed to read or parse {file}: {e}", file=sys.stderr)
            return 1
        file_base = os.path.splitext(os.path.basename(file))[0]
        file_path = os.path.join(os.path.dirname(file), file_base)
    else:
        actual_file = os.path.basename(file)
        file_path = os.path.abspath(file)

    # NOTE(review): actual_file comes straight from the .info metadata; if
    # those files can be written by untrusted uploaders, an absolute path or
    # "../" component here would escape target_dir -- confirm and sanitize.
    dest_path = os.path.join(target_dir, actual_file)
    os.makedirs(target_dir, exist_ok=True)

    try:
        print(f"Copying {actual_file} to {cb_dir}/{subdir} directory...")
        shutil.copy(file_path, dest_path)
        print("Successfully copied!")
        return 0
    except Exception as e:
        print(f"[ERROR] Copy failed: {e}", file=sys.stderr)
        return 1


def _build_parser():
    """Create the command-line parser shared by ``main`` and the help path."""
    parser = argparse.ArgumentParser(
        description="Move uploaded files to Cell Browser dataset 'orig' directory.",
        formatter_class=argparse.HelpFormatter)
    parser.add_argument("-d", "--dir", required=True, help="Input dataset directory name (e.g. cortex-dev).")
    parser.add_argument("-l", "--list", help="Input file list containing .info files (one per line) to move over.")
    parser.add_argument("-f", "--file", help="Path to a single .info file to move over.")
    parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to (default: orig). Helpful if you need to version or add updated files (e.g. orig/update-MM-DD-YY or orig/v1_MM-DD-YY)")
    parser.add_argument("--date", type=parse_timestamp, help="Only move files modified on/after this date (e.g., 2025-06-23)")
    parser.add_argument("--time", type=parse_timestamp, help="Only move files modified on/after this time (e.g., 12:00)")
    parser.add_argument("--min", type=int, help="Limit to last N minutes")
    parser.add_argument("-a", "--all", action="store_true", help="List all .info files in the current directory matching the dataset name")
    return parser


def _load_info_metadata(info_path):
    """Return the ``MetaData`` mapping of a ``.info`` file (``{}`` if absent)."""
    with open(info_path) as info_f:
        return json.load(info_f).get("MetaData", {})


def _scan_dataset_infos(dataset):
    """Find ``.info`` files in the current directory that belong to ``dataset``.

    Returns a list of ``(info_name, original_filename, mtime)`` tuples in
    directory-listing order.  Unreadable descriptors are reported and skipped.
    """
    found = []
    for name in os.listdir('.'):
        if not name.endswith('.info'):
            continue
        try:
            meta = _load_info_metadata(name)
            if meta.get("dataset", "") == dataset:
                found.append((name, meta.get("filename", ""), os.path.getmtime(name)))
        except Exception as e:
            print(f"[SKIP] Failed to parse {name}: {e}")
    return found


def _resolve_time_window(args, now):
    """Translate ``--min``/``--time``/``--date`` into ``(start, end)`` bounds.

    ``--min`` takes precedence and bounds both sides.  When ``--date`` and
    ``--time`` are given together the date supplies the day and the time the
    clock; otherwise whichever one is present becomes the start bound.
    """
    if args.min:
        return now - timedelta(minutes=args.min), now
    if args.time is not None and args.date is not None:
        return datetime.combine(args.date.date(), args.time.time()), None
    if args.time is not None:
        return args.time, None
    if args.date is not None:
        return args.date, None
    return None, None


def _skip_message(path):
    """Build the 'outside time range' notice, naming the original file if known."""
    try:
        actual_file = _load_info_metadata(path).get("filename", "")
        return f"[SKIP] {path} → {actual_file} outside time range."
    except Exception:
        # Not a readable .info descriptor; fall back to the bare path.
        return f"[SKIP] {path} outside time range."


def main():
    """Parse arguments, select the files to transfer, and copy each one."""
    args = _build_parser().parse_args()

    if args.all:
        # Listing mode: show matching uploads oldest-first and stop.
        for name, actual_file, mtime in sorted(_scan_dataset_infos(args.dir), key=lambda x: x[2]):
            time_str = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M:%S')
            print(f"{time_str} {name} → {actual_file}")
        sys.exit(0)

    start_time, end_time = _resolve_time_window(args, datetime.now())

    if args.list:
        file_list = get_file_list_from_table(args.list)
    elif args.file:
        file_list = [args.file]
    else:
        file_list = [name for name, _, _ in _scan_dataset_infos(args.dir)]

    for f in file_list:
        if not os.path.exists(f):
            print(f"[SKIP] File not found: {f}")
            continue
        if is_file_in_time_range(f, start_time, end_time):
            cb_push_to_orig(f, args.dir, subdir=args.subdir)
        else:
            print(_skip_message(f))


if __name__ == "__main__":
    if len(sys.argv) == 1:
        # No arguments at all: show usage instead of an argparse error.
        _build_parser().print_help()
        sys.exit(0)
    main()