6f3bf289f143c60b78ca17898f374b35121ee908
bwick
  Mon Jun 23 15:30:41 2025 -0700
Adding cbMove script that moves uploaded files to CB dataset directory.

diff --git ucsc/cbMove ucsc/cbMove
new file mode 100755
index 0000000..388c260
--- /dev/null
+++ ucsc/cbMove
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import sys
+import json
+import shutil
+import csv
+from datetime import datetime, timedelta
+
+def parse_timestamp(s):
+    """Convert a command-line string into a datetime (argparse ``type=`` hook).
+
+    Two layouts are accepted and tried in this order: ``HH:MM`` and
+    ``YYYY-MM-DD``.  A time-only value carries no date, so strptime fills in
+    its default date of 1900-01-01; a date-only value lands on midnight.
+
+    Raises argparse.ArgumentTypeError when neither layout matches, which
+    argparse reports to the user as a usage error.
+    """
+    parsed = None
+    for layout in ("%H:%M", "%Y-%m-%d"):
+        try:
+            parsed = datetime.strptime(s, layout)
+        except ValueError:
+            # Not this layout; fall through to the next candidate.
+            continue
+        break
+    if parsed is None:
+        raise argparse.ArgumentTypeError(f"Invalid date/time format: {s}")
+    return parsed
+
+def get_file_list_from_table(file_path):
+    """Read a plain-text listing and return its non-blank lines.
+
+    Each line is stripped of surrounding whitespace (including the line
+    terminator); lines that are empty after stripping are dropped.  The
+    order of the remaining entries matches the order in the file.
+    """
+    with open(file_path, newline='') as listing:
+        trimmed = [raw.strip() for raw in listing]
+    return [entry for entry in trimmed if entry]
+
+def is_file_in_time_range(path, start_time=None, end_time=None):
+    """Tell whether the file's modification time lies inside a window.
+
+    The modification time of ``path`` is converted to a naive local
+    datetime and compared against the optional bounds.  Both bounds are
+    inclusive, and a bound that is None (or otherwise falsy) is not applied.
+    """
+    modified = datetime.fromtimestamp(os.path.getmtime(path))
+    # The lower bound is checked first; the upper bound is only consulted
+    # when the lower bound did not already rule the file out.
+    after_start = not start_time or modified >= start_time
+    return after_start and (not end_time or modified <= end_time)
+
+def cb_push_to_orig(file, dir, subdir="orig", base_dir="/hive/data/inside/cells/datasets"):
+    """Copy one uploaded file into ``<base_dir>/<dir>/<subdir>``.
+
+    Parameters:
+        file:     Path of the file to process.  If it ends in ``.info`` it is
+                  read as JSON; the destination name is taken from
+                  ``MetaData.filename`` and the data is read from the path
+                  obtained by dropping the ``.info`` extension.  Any other
+                  file is copied as-is under its own basename.
+        dir:      Dataset directory name below ``base_dir``.
+        subdir:   Subdirectory of the dataset to copy into (default "orig").
+        base_dir: Root directory that holds all dataset directories.
+
+    Returns 0 on success and 1 on any failure; failures are reported on
+    stderr rather than raised.  The source file is copied, not removed.
+    """
+    target_dir = os.path.join(base_dir, dir, subdir)
+    file_ext = os.path.splitext(file)[1].lstrip(".")
+
+    if file_ext == "info":
+        try:
+            with open(file) as f:
+                metadata = json.load(f)
+            declared_name = metadata["MetaData"]["filename"]
+        except Exception as e:
+            print(f"[ERROR] Failed to read or parse {file}: {e}", file=sys.stderr)
+            return 1
+        # The name comes from upload metadata, so it must not be trusted as a
+        # path: an absolute path or "../" components would make os.path.join
+        # write outside target_dir.  Keep only the final path component.
+        actual_file = os.path.basename(declared_name) if isinstance(declared_name, str) else ""
+        if actual_file in ("", ".", ".."):
+            print(f"[ERROR] Unusable filename {declared_name!r} in {file}", file=sys.stderr)
+            return 1
+        file_base = os.path.splitext(os.path.basename(file))[0]
+        file_path = os.path.join(os.path.dirname(file), file_base)
+    else:
+        actual_file = os.path.basename(file)
+        file_path = os.path.abspath(file)
+
+    dest_path = os.path.join(target_dir, actual_file)
+
+    print(f"[INFO] Copying from: {os.path.abspath(file_path)}")
+
+    try:
+        # Directory creation is inside the try so that a permissions problem
+        # is reported like any other copy failure instead of raising.
+        os.makedirs(target_dir, exist_ok=True)
+        shutil.copy(file_path, dest_path)
+    except Exception as e:
+        print(f"[ERROR] Copy failed: {e}", file=sys.stderr)
+        return 1
+    print(f"Copied {actual_file} to {dest_path}")
+    return 0
+
+def main():
+    """Command-line entry point: pick uploaded files and copy them to a dataset.
+
+    The candidate files come either from the list given with --file or, when
+    that is absent, from every ``.info`` file in the current directory whose
+    JSON ``MetaData.dataset`` equals --dir.  Candidates can be narrowed by
+    modification time; only one of --min, --time and --date is honoured, in
+    that order of precedence.  Each selected file is handed to
+    cb_push_to_orig.
+
+    Exits with status 1 if any selected file could not be copied.
+    """
+    parser = argparse.ArgumentParser(description="Move uploaded files to Cell Browser dataset 'orig' directory.")
+    parser.add_argument("-d","--dir", required=True, help="Input dataset directory name (e.g. cortex-dev).")
+    parser.add_argument("-o", "--subdir", default="orig", help="Subdirectory to move files to (default: orig). Helpful if you need to version or add updated files (e.g. orig/update-MM-DD-YY or orig/v1_MM-DD-YY)")
+    parser.add_argument("-f","--file", help="Input file list containing .info files (one per line) to move over.")
+    parser.add_argument("--date", type=parse_timestamp, help="Only move files modified after this date (e.g., 2025-06-23)")
+    parser.add_argument("--time", type=parse_timestamp, help="Only move files modified after this time (e.g., 12:00)")
+    parser.add_argument("--min", type=int, help="Limit to last N minutes")
+
+    # With no arguments at all, show the help text instead of the terse
+    # "required argument" error argparse would otherwise print.
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(0)
+
+    args = parser.parse_args()
+
+    now = datetime.now()
+    start_time = end_time = None
+
+    if args.min:
+        start_time = now - timedelta(minutes=args.min)
+        end_time = now
+    elif args.time:
+        # argparse has already turned --time into a datetime via
+        # parse_timestamp, so use its clock fields directly and anchor them
+        # to today's date.
+        # NOTE(review): parse_timestamp also accepts YYYY-MM-DD here, which
+        # yields today at 00:00 -- confirm whether --time should reject dates.
+        start_time = now.replace(hour=args.time.hour, minute=args.time.minute, second=0, microsecond=0)
+    elif args.date:
+        start_time = args.date
+
+    if args.file:
+        file_list = get_file_list_from_table(args.file)
+    else:
+        file_list = []
+        for f in os.listdir('.'):
+            if f.endswith('.info'):
+                try:
+                    with open(f) as info_f:
+                        metadata = json.load(info_f)
+                    dataset_name = metadata.get("MetaData", {}).get("dataset", "")
+                    if dataset_name == args.dir:
+                        file_list.append(f)
+                except Exception as e:
+                    print(f"[SKIP] Failed to parse {f}: {e}")
+
+    failures = 0
+    for f in file_list:
+        if not os.path.exists(f):
+            print(f"[SKIP] File not found: {f}")
+            continue
+        if is_file_in_time_range(f, start_time, end_time):
+            # cb_push_to_orig reports problems through its return value.
+            if cb_push_to_orig(f, args.dir, subdir=args.subdir) != 0:
+                failures += 1
+        else:
+            print(f"[SKIP] {f} outside time range.")
+
+    if failures:
+        # Surface copy failures in the exit status so wrappers can detect them.
+        print(f"[ERROR] {failures} file(s) could not be copied.", file=sys.stderr)
+        sys.exit(1)
+
+# Run the command-line interface only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()