26e71c08d975868515ff8ea4c584dc6eeb7368d6 max Fri Nov 8 03:56:54 2024 -0800 adding fields requested by chris, refs #34405 diff --git src/utils/hubtools/hubtools src/utils/hubtools/hubtools index 8b82d03..d91dafc 100755 --- src/utils/hubtools/hubtools +++ src/utils/hubtools/hubtools @@ -1,25 +1,38 @@ #!/usr/bin/env python3 import logging, sys, optparse, os, json, subprocess, shutil, string, glob, tempfile, re import shlex from pathlib import Path from collections import defaultdict, OrderedDict from os.path import join, basename, dirname, isfile, relpath, abspath, splitext, isdir #import pyyaml # not loaded here, so it's not a hard requirement, is lazy loaded in parseMetaYaml() # ==== functions ===== +# allowed file types by hubtools up +fileTypes = { + "bb" : "bigBed", + "bigBed" : "bigBed", + "bam" : "bam", + "vcf.gz" : "vcf", + "bigWig" : "bigWig", + "bw" : "bigWig", + "hic" : "hic", + "cram" : "cram", + "txt" : "text", +} + asHead = """table bed "Browser extensible data (<=12 fields) " ( """ asLines = """ string chrom; "Chromosome (or contig, scaffold, etc.)" uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" string name; "Name of item" uint score; "Score from 0-1000" char[1] strand; "+ or -" uint thickStart; "Start of where display should be thick (start codon)" uint thickEnd; "End of where display should be thick (stop codon)" uint reserved; "Used as itemRgb as of 2004-11-22" @@ -714,35 +727,47 @@ for rootDir, dirs, files in os.walk(tdbDir): for fbase in files: if fbase.startswith("."): continue fpath = join(rootDir, fbase) fullPath = join(tdbDir, fpath) # skip files that have not changed their mtime since last upload if fpath in uploadCache: if os.stat(fullPath).st_mtime == uploadCache[fpath]["mtime"]: logging.info("%s: file mtime unchanged, not uploading again" % fullPath) continue cache = {} - cache["mtime"] = os.stat(fullPath).st_mtime + mtime = os.stat(fullPath).st_mtime + cache["mtime"] = mtime cache["size"] = os.stat(fullPath).st_size uploadCache[fpath] = cache - meta = {"subdir" : rootDir, "genome":"NA", "hubName" : hubName} + #timestamp = datetime.fromtimestamp(mtime, tz=timezone.utc).timestamp() # seconds since 1970 + + parentDir = join(hubName, rootDir) + meta = { + "parentDir" : parentDir, + "genome":"NA", + "fileName" : fbase, + "hubName" : hubName, + "fileType":"NA", + "lastModified" : str(int(mtime)), + } + logging.info(f"Uploading {fpath}, meta {meta}") uploader = my_client.uploader(fullPath, metadata=meta) uploader.upload() cacheWrite(uploadCache, cacheFname) def iterRaStanzas(fname): " parse an ra-style (trackDb) file and yield dictionaries " data = dict() logging.debug("Parsing %s in trackDb format" % fname) with open(fname, "rt") as ifh: for l in ifh: l = l.lstrip(" ").rstrip("\r\n") if len(l)==0: yield data