40428f923657ec045d64b54de7bf444dc140dd33 max Tue Nov 29 07:36:43 2022 -0800 using only assemblies on the RR in bigGuessDb now, refs #30316 diff --git src/utils/bigGuessDb/bigGuessDb src/utils/bigGuessDb/bigGuessDb index 6e9241a..fd3c7cd 100755 --- src/utils/bigGuessDb/bigGuessDb +++ src/utils/bigGuessDb/bigGuessDb @@ -62,31 +62,37 @@ def writeSizes(allSizes, outFname): " write all sizes to the index file " ofh = gzip.open(outFname, "wt") # "write" "text" for db, dbSizes in allSizes.items(): sizeParts = ["%s=%d" % (chrom, size) for size,chrom in dbSizes] sizeStr = ",".join(sizeParts) ofh.write("%s\t%s\n" % (db, sizeStr)) ofh.close() logging.info("Wrote %s", outFname) def buildIndex(inDir, outFname): """ go over all direct subdirectories of inDir and find a chrom.sizes file, compact it to format db -> list of (chrom,size) and write to outFname """ allSizes = dict() - for db in os.listdir(inDir): + + import json # this is not style guide conform, but makes sure that these packages don't lead to problems for users of this script + from six.moves import urllib # works in python2 and 3 + + apiData = json.load(urllib.request.urlopen("https://api.genome.ucsc.edu/list/ucscGenomes")) + + for db in apiData["ucscGenomes"]: if "Patch" in db or db == "sonMus0" or db.startswith("braNey") or db.endswith(".lowec"): continue subDir = join(inDir, db) chromFname = join(subDir, "chrom.sizes") if not isfile(chromFname): chromFname = join(subDir, db+".sizes") if not isfile(chromFname): print("not found "+chromFname) continue doSubset = True if db.startswith("hg") or db.startswith("mm"): doSubset = False