1bead704f880dcea50b2067c50261e6f3a0fab77 max Tue Nov 29 07:40:39 2022 -0800 sorting assemblies in bigGuessDb by orderKey, refs #30316 diff --git src/utils/bigGuessDb/bigGuessDb src/utils/bigGuessDb/bigGuessDb index fd3c7cd..6d9a58e 100755 --- src/utils/bigGuessDb/bigGuessDb +++ src/utils/bigGuessDb/bigGuessDb @@ -68,34 +68,38 @@ sizeStr = ",".join(sizeParts) ofh.write("%s\t%s\n" % (db, sizeStr)) ofh.close() logging.info("Wrote %s", outFname) def buildIndex(inDir, outFname): """ go over all direct subdirectories of inDir and find a chrom.sizes file, compact it to format db -> list of (chrom,size) and write to outFname """ allSizes = dict() import json # this is not style guide conform, but makes sure that these packages don't lead to problems for users of this script from six.moves import urllib # works in python2 and 3 apiData = json.load(urllib.request.urlopen("https://api.genome.ucsc.edu/list/ucscGenomes")) - for db in apiData["ucscGenomes"]: - if "Patch" in db or db == "sonMus0" or db.startswith("braNey") or db.endswith(".lowec"): - continue + dbList = list() + for db, dbData in apiData["ucscGenomes"].items(): + orderKey = dbData["orderKey"] + dbList.append( (orderKey, db) ) + + dbList.sort() + for orderKey, db in dbList: subDir = join(inDir, db) chromFname = join(subDir, "chrom.sizes") if not isfile(chromFname): chromFname = join(subDir, db+".sizes") if not isfile(chromFname): print("not found "+chromFname) continue doSubset = True if db.startswith("hg") or db.startswith("mm"): doSubset = False if os.path.getsize(chromFname) != 0: allSizes[db] = parseSizes(chromFname, doSubset)