1bead704f880dcea50b2067c50261e6f3a0fab77
max
  Tue Nov 29 07:40:39 2022 -0800
sorting assemblies in bigGuessDb by orderKey, refs #30316

diff --git src/utils/bigGuessDb/bigGuessDb src/utils/bigGuessDb/bigGuessDb
index fd3c7cd..6d9a58e 100755
--- src/utils/bigGuessDb/bigGuessDb
+++ src/utils/bigGuessDb/bigGuessDb
@@ -68,34 +68,38 @@
         sizeStr = ",".join(sizeParts)
         ofh.write("%s\t%s\n" % (db, sizeStr))
     ofh.close()
     logging.info("Wrote %s", outFname)
 
 def buildIndex(inDir, outFname):
     """ go over all direct subdirectories of inDir and find a chrom.sizes file,
     compact it to format db -> list of (chrom,size) and write to outFname """
     allSizes = dict()
 
     import json # this is not style guide conform, but makes sure that these packages don't lead to problems for users of this script
     from six.moves import urllib # works in python2 and 3
 
     apiData = json.load(urllib.request.urlopen("https://api.genome.ucsc.edu/list/ucscGenomes"))
 
-    for db in apiData["ucscGenomes"]:
-        if "Patch" in db or db == "sonMus0" or db.startswith("braNey") or db.endswith(".lowec"):
-            continue
+    dbList = list()
+    for db, dbData in apiData["ucscGenomes"].items():
+        orderKey = dbData["orderKey"]
+        dbList.append( (orderKey, db) )
+
+    dbList.sort()
 
+    for orderKey, db in dbList:
         subDir = join(inDir, db)
         chromFname = join(subDir, "chrom.sizes")
         if not isfile(chromFname):
             chromFname = join(subDir, db+".sizes")
 
         if not isfile(chromFname):
             print("not found "+chromFname)
             continue
 
         doSubset = True
         if db.startswith("hg") or db.startswith("mm"):
             doSubset = False
 
         if os.path.getsize(chromFname) != 0:
             allSizes[db] = parseSizes(chromFname, doSubset)