40428f923657ec045d64b54de7bf444dc140dd33
max
  Tue Nov 29 07:36:43 2022 -0800
using only assemblies on the RR in bigGuessDb now, refs #30316

diff --git src/utils/bigGuessDb/bigGuessDb src/utils/bigGuessDb/bigGuessDb
index 6e9241a..fd3c7cd 100755
--- src/utils/bigGuessDb/bigGuessDb
+++ src/utils/bigGuessDb/bigGuessDb
@@ -62,31 +62,37 @@
 
 def writeSizes(allSizes, outFname):
     """ write all sizes to the gzipped index file outFname, one line per db in the format
     db <tab> chrom=size,chrom=size,... ; each entry of allSizes[db] is unpacked as (size, chrom) """
     # 'with' makes sure the file is closed even if a write fails; "wt" = "write" "text"
     with gzip.open(outFname, "wt") as ofh:
         for db, dbSizes in allSizes.items():
             sizeStr = ",".join("%s=%d" % (chrom, size) for size, chrom in dbSizes)
             ofh.write("%s\t%s\n" % (db, sizeStr))
     logging.info("Wrote %s", outFname)
 
 def buildIndex(inDir, outFname):
     """ go over all direct subdirectories of inDir and find a chrom.sizes file,
     compact it to format db -> list of (chrom,size) and write to outFname """
     allSizes = dict()
-    for db in os.listdir(inDir):
+
+    import json # this is not style guide conform, but makes sure that these packages don't lead to problems for users of this script
+    from six.moves import urllib # works in python2 and 3
+
+    apiData = json.load(urllib.request.urlopen("https://api.genome.ucsc.edu/list/ucscGenomes"))
+
+    for db in apiData["ucscGenomes"]:
         if "Patch" in db or db == "sonMus0" or db.startswith("braNey") or db.endswith(".lowec"):
             continue
 
         subDir = join(inDir, db)
         chromFname = join(subDir, "chrom.sizes")
         if not isfile(chromFname):
             chromFname = join(subDir, db+".sizes")
 
         if not isfile(chromFname):
             print("not found "+chromFname)
             continue
 
         doSubset = True
         if db.startswith("hg") or db.startswith("mm"):
             doSubset = False