54d2ab3bc67708e54ef4ce54b0bd94927e1ff18b jnavarr5 Tue Nov 25 16:57:38 2025 -0800 Curling the URL like Hiram recommended. Using the file from /hive was not the most up-to-date, refs #36712 #33200 diff --git src/utils/qa/pairLastzWrapper.py src/utils/qa/pairLastzWrapper.py index 8fd0ec94a9c..be1dca0a050 100755 --- src/utils/qa/pairLastzWrapper.py +++ src/utils/qa/pairLastzWrapper.py @@ -66,31 +66,31 @@ except subprocess.CalledProcessError: sys.stdout = sys.__stdout__ print('# can not find '+destDir) quit() cdDir = str(cdDir)[1:-1] gcName = cdDir.split('//')[1][:-1] return gcName def getClade(assembly): """Input assembly and return clade of assembly""" if assembly[0:3]=='GCA' or assembly[0:3]=='GCF': chromSizes_1="/hive/data/genomes/asmHubs/"+assembly[0:3]+"/"+assembly[4:7]+"/"+assembly[7:10]+"/"+assembly[10:13]+"/"+assembly+"/"+assembly+".chrom.sizes.txt" assembly=goto(assembly) assembly_version = "_".join(assembly.split('_', 2)[:2]) # Get the assembly ID, e.g. GCF_016772045.1 - find_gcNum=bash(f"grep {assembly_version} /hive/data/genomes/asmHubs/UCSC_GI.assemblyHubList.txt")[0] #Bash returns a list, so get the first item + find_gcNum=bash(f"curl -s https://hgdownload.soe.ucsc.edu/hubs/UCSC_GI.assemblyHubList.txt | grep {assembly_version}")[0] #Bash returns a list, so get the first item try: clade=find_gcNum.split('\t')[5] # Value is tab-separated, the clade is the last item except IndexError: print('# can not find '+assembly+', the assembly might be suppressed') quit() if clade in validClades: return clade for singular, plural in validClades.items(): if clade in plural: return singular # check to see if 'mammals' or 'mammal(L)' is given. If so return 'mammal' else: clade='other' else: chromSizes_1="/hive/data/genomes/"+assembly+"/chrom.sizes"