d8fcb8618a0aed962c050c0914131e618304ced9 hiram Tue Feb 3 14:51:24 2026 -0800 preparing to add the haplotypes column to the table refs #34736 diff --git src/hg/hubApi/assemblyList.py src/hg/hubApi/assemblyList.py index 9ed83080b07..a21a063783b 100755 --- src/hg/hubApi/assemblyList.py +++ src/hg/hubApi/assemblyList.py @@ -859,85 +859,85 @@ if "na" not in gcAccession: if gcAccession in allStatus: stat = allStatus[gcAccession] refSeqCategory = stat['refSeqCategory'].lower() versionStatus = stat['versionStatus'].lower() assemblyLevel = stat['assemblyLevel'].lower() if gcAccession not in description: description += " " + gcAccession ### add alias names to description if they are not already there description = addAliases(dbDbName, aliasData, description) descr = f"{entry['sourceName']} {entry['taxId']} {description}" if year not in organism and year not in descr: descr = f"{entry['sourceName']} {entry['taxId']} {entry['description']} {year}" description = re.sub(r'\s+', ' ', descr).strip() - outLine =f"{entry['name']}\t{priority}\t{organism}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description}\t1\t\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\n" + outLine =f"{entry['name']}\t{priority}\t{organism}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description}\t1\t\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\t\n" fileOut.write(outLine) itemCount += 1 totalItemCount += itemCount print(f"{totalItemCount:4} - total\tdbDb count: {itemCount:4}") itemCount = 0 # Print the GenArk data for entry in genArkItems: gcAccession = entry['gcAccession'] if gcAccession in allPriorities: priority = allPriorities[gcAccession] else: print("no priority for ", gcAccession) sys.exit(255) hubPath = genarkPath(gcAccession) commonName = entry['commonName'] clade = entry['clade'] year = entry['year'] descr = f"{entry['asmName']} {entry['taxId']}" if year not in commonName and year not in descr: descr = f"{entry['asmName']} {entry['taxId']} {year}" ### add alias names to description if they are not already there descr = addAliases(gcAccession, aliasData, descr) description = re.sub(r'\s+', ' ', descr).strip() refSeqCategory = entry['refSeqCategory'].lower() versionStatus = entry['versionStatus'].lower() assemblyLevel = entry['assemblyLevel'].lower() - outLine = f"{entry['gcAccession']}\t{priority}\t{commonName.encode('ascii', 'ignore').decode('ascii')}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description}\t1\t{hubPath}\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\n" + outLine = f"{entry['gcAccession']}\t{priority}\t{commonName.encode('ascii', 'ignore').decode('ascii')}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description}\t1\t{hubPath}\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\t\n" fileOut.write(outLine) itemCount += 1 totalItemCount += itemCount print(f"{totalItemCount:4} - total\tgenArk count: {itemCount:4}") incrementPriority = len(allPriorities) + 1 print("# incrementing priorities from: ", incrementPriority) itemCount = 0 # Print the refSeq/genBank data for entry in refSeqGenBankSorted: gcAccession = entry['gcAccession'] commonName = entry['commonName'] scientificName = entry['scientificName'] asmName = entry['asmName'] clade = entry['clade'] year = entry['year'] refSeqCategory = entry['refSeqCategory'].lower() versionStatus = entry['versionStatus'].lower() assemblyLevel = entry['assemblyLevel'].lower() descr = f"{asmName} {entry['taxId']} {entry['other']}" if year not in commonName and year not in descr: descr = f"{asmName} {entry['taxId']} {entry['other']} {year}" ### add alias names to description if they are not already there descr = addAliases(gcAccession, aliasData, descr) description = re.sub(r'\s+', ' ', descr).strip() - outLine = f"{gcAccession}\t{incrementPriority}\t{entry['commonName'].encode('ascii', 'ignore').decode('ascii')}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description.encode('ascii', 'ignore').decode('ascii')}\t0\t\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\n" + outLine = f"{gcAccession}\t{incrementPriority}\t{entry['commonName'].encode('ascii', 'ignore').decode('ascii')}\t{entry['scientificName']}\t{entry['taxId']}\t{clade}\t{description.encode('ascii', 'ignore').decode('ascii')}\t0\t\t{year}\t{refSeqCategory}\t{versionStatus}\t{assemblyLevel}\t\n" fileOut.write(outLine) incrementPriority += 1 itemCount += 1 totalItemCount += itemCount print(f"{totalItemCount:4} - total\trefSeq + genbank count: {itemCount:4}") fileOut.close() if __name__ == "__main__": main()