68207fb5ba1960173ec5460bce40789997c80e19 hiram Wed Oct 9 16:11:26 2024 -0700 fixup some common names and allow only 10 tracks on a fungi refs #29545 diff --git src/hg/makeDb/doc/asmHubs/commonNames.py src/hg/makeDb/doc/asmHubs/commonNames.py index 589559e..2e37b9a 100755 --- src/hg/makeDb/doc/asmHubs/commonNames.py +++ src/hg/makeDb/doc/asmHubs/commonNames.py @@ -57,31 +57,31 @@ pat = re.compile(r'.*ganism name:\s+', re.IGNORECASE) sciName = pat.sub('', line) pat = re.compile(r'\s+\(.*\)$') sciName = pat.sub('', sciName) pat = re.compile(r'[()\[\]+*]') sciName = pat.sub('', sciName) pat = re.compile(r'\?') sciName = pat.sub(' ', sciName) pat = r'kinetoplastids|firmicutes|proteobacteria|high G|enterobacteria|agent of' if re.search(pat, orgName): orgName = sciName else: - pat = r'apicomplexans|bugs|crustaceans|nematodes|flatworm|ascomycete|basidiomycete|budding|microsporidian|smut|fungi|eukaryotes|flies|beetles|mosquitos|bees|moths|sponges|^mites|ticks|^comb|jellies|jellyfishes|chitons|bivalves|bony fishes|birds|eudicots|snakes|bats|tunicates|tsetse fly' + pat = r'apicomplexans|bryozoans|bugs|ciliates|crustaceans|diatoms|hydrozoans|gastropods|nematodes|flatworm|pelagophytes|scorpions|ascomycete|basidiomycete|budding|microsporidian|smut|fungi|eukaryotes|flies|beetles|mosquitos|bees|moths|sponges|^mites|ticks|^comb|jellies|jellyfishes|chitons|bivalves|bony fishes|birds|eudicots|snakes|bats|tunicates|tsetse fly|cellular slime molds|stony corals' if re.search(pat, orgName): order = orgName.split()[0] if re.search('budding', order): order = "budding yeast" elif re.search('smut', order): order = "smut fungi" elif re.search('bony', order): order = "bony fish" elif re.search('ascomycete', order): order = "ascomycetes" elif re.search('eudicots', order): order = "eudicot" elif re.search('birds', order): order = "bird" elif re.search('snakes', order): @@ -110,34 +110,52 @@ order = "moth" elif re.search('sponges', order): order = "sponge" elif re.search('flatworms', order): order = "flatworm" elif re.search('nematodes', order): order = "nematode" elif re.search('basidiomycete', order): order = "basidiomycetes" words = sciName.split() restWords = " ".join(words[1:]) if re.search("eukaryotes", orgName): orgName = words[0][0].upper() + "." + restWords elif re.search("apicomplexans", orgName): orgName = "apicomplexans " + words[0][0].upper() + "." + restWords + elif re.search("bryozoans", orgName): + orgName = "bryozoans " + words[0][0].upper() + "." + restWords + elif re.search("ciliates", orgName): + orgName = "ciliates " + words[0][0].upper() + "." + restWords + elif re.search("diatoms", orgName): + orgName = "diatoms " + words[0][0].upper() + "." + restWords + elif re.search("hydrozoans", orgName): + orgName = "hydrozoans " + words[0][0].upper() + "." + restWords + elif re.search("gastropods", orgName): + orgName = "gastropods " + words[0][0].upper() + "." + restWords + elif re.search("pelagophytes", orgName): + orgName = "pelagophytes " + words[0][0].upper() + "." + restWords + elif re.search("scorpions", orgName): + orgName = "scorpions " + words[0][0].upper() + "." + restWords elif re.search("flies", orgName): orgName = "fly " + words[0][0].upper() + "." + restWords elif re.search("tsetse", orgName): orgName = "tsetse fly " + words[0][0].upper() + "." + restWords + elif re.search("cellular slime mold", orgName): + orgName = "cellular slime mold " + words[0][0].upper() + "." + restWords + elif re.search("stony corals", orgName): + orgName = "stony coral " + words[0][0].upper() + "." + restWords elif re.search("tunicates", orgName): orgName = "tunicate " + words[0][0].upper() + "." + restWords else: orgName = order + " " + words[0][0].upper() + "." + restWords elif re.search("viruses", orgName): orgName = saveOrgName pat = re.compile(r'.*ism name:\s+') orgName = pat.sub('', orgName) pat = re.compile(r'\s+\(.*\)$') orgName = pat.sub('', orgName) elif "Date:" in line: words = line.split() pat = re.compile(r'-.*') yearDate = pat.sub('', words[-1])