325f711f30c884459738f065f3415ce2914ec828
braney
  Wed Aug 13 16:18:51 2025 -0700
get hgConvert and hgLiftOver to use genark assemblies that are not in
the UCSC taxId list

diff --git src/hg/makeDb/doc/genarkOrg.txt src/hg/makeDb/doc/genarkOrg.txt
index 49af77240df..56c6073e73a 100644
--- src/hg/makeDb/doc/genarkOrg.txt
+++ src/hg/makeDb/doc/genarkOrg.txt
@@ -1,32 +1,32 @@
 # get UCSC org names from dbDb
 hgsql  -hgenome-centdb.soe.ucsc.edu hgcentral -Ne "select taxId, organism from  dbDb where active=1" | sort | uniq > taxIdOrg.txt
 
 # get all the genark accessions with their taxId
 hgsql hgcentraltest -Ne "select taxId, gcAccession from genark" | sort > taxIdGC.txt
 
 # mapping of genark accessions to UCSC Org
 join -t $'\t' taxIdOrg.txt taxIdGC.txt | tawk '{print $3,$2}' | sort > gcToUCSCOrg.txt
 
 # grab NCBI Taxonomy database name table
 wget "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz"
 tar xf taxdump.tar.gz names.dmp
 
 # get list of genbank common names from NCBI taxonomy database
 grep "genbank common" names.dmp | tawk '{print $1, $3}' | sort > taxIdCommon.txt
 
 # mapping of genark accessions to NCBI Common names
 join -t $'\t' taxIdCommon.txt taxIdGC.txt | tawk '{print $3,$2}' | sort > gcToCommon.txt
 
 # get list of scientific names from NCBI taxonomy database
 grep "scientific" names.dmp | tawk '{print $1, $3}' | sort > taxIdScientific.txt
 
 # mapping of genark accessions to NCBI Scientific names
 join -t $'\t' taxIdScientific.txt taxIdGC.txt | tawk '{print $3,$2}' | sort > gcToSci.txt
 
 # get list of all GCs to make "other" file
 cut -f 2 taxIdGC.txt | sort > gc.txt
 tawk '{print $1, "Other"}' gc.txt > gcToOther.txt
 
-cat gcToUCSCOrg.txt gcToCommon.txt gcToSci.txt gcToOther.txt  | tawk '{if (!seen[$1]) print; seen[$1]=1}' | sort >  genarkOrg.txt
+cat gcToUCSCOrg.txt gcToCommon.txt gcToSci.txt gcToOther.txt | tawk '!seen[$1]++ { head = substr($2,1,1); tail = substr($2,2); $2 = toupper(head) tolower(tail); print }' | sort > genarkOrg.txt
 
 # load as genarkOrg table..