2bd3c930e4d93e242ee896ab83a585f6b83b8069
hiram
  Fri Sep 11 14:28:03 2020 -0700
update gold table search rule for new fakeAgp refs #19883

diff --git src/hg/makeDb/doc/neoSch1/initialBuild.txt src/hg/makeDb/doc/neoSch1/initialBuild.txt
index 8bd3286..2db0cc6 100644
--- src/hg/makeDb/doc/neoSch1/initialBuild.txt
+++ src/hg/makeDb/doc/neoSch1/initialBuild.txt
@@ -430,61 +430,65 @@
     hgsql -N -e 'select chrom,name,"genbank" from ucscToINSDC;' neoSch1 \
         > ucsc.genbank.tab
 
     # verify chrM is correct:
     grep chrM *
 # ucsc.genbank.tab:chrM   AM181022.1      genbank
 # ucsc.refseq.tab:chrM    NC_008421.1     refseq
 
     awk '{printf "%s\t%s\t%s\n", $2,$1,$3}' ucsc.genbank.tab ucsc.refseq.tab \
         | sort > neoSch1.chromAlias.tab
 
     hgLoadSqlTab neoSch1 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         neoSch1.chromAlias.tab
 
 #########################################################################
-# fixup search rule for assembly track/gold table (DONE - 2017-09-23 - Hiram)
+# fixup search rule for assembly track/gold table (DONE - 2020-09-11 - Hiram)
 
     cd ~/kent/src/hg/makeDb/trackDb/seal/neoSch1
     # preview prefixes and suffixes:
-    hgsql -N -e "select frag from gold;" neoSch1 \
-      | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c
-      1 NC_.1
-   7871 NINY.1
+    hgsql -N -e "select frag from gold;" neoSch1 | less
+      
+# Manually examine patterns in the new fakeAgp names:
+NW_[0-9]+(v[0-9]+[0-9_]*)?
+chrM
+chrX_NW_[0-9]+(v[0-9]+_random[0-9_]*)?
+
+    # implies a search rule of:
 
-    # implies a search rule of: 'N[CI][NY0-9_]+(\.[0-9]+)?'
+    '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?'
 
     # verify this rule will find them all or eliminate them all:
     hgsql -N -e "select frag from gold;" neoSch1 | wc -l
-    # 7872
+    # 46705
 
     hgsql -N -e "select frag from gold;" neoSch1 \
-       | egrep -e 'N[CI][NY0-9_]+(\.[0-9]+)?' | wc -l
-    # 7872
+       | egrep -e '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?' | wc -l
+    # 46705
 
     hgsql -N -e "select frag from gold;" neoSch1 \
-       | egrep -v -e 'N[CI][NY0-9_]+(\.[0-9]+)?' | wc -l
+     | egrep -v -e '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?' | wc -l
     # 0
 
     # hence, add to trackDb/seal/neoSch1/trackDb.ra
 searchTable gold
 shortCircuit 1
-termRegex N[CI][NY0-9_]+(\.[0-9]+)?
+termRegex [chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
-    git commit -m 'add gold table search rule refs #19859' trackDb.ra
+    git commit -m 'add gold table search rule refs #19883' trackDb.ra
 
     # verify searches work in the position box
 
 ##########################################################################
 ## WINDOWMASKER (DONE - 2017-09-22 - Hiram)
 
     mkdir /hive/data/genomes/neoSch1/bed/windowMasker
     cd /hive/data/genomes/neoSch1/bed/windowMasker
     time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
         -dbHost=hgwdev neoSch1) > do.log 2>&1
     # real    159m30.920s
 
     # Masking statistics
     cat faSize.neoSch1.cleanWMSdust.txt
 # 2400839308 bases (53716773 N's 2347122535 real 1552520352 upper