2bd3c930e4d93e242ee896ab83a585f6b83b8069 hiram Fri Sep 11 14:28:03 2020 -0700 update gold table search rule for new fakeAgp refs #19883 diff --git src/hg/makeDb/doc/neoSch1/initialBuild.txt src/hg/makeDb/doc/neoSch1/initialBuild.txt index 8bd3286..2db0cc6 100644 --- src/hg/makeDb/doc/neoSch1/initialBuild.txt +++ src/hg/makeDb/doc/neoSch1/initialBuild.txt @@ -430,61 +430,65 @@ hgsql -N -e 'select chrom,name,"genbank" from ucscToINSDC;' neoSch1 \ > ucsc.genbank.tab # verify chrM is correct: grep chrM * # ucsc.genbank.tab:chrM AM181022.1 genbank # ucsc.refseq.tab:chrM NC_008421.1 refseq awk '{printf "%s\t%s\t%s\n", $2,$1,$3}' ucsc.genbank.tab ucsc.refseq.tab \ | sort > neoSch1.chromAlias.tab hgLoadSqlTab neoSch1 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ neoSch1.chromAlias.tab ######################################################################### -# fixup search rule for assembly track/gold table (DONE - 2017-09-23 - Hiram) +# fixup search rule for assembly track/gold table (DONE - 2020-09-11 - Hiram) cd ~/kent/src/hg/makeDb/trackDb/seal/neoSch1 # preview prefixes and suffixes: - hgsql -N -e "select frag from gold;" neoSch1 \ - | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c - 1 NC_.1 - 7871 NINY.1 + hgsql -N -e "select frag from gold;" neoSch1 | less + +# Manually examine patterns in the new fakeAgp names: +NW_[0-9]+(v[0-9]+[0-9_]*)? +chrM +chrX_NW_[0-9]+(v[0-9]+_random[0-9_]*)? + + # implies a search rule of: - # implies a search rule of: 'N[CI][NY0-9_]+(\.[0-9]+)?' + '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?' # verify this rule will find them all or eliminate them all: hgsql -N -e "select frag from gold;" neoSch1 | wc -l - # 7872 + # 46705 hgsql -N -e "select frag from gold;" neoSch1 \ - | egrep -e 'N[CI][NY0-9_]+(\.[0-9]+)?' | wc -l - # 7872 + | egrep -e '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?' | wc -l + # 46705 hgsql -N -e "select frag from gold;" neoSch1 \ - | egrep -v -e 'N[CI][NY0-9_]+(\.[0-9]+)?' 
| wc -l + | egrep -v -e '[chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)?' | wc -l # 0 # hence, add to trackDb/seal/neoSch1/trackDb.ra searchTable gold shortCircuit 1 -termRegex N[CI][NY0-9_]+(\.[0-9]+)? +termRegex [chrMNWX_]+([0-9]+)?(v[0-9]+)?(_random)?([0-9_]*)? query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%' searchPriority 8 - git commit -m 'add gold table search rule refs #19859' trackDb.ra + git commit -m 'add gold table search rule refs #19883' trackDb.ra # verify searches work in the position box ########################################################################## ## WINDOWMASKER (DONE - 2017-09-22 - Hiram) mkdir /hive/data/genomes/neoSch1/bed/windowMasker cd /hive/data/genomes/neoSch1/bed/windowMasker time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev neoSch1) > do.log 2>&1 # real 159m30.920s # Masking statistics cat faSize.neoSch1.cleanWMSdust.txt # 2400839308 bases (53716773 N's 2347122535 real 1552520352 upper