dc1f6067d727ce64e88b69440d8c7556930db8ae braney Fri Aug 16 14:47:34 2019 -0700 some tweaks to the genbank build process diff --git src/hg/makeDb/genbank/etc/build-genbank src/hg/makeDb/genbank/etc/build-genbank index 583bbb0..396c8bd 100755 --- src/hg/makeDb/genbank/etc/build-genbank +++ src/hg/makeDb/genbank/etc/build-genbank @@ -18,85 +18,99 @@ . $gbRoot/lib/gbCommon.sh if test -f var/build.lock then echo "genbank build lock file exists." ls -l var/build.lock exit 1 fi touch var/build.lock # download and process cd $gbRoot nice gbDownloadStep +# remove the genbank dailies because they're crufty AF. +lastDaily=`ls -rdt data/download/g*/daily-nc | tail -1` +rm -rf $lastDaily nice gbProcessStep -mkOrganismList +# should delete directories in data/*/ if there are more than three +# versions of refseq.* or genbank.* +for i in `ls -td data/download/genbank* | tail -n +4`; do echo rm -rf $i; done +for i in `ls -td data/download/refseq* | tail -n +4`; do echo rm -rf $i; done +for i in `ls -td data/processed/genbank* | tail -n +4`; do echo rm -rf $i; done +for i in `ls -td data/processed/refseq* | tail -n +4`; do echo rm -rf $i; done + # copy new fasta to hgwdev rsync --delete -a /data/genbank/data/processed /hive/data/outside/genbank/data/ rsync -a /data/genbank/data/organism.lst /hive/data/outside/genbank/data/ # should check to see if any of the two-bit files changed from the last # time we ran, and if so, re-do all the alignments. nice etc/align-genbank +rsync -av /data/genbank/data/ /hive/data/outside/genbank/data/archive + # if you want to ignore errors # date +%s > var/build/build.time # rm var/build/run/* #check for new databases SOMEDIR=some mkdir $SOMEDIR sed '/^#/d' etc/hgwdev.dbs | sort > $SOMEDIR/wantDbs.txt hgsql "" -Ne "show databases" | sort > $SOMEDIR/haveDbs.txt join -v 1 $SOMEDIR/wantDbs.txt $SOMEDIR/haveDbs.txt > $SOMEDIR/needDbs.txt (cd $SOMEDIR; for i in `join -v 1 wantDbs.txt haveDbs.txt`; do HGDB_CONF=/data/genbank/etc/.hg.conf.hgwdev hgsql $i -Ne "select * from chromInfo" > $i.chromInfo; hgsql "" -Ne "create database $i"; hgsql $i < ~/kent/src/hg/lib/chromInfo.sql; hgsql $i -Ne "load data local infile '$i.chromInfo' into table chromInfo"; hgsql $i < ~/kent/src/hg/lib/gbLoaded.sql; done) -rm -rf $SOMEDIR +# rm -rf $SOMEDIR # load the databases etc/genbank-dbload -inclEsts # newly made dbs need to be copied without validation -# TODO +cd etc +for i in `cat ../some/needDbs.txt`; do echo ./copyDbDev.sh $i gbTmp .hg.conf.hgwdev gbPerAssemblyTables.txt yes; done +cd . # check for gbExtFile for need to run gbExtUpdate f=`hgsql hg38 -Ne "select path from gbExtFile" | sed 's?/gbdb/genbank/./data/processed/??' | sed 's?/.*??' | uniq | wc -l` if test $f -gt 2; then bin/x86_64/extFileUpdate hg38 ; fi # we should re-build orfeome in this case too! -# should delete directories in data/*/ if there are more than three -# versions of refseq.* or genbank.* - # copy over hg38 metadata tables to hgFixed cp etc/gbMetadataTables.txt /var/tmp/genbanktables sudo mygbcopy hg38 hgFixed # build the grep index bin/dumpGrepIndex /data/tmp/grepIndex hgFixed rsync -av /data/tmp/grepIndex/hgFixed/ hgwdev-101:/data/tmp/grepIndex/hgFixed/ # rsync hgFixed etc/copyMetaDev.sh # to fix errors -- # cd etc # ./deleteLocks.sh hgFixed gbMeta .hg.conf.hgwdev # ./validateDb.sh .hg.conf.hgwdev hgFixed gbMeta gbMetadataTablesReg.txt yes # ./copyDbDev.sh hgFixed gbMeta .hg.conf.hgwdev gbMetadataTablesReg.txt yes # rsync the assemblies etc/copyAssemblies.sh # to fix errors -- # cd etc -# ./deleteLocks.sh ochPri3 gbTmp .hg.conf.hgwdev -# ./validateDb.sh .hg.conf.hgwdev ochPri3 gbTmp gbPerAssemblyTables.txt yes -# ./copyDbDev.sh ochPri3 gbTmp .hg.conf.hgwdev gbPerAssemblyTables.txt yes +db=chiLan1 +# ./deleteLocks.sh $db gbTmp .hg.conf.hgwdev +# ./validateDb.sh .hg.conf.hgwdev $db gbTmp gbPerAssemblyTables.txt yes +# ./copyDbDev.sh $db gbTmp .hg.conf.hgwdev gbPerAssemblyTables.txt yes # cd .. # etc/copyAssemblies.sh + +# make the downloads and copy them to hgdownload (somehow sync with cluster-admin push of tables rm var/build.lock echo "genbank build completed on $(hostname)"