#!/bin/bash
#
# liftOverNew.sh - compare today's liftOver file listings from hgwdev,
# hgwbeta and hgw1, report what differs, and accumulate the list of files
# to be pushed from hgwbeta out to the RR machines.
#
# Provenance: added in commit ccff28e568396045db4f31d9fa577412b017e837
#   (hiram, Wed Nov 19 14:28:51 2025 -0800)
#   "now getting the /gbdb/*/liftOver/ files on the RR push list refs #35575"
#   as src/hg/utils/otto/genArk/liftOverNew.sh
#
# Inputs, read from ${TOP}:
#   dev.today.liftOverList.gz
#   hgwbeta.today.liftOverList.gz
#   hgw1.today.liftOverList.gz
#   These are read as tab-separated listings: column 2 is the join key
#   (the file name) and column 1 is the value compared as the time stamp.
#   NOTE(review): the two column layout is inferred from the cut/join/awk
#   usage below - confirm against the job that writes these listings.
#
# Outputs, written in ${TOP}:
#   new.liftOver.ready.to.beta.txt   - names on hgwdev not on hgwbeta
#   beta.liftOver.timeStamps.txt     - names whose stamps differ dev/beta
#   new.liftOver.ready.to.go.txt     - names on hgwbeta not on hgw1
#   new.liftOver.timeStamps.txt      - names whose stamps differ beta/hgw1
#   rsync.gbdb.liftOver.fileList.txt - recreated here on each run
#   rsync.gbdb.toRR.fileList.txt     - appended to, never removed here

export TOP="/hive/data/inside/GenArk/pushRR"

# Everything below removes and creates files by relative name, so do not
# continue in some other directory when the cd fails.
cd "${TOP}" || { printf "ERROR: can not cd to %s\n" "${TOP}" 1>&2; exit 255; }

# Extended regular expression of listing lines to leave out of all counts
# and comparisons.  The dots are not escaped, so each one matches any
# character, and the plain hub.txt alternative already matches the
# public/beta/alpha/user variants.
export doNotCount="public.hub.txt|beta.hub.txt|alpha.hub.txt|user.hub.txt|hub.txt|/contrib/"

# listing <machine> - today's listing for that machine, excluded lines removed
listing() {
  zegrep -v "${doNotCount}" "${1}.today.liftOverList.gz"
}

# countFiles <machine> - number of listing lines that are not excluded
countFiles() {
  zegrep -c -v "${doNotCount}" "${1}.today.liftOverList.gz"
}

# sortedNames <machine> - column 2 only, sorted ready for join
sortedNames() {
  listing "${1}" | cut -f2 | sort
}

# sortedByName <machine> - whole lines ordered on tab-delimited column 2,
# the same field definition that join -t$'\t' -1 2 -2 2 uses
sortedByName() {
  listing "${1}" | sort -t$'\t' -k2,2
}

# commonCount <machineA> <machineB> - count of names present on both
commonCount() {
  join -t$'\t' <(sortedNames "${1}") <(sortedNames "${2}") | wc -l
}

# onlyOnFirst <machineA> <machineB> - names on A that are missing from B
onlyOnFirst() {
  join -v1 -t$'\t' <(sortedNames "${1}") <(sortedNames "${2}") | sort -u
}

# timeStampDiffs <machineA> <machineB> - names present on both machines
# whose column 1 values differ.  The join output is: name, A's column 1,
# B's column 1, hence the $2 != $3 comparison.
timeStampDiffs() {
  join -t$'\t' -1 2 -2 2 <(sortedByName "${1}") <(sortedByName "${2}") \
    | awk -F$'\t' '$2 != $3' | cut -f1 | sort -u
}

# showEnds <file> - first three and last three lines as a short preview
showEnds() {
  head -3 "${1}"
  printf " . . .\n"
  tail -3 "${1}"
}

devCount=$(countFiles dev)
betaCount=$(countFiles hgwbeta)
hgw1Count=$(countFiles hgw1)
export devCount betaCount hgw1Count

printf "### excluding the files:\n\t%s\nin these counts.\n" "${doNotCount}"

printf "### count of files on hgwdev: %s, beta: %s and hgw1: %s\n" \
  "${devCount}" "${betaCount}" "${hgw1Count}"

printf "### count of common files between hgwbeta and hgwdev,\nnot counting /contrib/ or the hub.txt files:\n"

commonCount dev hgwbeta

# the result file always exists, it is empty when nothing is new
rm -f new.liftOver.ready.to.beta.txt
touch new.liftOver.ready.to.beta.txt
if [ "${devCount}" -gt "${betaCount}" ]; then
  # reported number is the difference of the two counts, not the
  # length of the list written below
  newFiles=$(( devCount - betaCount ))
  export newFiles
  printf "### %s new files to go out from hgwdev not /contrib/\n" "${newFiles}"
  onlyOnFirst dev hgwbeta > new.liftOver.ready.to.beta.txt
  showEnds new.liftOver.ready.to.beta.txt
fi

printf "### files with different time stamps hgwdev to hgwbeta:\n"

rm -f beta.liftOver.timeStamps.txt
timeStampDiffs dev hgwbeta > beta.liftOver.timeStamps.txt

if [ -s "beta.liftOver.timeStamps.txt" ]; then
  showEnds beta.liftOver.timeStamps.txt
fi

printf "### count of common files between hgw1 and hgwbeta,\nnot counting /contrib/ or the hub.txt files:\n"

commonCount hgwbeta hgw1

# accumulate list for cluster-admin cron job rsync
# from hgwbeta out to RR machines
rm -f rsync.gbdb.liftOver.fileList.txt

rm -f new.liftOver.ready.to.go.txt
touch new.liftOver.ready.to.go.txt
if [ "${betaCount}" -gt "${hgw1Count}" ]; then
  # was computed from the unset name betaCountCount, which made the
  # message report the hgw1 count instead of the difference
  newFiles=$(( betaCount - hgw1Count ))
  export newFiles
  printf "### %s new files to go out from hgwbeta to the RR not /contrib/\n" "${newFiles}"
  onlyOnFirst hgwbeta hgw1 > new.liftOver.ready.to.go.txt
  showEnds new.liftOver.ready.to.go.txt
  touch rsync.gbdb.liftOver.fileList.txt rsync.gbdb.toRR.fileList.txt
  cat new.liftOver.ready.to.go.txt >> rsync.gbdb.liftOver.fileList.txt
  cat new.liftOver.ready.to.go.txt >> rsync.gbdb.toRR.fileList.txt
fi

printf "### files with different time stamps hgwbeta to hgw1:\n"

rm -f new.liftOver.timeStamps.txt
timeStampDiffs hgwbeta hgw1 > new.liftOver.timeStamps.txt

if [ -s "new.liftOver.timeStamps.txt" ]; then
  showEnds new.liftOver.timeStamps.txt
  # files with changed time stamps go out to the RR as well
  touch rsync.gbdb.liftOver.fileList.txt rsync.gbdb.toRR.fileList.txt
  cat new.liftOver.timeStamps.txt >> rsync.gbdb.liftOver.fileList.txt
  cat new.liftOver.timeStamps.txt >> rsync.gbdb.toRR.fileList.txt
fi

exit $?