ae5edd37c55752e0694c17530a5412701605f782 chmalee Mon Feb 3 13:51:03 2020 -0800 Add a -c check option for archiveTracks.sh so you can check what would be backed up, feedback from BrianL diff --git src/hg/utils/archiveTracks.sh src/hg/utils/archiveTracks.sh index ca70b4c..9bef4b0 100755 --- src/hg/utils/archiveTracks.sh +++ src/hg/utils/archiveTracks.sh @@ -1,59 +1,63 @@ #!/bin/bash # this script backs up track data (both bigBeds and mysql tables) # to a location of your choosing set -beEu -o pipefail # globals archiveDir="/hive/data/inside/archive" tables="" files="" versionName="" trackSetName="" dbList="" verbose="FALSE" +checkOnly="FALSE" EXIT_STATUS=0 usage() { cat << EOF -Usage: `basename $0` [-hbtfv] archiveRoot database(s) trackArchiveName +Usage: `basename $0` [-hcstfv] archiveRoot database(s) trackSetName Required Positional arguments: archiveRoot The root location of the backup directory (/hive/data/inside/archive/). database(s) A single database or double quoted list of databases to back up trackSetName The name of this track archive set. A directory will be created in the archiveRoot location with the files or tables from the -t or -f arguments. Optional arguments (Must preceed required args): -h Display this help and exit. -t A table name or a file with a list of tables to backup. -f A list of files to back up for this track set (/gbdb/ files). -v Use a specified version string like "v1" instead of the output of 'date +%F'. -s Print verbose status along the way to stderr. +-c Don't copy anything, just output what WOULD be copied Backs up a list of tables or files for a track for a single database or list of databases. Note the third required argument of what this track set is named. Exits 0 for success and 1 on failure. 
The heirarchy created is: \$archiveRoot/\$database/\$trackSetName/\$version/ Example Usages: -To back up the big files of the crispr track, as well as the crisprRanges table: -find /gbdb/hg38/crispr/crispr{Details.tab,.bb} -print > crisprFiles.txt -`basename $0` -t crisprRanges -f crisprFiles.txt /hive/data/inside/archive/ hg38 "CRISPR" +In the example below, a list of files (listOfBigBeds.txt) contains all the bigBeds for backup, +while we only want to back up one table, crisprRanges: +`basename $0` -t crisprRanges -f listOfBigBeds.txt /hive/data/inside/archive/ hg38 "CRISPR" + +A list of tables would work similarly for the -t switch. EOF } printVars() { printf "found variables:\n" printf "archiveDir: '%s'\n" "${archiveDir}" printf "versionName: '%s'\n" "${versionName}" printf "dbs: '%s'\n" "${dbs}" printf "trackName: '%s'\n" "${trackName}" printf "tbls: '%s'\n" "${tables}" printf "files: '%s'\n" "${files}" exit 1 } backupOneTable() { @@ -163,43 +167,90 @@ then printf "created dir %s/%s/%s\n" "${db}" "${trackSetName}" "${versionName}" fi cd ${db}/"${trackSetName}"/${versionName} if [[ ! -z "${files}" ]] then backupBigFiles "${db}" "${files}" fi if [[ ! 
-z "${tables}" ]] then backupTables "${db}" "${tables}" fi cd ${archiveDir} } +printCheck() +{ + db=$1 + archRoot="`realpath ${archiveDir}`/${db}/${trackSetName}/${versionName}" + printf "check mode: moving files to the following directory:\n" + printf "%s\n" "${archRoot}" + printf "\n" + if [[ -e "${tables}" ]] + then + printf "The following tables and trackDb's will be dumped:\n" + for tbl in $(cat "${tables}") + do + printf "%s/%s.gz\n" "${archRoot}" "${tbl}" + printf "%s/%s.trackDb.tab.gz\n" "${archRoot}" "${tbl}" + printf "%s/%s.sql\n" "${archRoot}" "${tbl}" + done + else + printf "The following tables and trackDb's will be dumped:\n" + for tbl in ${tables} + do + printf "%s/%s.gz\n" "${archRoot}" "${tbl}" + printf "%s/%s.trackDb.tab.gz\n" "${archRoot}" "${tbl}" + printf "%s/%s.sql\n" "${archRoot}" "${tbl}" + done + fi + printf "\n" + if [[ -e "${files}" ]] + then + printf "The following big data files will be archived:\n" + for f in $(cat "${files}") + do + fname=`basename ${f}` + printf "%s/%s\n" "${archRoot}" "${fname}" + done + else + printf "The following big data files will be archived:\n" + for f in ${files} + do + fname=`basename ${f}` + printf "%s/%s\n" "${archRoot}" "${fname}" + done + fi +} + ##### Parse command-line input ##### #OPTIND=1 # Reset is necessary if getopts was used previously in the script. It is a good idea to make this local in a function. -while getopts "hst:f:v:" opt +while getopts "hsct:f:v:" opt do case $opt in h) usage exit 0 ;; s) verbose="TRUE" ;; + c) + checkOnly="TRUE" + ;; t) tables="${OPTARG}" ;; f) files="${OPTARG}" ;; v) versionName="${OPTARG}" ;; '?') printf "unknown option %s\n" "$opt" 1>&2 usage >&2 exit 1 ;; esac @@ -250,38 +301,52 @@ tmp="${tables}" tables=`pwd`/"${tmp}" if [ "${verbose}" = "TRUE" ] then printf "Fixing up tables list to %s\n" "${tables}" fi fi # set the archive parent directory location if [[ ! 
-d "${archiveDir}" ]] then if [ "${verbose}" = "TRUE" ] then printf "Archiving to %s\n" "${archiveDir}" 1>&2 fi + if [ "${checkOnly}" != "TRUE" ] + then mkdir -p "${archiveDir}" fi +fi if [[ "${archiveDir}" != /* ]] then tmp="${archiveDir}" archiveDir=`pwd`/"${tmp}" fi + cd "${archiveDir}" if [[ -f "${dbList}" ]] then for db in $(cat "${dbList}") do + if [ "${checkOnly}" = "TRUE" ] + then + printCheck ${db} + else doBackup ${db} + fi done else for db in $(echo "${dbList}") do + if [ "${checkOnly}" = "TRUE" ] + then + printCheck ${db} + else doBackup ${db} + fi done fi exit ${EXIT_STATUS}