ae5edd37c55752e0694c17530a5412701605f782
chmalee
  Mon Feb 3 13:51:03 2020 -0800
Add a -c check option for archiveTracks.sh so you can check what would be backed up, feedback from BrianL

diff --git src/hg/utils/archiveTracks.sh src/hg/utils/archiveTracks.sh
index ca70b4c..9bef4b0 100755
--- src/hg/utils/archiveTracks.sh
+++ src/hg/utils/archiveTracks.sh
@@ -1,59 +1,63 @@
 #!/bin/bash
 
 # this script backs up track data (both bigBeds and mysql tables)
 # to a location of your choosing
 
 set -beEu -o pipefail
 
 # globals
 archiveDir="/hive/data/inside/archive"
 tables=""
 files=""
 versionName=""
 trackSetName=""
 dbList=""
 verbose="FALSE"
+checkOnly="FALSE"
 EXIT_STATUS=0
 
 usage() {
 cat << EOF
-Usage: `basename $0` [-hbtfv] archiveRoot database(s) trackArchiveName
+Usage: `basename $0` [-hcstfv] archiveRoot database(s) trackSetName
 
 Required Positional arguments:
 archiveRoot        The root location of the backup directory (/hive/data/inside/archive/).
 database(s)        A single database or double quoted list of databases to back up
 trackSetName       The name of this track archive set. A directory will be created
                    in the archiveRoot location with the files or tables from the -t
                    or -f arguments.
 
 Optional arguments (Must preceed required args):
 -h                  Display this help and exit.
 -t                  A table name or a file with a list of tables to backup.
 -f                  A list of files to back up for this track set (/gbdb/ files).
 -v                  Use a specified version string like "v1" instead of the output of 'date +%F'.
 -s                  Print verbose status along the way to stderr.
+-c                  Don't copy anything, just output what WOULD be copied
 
 Backs up a list of tables or files for a track for a single database or list of
 databases. Note the third required argument of what this track set is named. Exits 0 for success
 and 1 on failure. The heirarchy created is:
 \$archiveRoot/\$database/\$trackSetName/\$version/
 
 Example Usages:
-To back up the big files of the crispr track, as well as the crisprRanges table:
-find /gbdb/hg38/crispr/crispr{Details.tab,.bb} -print  > crisprFiles.txt
-`basename $0` -t crisprRanges -f crisprFiles.txt /hive/data/inside/archive/ hg38 "CRISPR"
+In the example below, a list of files (listOfBigBeds.txt) contains all the bigBeds for backup,
+while we only want to back up one table, crisprRanges:
+`basename $0` -t crisprRanges -f listOfBigBeds.txt /hive/data/inside/archive/ hg38 "CRISPR"
+
+A list of tables would work similarly for the -t switch.
 EOF
 }
 
 printVars() {
     printf "found variables:\n" 
     printf "archiveDir: '%s'\n" "${archiveDir}"
     printf "versionName: '%s'\n" "${versionName}"
     printf "dbs: '%s'\n" "${dbs}"
     printf "trackName: '%s'\n" "${trackName}"
     printf "tbls: '%s'\n" "${tables}"
     printf "files: '%s'\n" "${files}"
     exit 1
 }
 
 backupOneTable() {
@@ -163,43 +167,90 @@
     then
         printf "created dir %s/%s/%s\n" "${db}" "${trackSetName}" "${versionName}"
     fi
     cd ${db}/"${trackSetName}"/${versionName}
     if [[ ! -z "${files}" ]]
     then
         backupBigFiles "${db}" "${files}"
     fi
     if [[ ! -z "${tables}" ]]
     then
         backupTables "${db}" "${tables}"
     fi
     cd ${archiveDir}
 }
 
+# printCheck db
+# Dry-run report for -c: prints the archive directory for one database and
+# the paths a real backup is expected to write there. Nothing is copied.
+# Globals read: archiveDir, trackSetName, versionName, tables, files
+# Arguments:    $1 - database name
+# Outputs:      the report, on stdout
+printCheck()
+{
+    local db=$1 archRoot tblList fileList tbl f
+    # assigned separately from 'local' so a realpath failure is not masked
+    archRoot="$(realpath "${archiveDir}")/${db}/${trackSetName}/${versionName}"
+    # -t and -f each hold either the item(s) themselves or the name of a
+    # file listing them; an existing path is read as a list file
+    tblList="${tables}"
+    if [[ -e "${tables}" ]]
+    then
+        tblList=$(cat "${tables}")
+    fi
+    fileList="${files}"
+    if [[ -e "${files}" ]]
+    then
+        fileList=$(cat "${files}")
+    fi
+    printf "check mode: moving files to the following directory:\n"
+    printf "%s\n" "${archRoot}"
+    printf "\n"
+    printf "The following tables and trackDb's will be dumped:\n"
+    # unquoted on purpose: the lists are whitespace separated
+    for tbl in ${tblList}
+    do
+        # NOTE(review): suffixes assumed to mirror backupOneTable - confirm
+        printf "%s/%s.gz\n" "${archRoot}" "${tbl}"
+        printf "%s/%s.trackDb.tab.gz\n" "${archRoot}" "${tbl}"
+        printf "%s/%s.sql\n" "${archRoot}" "${tbl}"
+    done
+    printf "\n"
+    printf "The following big data files will be archived:\n"
+    for f in ${fileList}
+    do
+        # only the basename is reported, placed directly under archRoot
+        printf "%s/%s\n" "${archRoot}" "$(basename "${f}")"
+    done
+}
+
 ##### Parse command-line input #####
 
 #OPTIND=1 # Reset is necessary if getopts was used previously in the script.  It is a good idea to make this local in a function.
-while getopts "hst:f:v:" opt
+while getopts "hsct:f:v:" opt
 do
     case $opt in
         h)
             usage
             exit 0
             ;;
         s)
             verbose="TRUE"
             ;;
+        c)
+            checkOnly="TRUE"
+            ;;
         t)
             tables="${OPTARG}"
             ;;
         f)
             files="${OPTARG}"
             ;;
         v)
             versionName="${OPTARG}"
             ;;
         '?')
             printf "unknown option %s\n" "$opt" 1>&2
             usage >&2
             exit 1
             ;;
     esac
@@ -250,38 +301,52 @@
     tmp="${tables}"
     tables=`pwd`/"${tmp}"
     if [ "${verbose}" = "TRUE" ]
     then
         printf "Fixing up tables list to %s\n" "${tables}"
     fi
 fi
 
 # set the archive parent directory location
 if [[ ! -d "${archiveDir}" ]]
 then
     if [ "${verbose}" = "TRUE" ]
     then
         printf "Archiving to %s\n" "${archiveDir}" 1>&2
     fi
+    if [ "${checkOnly}" != "TRUE" ]  # -c must not create anything
+    then
         mkdir -p "${archiveDir}"
     fi
+fi
 
 if [[ "${archiveDir}" != /* ]]
 then
     tmp="${archiveDir}"
     archiveDir=`pwd`/"${tmp}"
 fi
 
-cd "${archiveDir}"
+# check mode may have skipped the mkdir above; only then is the cd skipped too
+[[ "${checkOnly}" = "TRUE" && ! -d "${archiveDir}" ]] || cd "${archiveDir}"
 if [[ -f "${dbList}" ]]
 then
     for db in $(cat "${dbList}")
     do
+        if [ "${checkOnly}" = "TRUE" ]
+        then
+            printCheck ${db}
+        else
             doBackup ${db}
+        fi
     done
 else
     for db in $(echo "${dbList}")
     do
+        if [ "${checkOnly}" = "TRUE" ]
+        then
+            printCheck ${db}
+        else
             doBackup ${db}
+        fi
     done
 fi
 exit ${EXIT_STATUS}