dbdb3f0d042dfc47b1214c2212c8f32297125b5e
hiram
  Wed Oct 9 10:30:39 2019 -0700
add construction information refs #21784

diff --git src/hg/makeDb/doc/platinum/runOne.sh src/hg/makeDb/doc/platinum/runOne.sh
new file mode 100755
index 0000000..f35f6f8
--- /dev/null
+++ src/hg/makeDb/doc/platinum/runOne.sh
@@ -0,0 +1,178 @@
+#!/bin/bash
+
+# Launch doAssemblyHub.pl in the background for every assembly named in $1
+# (a whitespace separated list of names such as GCF_901001135.1_aRhiBiv1.1),
+# appending to a per-assembly log file in the build directory, then wait for
+# all runs.  Exit status is non-zero when any of the runs failed.
+#
+# doAssemblyHub.pl step names (values for stepStart/stepEnd below):
+# download, sequence, assemblyGap, gatewayPage, cytoBand, gc5Base,
+# repeatMasker, simpleRepeat, allGaps, idKeys, windowMasker, addMask,
+# gapOverlap, tandemDups, cpgIslands, ncbiGene, xenoRefGene, augustus,
+# trackDb, cleanup
+
+pids=()
+# $1 is deliberately unquoted so that it word-splits into several names
+for asmIdName in $1
+do
+  # characters 1-3 are the GCA/GCF prefix, 5-13 give three directory levels
+  gcDir="${asmIdName:0:3}"
+  dir1="${asmIdName:4:3}"
+  dir2="${asmIdName:7:3}"
+  dir3="${asmIdName:10:3}"
+  # asmId = prefix + second '_' field; the braces matter, "$gcDir_" would
+  # expand the (unset) variable named gcDir_ and drop the GCA/GCF prefix
+  accession="${asmIdName#*_}"
+  asmId="${gcDir}_${accession%%_*}"
+
+  # a GCF prefix selects the refseq hierarchy and name list, anything else
+  # falls back to genbank
+  export genbankRefseq="genbank"
+  nameList="genBank.list"
+  if [ "${gcDir}" = "GCF" ]; then
+    genbankRefseq="refseq"
+    nameList="refSeq.list"
+  fi
+  # third column of the matching list line, blanks turned into underscores
+  export sciName="$(grep -h "${asmId}" "${nameList}" | cut -f3 | sed -e 's/ /_/g;')"
+
+  # export stepStart="gatewayPage"
+  # export stepEnd="addMask"
+  export stepStart="gapOverlap"
+  export stepEnd="trackDb"
+  export augustusSpecies="human"
+  subGroup="vertebrate_other"
+
+  export buildDir="/hive/data/genomes/asmHubs/platinumGenomes/${asmIdName}"
+  export sourceDir="/hive/data/genomes/asmHubs/ncbiSrc/${gcDir}/${dir1}/${dir2}/${dir3}/${asmIdName}"
+  # cluster specifications and ucscNames request
+  export hubSpecs="-bigClusterHub=ku -smallClusterHub=hgwdev-101 -ucscNames"
+  logFile="${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log"
+  mkdir -p "${buildDir}"
+
+  # build the command once so the logged command is exactly the one executed;
+  # hubSpecs is left unquoted on purpose so it splits into separate options
+  cmd=(~/kent/src/hg/utils/automation/doAssemblyHub.pl "${genbankRefseq}"
+    "${subGroup}" "${sciName}" "${asmIdName}" -verbose=2
+    "-continue=${stepStart}" "-stop=${stepEnd}" ${hubSpecs} -fileServer=hgwdev
+    "-augustusSpecies=${augustusSpecies}" "-buildDir=${buildDir}"
+    "-sourceDir=${sourceDir}")
+
+  {
+    printf "###########################################################\n"
+    printf "### %s\n" "`date '+%F %T'`"
+    printf '%q ' "${cmd[@]}"
+    printf '\n'
+  } >> "${logFile}"
+
+  # run in the background so that several assemblies can build concurrently
+  time ("${cmd[@]}") >> "${logFile}" 2>&1 &
+  pids+=($!)
+
+  echo "${logFile}"
+  printf "###########################################################\n" >> "${logFile}"
+done
+
+# a bare "wait" always returns 0, so collect the status of each job instead
+exitStatus=0
+for pid in "${pids[@]}"
+do
+  wait "${pid}" || exitStatus=1
+done
+
+exit "${exitStatus}"
+# Unreachable (follows the "exit" above): earlier variant that ran in the foreground, kept as a note.
+  time (~/kent/src/hg/utils/automation/doAssemblyHub.pl genbankRefseq \
+    vertebrate_other "${sciName}" "${asmIdName}" -verbose=2 \
+     -continue=$stepStart -stop=$stepEnd ${hubSpecs} -fileServer=hgwdev \
+      -augustusSpecies=${augustusSpecies} -buildDir="${buildDir}" \
+        -sourceDir="${sourceDir}") \
+   >> ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log 2>&1
+
+   echo ${buildDir}/${asmIdName}.${stepStart}-${stepEnd}.log
+
+done
+
+exit $?
+# sample asmIdName values (also unreachable):
+# export asmIdName="GCA_900324485.2_fMasArm1.2"
+export asmIdName="GCF_901001135.1_aRhiBiv1.1"
+
+
+
+exit $?
+
+    -buildDir dir     Construct assembly hub in dir instead of default
+       $HgAutomate::clusterData/asmHubs/{genbank|refseq}/subGroup/species/asmId/
+    -sourceDir dir    Find assembly in dir instead of default
+                          $sourceDir
+                          the assembly is found at:
+
+
+
+usage: doAssemblyHub.pl [options] genbank|refseq subGroup species asmId
+required arguments:
+    genbank|refseq - specify either genbank or refseq hierarchy source
+    subGroup       - specify subGroup at NCBI FTP site, examples:
+                   - vertebrate_mammalian vertebrate_other plant etc...
+    species        - species directory at NCBI FTP site, examples:
+                   - Homo_sapiens Mus_musculus etc...
+    asmId          - assembly identifier at NCBI FTP site, examples:
+                   - GCF_000001405.32_GRCh38.p6 GCF_000001635.24_GRCm38.p4 etc..
+
+options:
+    -continue step        Pick up at the step where a previous run left off
+                          (some debugging and cleanup may be necessary first).
+                          step must be one of the following:
+                          download, sequence, assemblyGap, gatewayPage, gc5Base, repeatMasker, simpleRepeat, allGaps, idKeys, addMask, windowMasker, cpgIslands, augustus, trackDb, cleanup
+    -stop step            Stop after completing the specified step.
+                          (same possible values as for -continue above)
+    -buildDir dir     Construct assembly hub in dir instead of default
+       /hive/data/genomes/asmHubs/{genbank|refseq}/subGroup/species/asmId/
+    -sourceDir dir    Find assembly in dir instead of default
+                          /hive/data/outside/ncbi/genomes
+                          the assembly is found at:
+  sourceDir/{genbank|refseq}/subGroup/species/all_assembly_versions/asmId/
+    -ucscNames        Translate NCBI/INSDC/RefSeq names to UCSC names
+                      default is to use the given NCBI/INSDC/RefSeq names
+    -bigClusterHub mach   Use mach (default: ku) as parasol hub
+                          for cluster runs with very large job counts.
+    -dbHost mach          Use mach (default: hgwdev) as database server.
+    -fileServer mach      Use mach (default: fileServer of the build directory)
+                          for I/O-intensive steps.
+    -smallClusterHub mach Use mach (default: ku) as parasol hub
+                          for cluster runs with smallish job counts.
+    -workhorse machine    Use machine (default: hgwdev) for compute or
+                          memory-intensive steps.
+    -debug                Don't actually run commands, just display them.
+    -verbose num          Set verbose level to num (default 1).
+    -help                 Show detailed help and exit.
+
+Automates build of assembly hub.  Steps:
+    download: sets up sym link working hierarchy from already mirrored
+                files from NCBI in:
+                      /hive/data/outside/ncbi/genomes/{genbank|refseq}/
+    sequence: establish AGP and 2bit file from NCBI directory
+    assemblyGap: create assembly and gap bigBed files and indexes
+                 for assembly track names
+    gatewayPage: create html/asmId.description.html contents
+    gc5Base: create bigWig file for gc5Base track
+    repeatMasker: run repeat masker cluster run and create bigBed files for
+                  the composite track categories of repeats
+    simpleRepeat: run trf cluster run and create bigBed file for simple repeats
+    allGaps: calculate all actual real gaps due to N's in sequence, can be
+                  more than were specified in the AGP file
+    idKeys: calculate md5sum for each sequence in the assembly to be used to
+            find identical sequences in similar assemblies
+    addMask: combine repeatMasker and trf simpleRepeats into one 2bit file
+    windowMasker: run windowMasker cluster run, create windowMasker bigBed file
+                  and compute intersection with repeatMasker results
+    cpgIslands: run CpG islands cluster runs for both masked and unmasked
+                sequences and create bigBed files for this composite track
+    trackDb: create trackDb.txt file for assembly hub to include all constructed
+             bigBed and bigWig tracks
+    cleanup: Removes or compresses intermediate files.
+All operations are performed in the build directory which is
+/hive/data/genomes/$db/bed/template.$date unless -buildDir is given.
+
+