fe1290a83aa62636e7f26092751571e016818493
hiram
  Thu Jan 5 16:59:10 2023 -0800
correct NCBI Nucleotide database outlink and now with gap track no redmine

diff --git src/hg/utils/automation/faToHub.sh src/hg/utils/automation/faToHub.sh
index 15513e4..d988445 100755
--- src/hg/utils/automation/faToHub.sh
+++ src/hg/utils/automation/faToHub.sh
@@ -1,249 +1,272 @@
 #!/bin/bash
 
 # exit on any error
 set -beEu -o pipefail
 
 if [ $# -ne 2 ]; then
  printf "usage: faToTwoBit.sh /path/to/assembly.fa /destination/build/directory/
     will process the assembly.fa file into a set of files to use
     as an assembly hub for the UCSC Genome Browser.
 Expected that the /destination/build/directory/ already exists.
 The 'name' of the assembly hub will be the 'assembly' name from
     the 'assembly.fa' file name.
 The assembly.fa file can be gzipped specified with the .gz extension:
     assembly.fa.gz\n" 1>&2
   exit 255
 fi
 
 #### expected two arguments ####
 export asmFa="${1}"
 export destDir="${2}"
 
 #### must have destDir existing ####
 if [ ! -d "${destDir}" ]; then
   printf "expecting the destination directory to already exist\n" 1>&2
   printf "do not see '%s' as a directory\n" "${destDir}" 1>&2
   exit 255
 fi
 
 #### establish fundamental variables ####
 export buildLog="${destDir}/build.log"
 export asmFile=`basename ${asmFa}`
 export noGz="${asmFile%.gz}"
 export asmId="${noGz%.*}"
 
 #### working in destDir ####
 cd "${destDir}"
 mkdir -p html bbi ixIxx
 
 #### record progress in buildLog ####
 printf "#### %s ####\n"  "`date \"+%F %T\"`" >> $buildLog
 printf "# processing %s into assembly %s\n" "${asmFile}" "${asmId}" 1>&2
 printf "# processing %s into assembly %s\n" "${asmFile}" "${asmId}" >> $buildLog
 
 #### construct 2bit and 2bit.bpt from assembly.fa sequence ####
 if [ "${asmFa}" -nt "${asmId}.2bit" ]; then
    printf "# time faToTwoBit ${asmFa} ${asmId}.2bit\n" 1>&2
    printf "# time faToTwoBit ${asmFa} ${asmId}.2bit\n" >> $buildLog
    time faToTwoBit "${asmFa}" "${asmId}.2bit" >> $buildLog 2>&1
    printf "# time bptForTwoBit ${asmId}.2bit ${asmId}.2bit.bpt\n" >> $buildLog
    time bptForTwoBit "${asmId}.2bit" "${asmId}.2bit.bpt" >> $buildLog 2>&1
    touch -r "${asmFa}" "${asmId}.2bit"
    touch -r "${asmFa}" "${asmId}.2bit.bpt"
 fi
 
 #### construct chrom.sizes.txt from 2bit file ####
 if [ "${asmId}.2bit" -nt "${asmId}.chrom.sizes.txt" ]; then
    printf "# time twoBitInfo ${asmId}.2bit stdout | sort -k2,2nr > ${asmId}.chrom.sizes.txt\n" 1>&2
    printf "# time twoBitInfo ${asmId}.2bit stdout | sort -k2,2nr > ${asmId}.chrom.sizes.txt\n" >> $buildLog
    time twoBitInfo "${asmId}.2bit" stdout | sort -k2,2nr > "${asmId}.chrom.sizes.txt" 2>> $buildLog
    touch -r "${asmId}.2bit" "${asmId}.chrom.sizes.txt"
 fi
 
 #### construct AGP file from assembly.fa sequence ####
 if [ "${asmFa}" -nt "${asmId}.agp.gz" ]; then
   printf "# time hgFakeAgp -minContigGap=1 -minScaffoldGap=1000 -singleContigs  "${asmFa}" stdout | gzip -c > ${asmId}.agp.gz\n" 1>&2
   printf "# time hgFakeAgp -minContigGap=1 -minScaffoldGap=1000 -singleContigs  "${asmFa}" stdout | gzip -c > ${asmId}.agp.gz\n" >> $buildLog
   hgFakeAgp -minContigGap=1 -minScaffoldGap=1000 -singleContigs  "${asmFa}" stdout | gzip -c > "${asmId}.agp.gz" 2>> $buildLog
    touch -r "${asmFa}" "${asmId}.agp.gz"
 fi
 
 wget --timestamping "https://genome-source.gi.ucsc.edu/gitlist/kent.git/raw/master/src/hg/makeDb/doc/asmHubs/groups.txt"
 
 # calculate a default position in the middle of the largest sequence
 export chrName=`head -1 $asmId.chrom.sizes.txt | cut -f1`
 export bigChrom=`head -1 $asmId.chrom.sizes.txt | awk '{print $NF}'`
 export oneThird=`awk -v s=$bigChrom 'BEGIN {printf "%d", s/3}'`
 export tenK=`awk -v t=$oneThird -v m=$bigChrom 'BEGIN {r=t + 10000; if (r > m) { r = m}; printf "%d", r}'`
 export defPos="${chrName}:${oneThird}-${tenK}"
 
 printf "hub $asmId genome assembly
 shortLabel $asmId
 longLabel $asmId
 useOneFile on
 email yourEmail@uni.edu
 descriptionUrl html/$asmId.description.html
 
 genome $asmId
 groups groups.txt
 description $asmId
 twoBitPath $asmId.2bit
 twoBitBptUrl $asmId.2bit.bpt
 organism $asmId
 defaultPos $defPos
 scientificName $asmId
 htmlPath html/$asmId.description.html
 " > ${destDir}/hub.txt
 
 printf "<h1>assembly description page</h1>
 <p>
 Should be filled in with information about this assembly hub.
 </p>\n" > ${destDir}/html/$asmId.description.html
 
 ###### assembly/gap ####################
 mkdir -p "${destDir}/trackData/assemblyGap"
 cd "${destDir}/trackData/assemblyGap"
 if [ "../../${asmId}.agp.gz" -nt "${asmId}.assembly.bb" ]; then
   printf "# constructing assembly/gap tracks\n" 1>&2
   zgrep -v "^#" ../../$asmId.agp.gz | awk '$5 != "N" && $5 != "U"' \
      | awk '{printf "%s\t%d\t%d\t%s\t0\t%s\n", $1, $2-1, $3, $6, $9}' \
         | sort -k1,1 -k2,2n > $asmId.assembly.bed
   zgrep -v "^#" ../../$asmId.agp.gz | awk '$5 == "N" || $5 == "U"' \
      | awk '{printf "%s\t%d\t%d\t%s\n", $1, $2-1, $3, $8}' \
         | sort -k1,1 -k2,2n > $asmId.gap.bed
 
   bedToBigBed -extraIndex=name -verbose=0 $asmId.assembly.bed \
     ../../$asmId.chrom.sizes.txt $asmId.assembly.bb
   touch -r ../../$asmId.agp.gz $asmId.assembly.bb
   $HOME/kent/src/hg/utils/automation/genbank/nameToIx.pl \
     $asmId.assembly.bed | sort -u > $asmId.assembly.ix.txt
   if [ -s $asmId.assembly.ix.txt ]; then
     ixIxx $asmId.assembly.ix.txt $asmId.assembly.ix $asmId.assembly.ixx
   fi
   if [ -s "${asmId}.gap.bed" ]; then
     bedToBigBed -extraIndex=name -verbose=0 $asmId.gap.bed \
-      ../../$asmId.chrom.sizes $asmId.gap.bb
+      ../../$asmId.chrom.sizes.txt $asmId.gap.bb
     touch -r ../../$asmId.agp.gz $asmId.gap.bb
     rm -f ${destDir}/bbi/$asmId.gap.bb
     ln -s ../trackData/assemblyGap/$asmId.gap.bb ${destDir}/bbi/$asmId.gap.bb
   else
     rm -f "${asmId}.gap.bed"
   fi
 else
   printf "# assembly/gap previously completed\n" 1>&2
 fi
 
 cd "${destDir}"
 
 if [ -s "${destDir}/trackData/assemblyGap/$asmId.assembly.bb" ]; then
   rm -f ${destDir}/bbi/$asmId.assembly.bb
   ln -s ../trackData/assemblyGap/$asmId.assembly.bb ${destDir}/bbi/$asmId.assembly.bb
   if [ -s "${destDir}/trackData/assemblyGap/$asmId.assembly.ix" ]; then
     rm -f ${destDir}/ixIxx/$asmId.assembly.ix ${destDir}/ixIxx/$asmId.assembly.ixx
     ln -s ../trackData/assemblyGap/$asmId.assembly.ix ${destDir}/ixIxx/$asmId.assembly.ix
     ln -s ../trackData/assemblyGap/$asmId.assembly.ixx ${destDir}/ixIxx/$asmId.assembly.ixx
   fi
 
   printf "
 track assembly
 longLabel Assembly
 shortLabel Assembly
 visibility pack
 colorByStrand 150,100,30 230,170,40
 color 150,100,30
 altColor 230,170,40
 bigDataUrl bbi/$asmId.assembly.bb
 type bigBed 6
 html html/$asmId.assembly
 searchIndex name
 searchTrix ixIxx/$asmId.assembly.ix
-url https://www.ncbi.nlm.nih.gov/nuccore/$$
+url https://www.ncbi.nlm.nih.gov/nuccore/\$\$
 urlLabel NCBI Nucleotide database
 group map
 " >> ${destDir}/hub.txt
 
 printf "<h1>assembly track description page</h1>
 <p>
 Should be filled in with information about this track.
 </p>\n" > ${destDir}/html/$asmId.assembly.html
 
 fi
 
+if [ -s "${destDir}/trackData/assemblyGap/$asmId.gap.bb" ]; then
+  rm -f ${destDir}/bbi/$asmId.gap.bb
+  ln -s ../trackData/assemblyGap/$asmId.gap.bb ${destDir}/bbi/$asmId.gap.bb
+
+  printf "
+track gap
+longLabel AGP gap
+shortLabel Gap (AGP defined)
+visibility dense
+color 0,0,0
+bigDataUrl bbi/$asmId.gap.bb
+type bigBed 4
+group map
+html html/$asmId.gap
+" >> ${destDir}/hub.txt
+
+printf "<h1>gap track description page</h1>
+<p>
+Should be filled in with information about this track.
+</p>\n" > ${destDir}/html/$asmId.gap.html
+
+fi
+
 cd "${destDir}"
 
 ###### cytoBand ####################
 mkdir -p "${destDir}/trackData/cytoBand"
 cd "${destDir}/trackData/cytoBand"
 if [ ../../$asmId.chrom.sizes.txt -nt $asmId.cytoBand.bb ]; then
   printf "# construction cytoBand track\n" 1>&2
   awk '{printf "%s\t0\t%d\t\tgneg\n", $1, $2}' ../../$asmId.chrom.sizes.txt | sort -k1,1 -k2,2n > $asmId.cytoBand.bed
   bedToBigBed -type=bed4+1 -as=$HOME/kent/src/hg/lib/cytoBand.as -tab $asmId.cytoBand.bed ../../$asmId.chrom.sizes.txt $asmId.cytoBand.bb >> $buildLog 2>&1
 
   touch -r ../../$asmId.chrom.sizes.txt $asmId.cytoBand.bb
 else
   printf "# cytoBand previously completed\n" 1>&2
 fi
 
 cd "${destDir}"
 
 if [ -s "${destDir}/trackData/cytoBand/$asmId.cytoBand.bb" ]; then
   rm -f ${destDir}/bbi/$asmId.cytoBand.bb
   ln -s ../trackData/cytoBand/$asmId.cytoBand.bb ${destDir}/bbi/$asmId.cytoBand.bb
   printf "
 track cytoBandIdeo
 shortLabel Chromosome Band (Ideogram)
 longLabel Ideogram for Orientation
 group map
 visibility dense
 type bigBed 4 +
 bigDataUrl bbi/$asmId.cytoBand.bb
 " >> ${destDir}/hub.txt
 
 fi
 
 ###### gc5Base ####################
 mkdir -p "${destDir}/trackData/gc5Base"
 cd "${destDir}/trackData/gc5Base"
 if [ ../../$asmId.2bit -nt $asmId.gc5Base.bw ]; then
   printf "# constructin gc5Base track\n" 1>&2
   hgGcPercent -wigOut -doGaps -file=stdout -win=5 -verbose=0 test \
     ../../$asmId.2bit 2>> $buildLog \
       | gzip -c > $asmId.wigVarStep.gz
   wigToBigWig $asmId.wigVarStep.gz ../../$asmId.chrom.sizes.txt $asmId.gc5Base.bw >> $buildLog 2>&1
   rm -f $asmId.wigVarStep.gz
   touch -r ../../$asmId.2bit $asmId.gc5Base.bw
 else
   printf "# gc5Base previously completed\n" 1>&2
 fi
 
 cd "${destDir}"
 
 if [ -s "${destDir}/trackData/gc5Base/$asmId.gc5Base.bw" ]; then
   rm -f ${destDir}/bbi/$asmId.gc5Base.bw
   ln -s ../trackData/gc5Base/$asmId.gc5Base.bw ${destDir}/bbi/$asmId.gc5Base.bw
 
   printf "
 track gc5Base
 shortLabel GC Percent
 longLabel GC Percent in 5-Base Windows
 group map
 visibility full
 autoScale Off
 maxHeightPixels 128:36:16
 graphTypeDefault Bar
 gridDefault OFF
 windowingFunction Mean
 color 0,0,0
 altColor 128,128,128
 viewLimits 30:70
 type bigWig 0 100
 bigDataUrl bbi/$asmId.gc5Base.bw
 html html/$asmId.gc5Base
 " >> ${destDir}/hub.txt
 
 printf "<h1>gc5Base description page</h1>
 <p>
 Should be filled in with information about this track.
 </p>\n" > ${destDir}/html/$asmId.gc5Base.html
 
 fi