e1874141e3c52a43cbdd32bcceff60f2a4232807
hiram
  Mon Sep 15 12:57:55 2025 -0700
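use the fixed rmToTrackHub.pl script from Robert: switch to RepeatMasker-4.2.1, rename -sizes to -chromsizes, drop the awk filter on fa.align.tsv and restore the in-place sort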

diff --git src/hg/utils/automation/asmHubRepeatMasker.sh src/hg/utils/automation/asmHubRepeatMasker.sh
index 878f116a7d6..8e2f0c3274f 100755
--- src/hg/utils/automation/asmHubRepeatMasker.sh
+++ src/hg/utils/automation/asmHubRepeatMasker.sh
@@ -24,54 +24,51 @@
 export dateStamp=`date "+%FT%T %s"`
 
 export asmId=$1
 export rmOutFile=$2
 export destDir=$3
 export chrSizes=""
 
 if [ -s "$destDir/$asmId.chrom.sizes" ]; then
   chrSizes="$destDir/$asmId.chrom.sizes"
 elif [ -s "../../$asmId.chrom.sizes" ]; then
   chrSizes="../../$asmId.chrom.sizes"
 fi
 
 # assume this file name pattern
 export faAlign=`echo "${rmOutFile}" | sed -e 's/sorted.fa.out/fa.align/; s/.gz//;'`
-export RepeatMaskerPath="/hive/data/staging/data/RepeatMasker221107"
+export RepeatMaskerPath="/hive/data/outside/RepeatMasker/RepeatMasker-4.2.1"
 
 if [ -d "${destDir}" ]; then
   cd "${destDir}"
 
   # might already be gzipped
   if [ ! -s "${faAlign}" ]; then
      faAlign="${faAlign}.gz"
   fi
   # align file only exists when RM has been run locally, not for NCBI version
   # it is OK if it is missing, can do this anyway without it
   if [ -s "${faAlign}" ]; then
     printf "# using faAlign file: %s\n" "${faAlign}" 1>&2
-    printf "time $RepeatMaskerPath/util/rmToTrackHub.pl -sizes \"${chrSizes}\" -genome \"${asmId}\" -hubname \"${asmId}\" -out \"${rmOutFile}\" -align \"${faAlign}\"\n" 1>&2
-    time $RepeatMaskerPath/util/rmToTrackHub.pl -sizes "${chrSizes}" -genome "${asmId}" -hubname "${asmId}" -out "${rmOutFile}" -align "${faAlign}"
-    awk -F$'\t' '$15 > -1 && $13 > -1' "$asmId.fa.align.tsv" | sort -k1,1 -k2,2n > t.tsv
-    rm -f "$asmId.fa.align.tsv"
-    mv t.tsv "$asmId.fa.align.tsv"
+    printf "time $RepeatMaskerPath/util/rmToTrackHub.pl -chromsizes \"${chrSizes}\" -genome \"${asmId}\" -hubname \"${asmId}\" -out \"${rmOutFile}\" -align \"${faAlign}\"\n" 1>&2
+    time $RepeatMaskerPath/util/rmToTrackHub.pl -chromsizes "${chrSizes}" -genome "${asmId}" -hubname "${asmId}" -out "${rmOutFile}" -align "${faAlign}"
     # in place same file sort using the -o output option
-#    sort -k1,1 -k2,2n -o "${asmId}.fa.align.tsv" "${asmId}.fa.align.tsv" &
+    sort -k1,1 -k2,2n -o "${asmId}.fa.align.tsv" "${asmId}.fa.align.tsv" &
   else
     printf "# there is no faAlign file\n" 1>&2
-    printf "time $RepeatMaskerPath/util/rmToTrackHub.pl -sizes \"${chrSizes}\" -genome \"${asmId}\" -hubname \"${asmId}\" -out \"${rmOutFile}\"\n" 1>&2
-    time $RepeatMaskerPath/util/rmToTrackHub.pl -sizes "${chrSizes}" -genome "${asmId}" -hubname "${asmId}" -out "${rmOutFile}"
+    printf "time $RepeatMaskerPath/util/rmToTrackHub.pl -chromsizes \"${chrSizes}\" -genome \"${asmId}\" -hubname \"${asmId}\" -out \"${rmOutFile}\"\n" 1>&2
+    time $RepeatMaskerPath/util/rmToTrackHub.pl -chromsizes "${chrSizes}" -genome "${asmId}" -hubname "${asmId}" -out "${rmOutFile}"
   fi
   sort -k1,1 -k2,2n -o "${asmId}.sorted.fa.join.tsv" "${asmId}.sorted.fa.join.tsv" &
   wait
   printf "bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskBed.as -type=bed9+5
     \"${asmId}.sorted.fa.join.tsv\" \"${chrSizes}\"
       \"${asmId}.rmsk.bb\" &\n" 1>&2
   bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskBed.as -type=bed9+5 \
     "${asmId}.sorted.fa.join.tsv" "${chrSizes}" \
       "${asmId}.rmsk.bb" &
   if [ -s "${asmId}.fa.align.tsv" ]; then
     printf "bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskAlignBed.as
       -type=bed3+14 \"${asmId}.fa.align.tsv\" \"${chrSizes}\"
         \"${asmId}.rmsk.align.bb\" &\n" 1>&2
     bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskAlignBed.as \
       -type=bed3+14 "${asmId}.fa.align.tsv" "${chrSizes}" \