f1b5eb49959cc16f05675a7f5e254bc787b99409 hiram Wed May 18 09:08:32 2022 -0700 better recognition of the align file presence no redmine diff --git src/hg/utils/automation/asmHubRepeatMasker.sh src/hg/utils/automation/asmHubRepeatMasker.sh index af63364..9164bf1 100755 --- src/hg/utils/automation/asmHubRepeatMasker.sh +++ src/hg/utils/automation/asmHubRepeatMasker.sh @@ -16,30 +16,33 @@ export dateStamp=`date "+%FT%T %s"` export asmId=$1 export rmOutFile=$2 export destDir=$3 export chrSizes="../../$asmId.chrom.sizes" # assume this file name pattern export faAlign=`echo "${rmOutFile}" | sed -e 's/sorted.fa.out/fa.align/; s/.gz//;'` export RepeatMaskerPath="/hive/data/staging/data/RepeatMasker210401" if [ -d "${destDir}" ]; then cd "${destDir}" + if [ ! -s "${faAlign}" ]; then + faAlign="${faAlign}.gz" + fi # align file only exists when RM has been run locally, not for NCBI version if [ -s "${faAlign}" ]; then $RepeatMaskerPath/util/rmToTrackHub.pl -out "${rmOutFile}" -align "${faAlign}" # in place same file sort using the -o output option sort -k1,1 -k2,2n -o "${asmId}.fa.align.tsv" "${asmId}.fa.align.tsv" & sort -k1,1 -k2,2n -o "${asmId}.sorted.fa.join.tsv" "${asmId}.sorted.fa.join.tsv" wait bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskAlignBed.as \ -type=bed3+14 "${asmId}.fa.align.tsv" "${chrSizes}" \ "${asmId}.rmsk.align.bb" & bedToBigBed -tab -as=$HOME/kent/src/hg/lib/bigRmskBed.as -type=bed9+5 \ "${asmId}.sorted.fa.join.tsv" "${chrSizes}" \ "${asmId}.rmsk.bb" wait gzip "${asmId}.fa.align.tsv" &