#!/bin/sh
# buildMafGene.sh (src/hg/utils/otto/knownGene/buildMafGene.sh)
#
# Snapshot as of commit 66b1030888d2fbc504ed737350256b20442367c7
# (braney, Fri Sep 3 10:43:22 2021 -0700, "ongoing work on knownGene
# automation"), which added the sourcing of ./buildEnv.sh.
#
# Runs mafGene over a multiz alignment to produce per-exon nucleotide and
# amino-acid FASTA files, first for every knownGene transcript and then for
# the knownCanonical subset, and symlinks the concatenated results into the
# hgdownload alignments directory.
#
# Variables read (from the environment and/or ./buildEnv.sh):
#   dir              directory containing buildEnv.sh; doMafGene.log is
#                    written here
#   db               database name passed to hgsql and mafGene
#   multizDir        directory with the multiz files and species.list
#   mz               name of the multiz track
#   GENCODE_VERSION  suffix of the mafGene.knownGene<version> work directory
#
# All output of the main body goes to $dir/doMafGene.log.
#
# NOTE(review): the generated job files run mafGene in the background and
# use a bare "wait", so a failing mafGene job does not stop the script.
set -ex

# Point the download-area links for gene set $gp at the files just built in
# the current directory. Reads $db, $mz and $gp; sets and exports $pd.
publish_alignments() {
  export pd="/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments"
  mkdir -p "$pd"
  rm -f "$pd/$gp.exonAA.fa.gz" "$pd/$gp.exonNuc.fa.gz"
  ln -s "$(pwd)/$gp.$mz.exonAA.fa.gz" "$pd/$gp.exonAA.fa.gz"
  ln -s "$(pwd)/$gp.$mz.exonNuc.fa.gz" "$pd/$gp.exonNuc.fa.gz"
}

# Refuse to run with an empty $dir: a bare "cd" would silently go to $HOME.
cd "${dir:?Must set dir to the directory containing buildEnv.sh}"

{
  . ./buildEnv.sh

  # Stop on missing settings; continuing would build paths rooted at "/".
  if test "$multizDir" = ""; then
    echo "Must set multizDir to directory with multiz files in it"
    exit 1
  fi

  if test "$mz" = ""; then
    echo "Must set mz to name of multiz track"
    exit 1
  fi

  mkdir -p "$multizDir/mafGene.knownGene${GENCODE_VERSION}"
  cd "$multizDir/mafGene.knownGene${GENCODE_VERSION}"

  # One species per line, in the order given by species.list.
  tr ' ' '\n' < "$multizDir/species.list" > order.list

  # ---- Pass 1: all knownGene transcripts, one job pair per chromosome ----
  export gp=knownGene
  I=0
  rm -rf exonAA exonNuc
  mkdir exonAA exonNuc

  # Chromosome names contain no whitespace, so word-splitting the command
  # substitution is intentional here.
  for C in $(sort -nk2 ../../../chrom.sizes | cut -f1); do
    I=$((I + 1))
    echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
    echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
    # Emit a barrier after every 12 chromosomes so the background jobs
    # run in bounded batches.
    if [ "$I" -gt 11 ]; then
      echo "date"
      echo "wait"
      I=0
    fi
  done > "$gp.jobs"
  echo "date" >> "$gp.jobs"
  echo "wait" >> "$gp.jobs"

  time sh -x "./$gp.jobs"

  time cat exonAA/*.gz > "$gp.$mz.exonAA.fa.gz"
  time cat exonNuc/*.gz > "$gp.$mz.exonNuc.fa.gz"

  publish_alignments

  # ---- Pass 2: knownCanonical transcripts only ----
  export gp=knownCanonical
  I=0
  rm -rf exonAA exonNuc knownCanonical
  mkdir exonAA exonNuc knownCanonical

  # Dump the canonical transcripts of each chromosome into its own BED file.
  time cut -f1 "$multizDir/../../chrom.sizes" | while read -r C; do
    echo "$C" 1>&2
    hgsql "$db" -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > "knownCanonical/$C.known.bed"
  done

  # Generate jobs only for chromosomes that have canonical transcripts.
  for F in knownCanonical/*.known.bed; do
    # Skips empty BED files, and the literal pattern when nothing matched.
    [ -s "$F" ] || continue
    C=${F#knownCanonical/}
    C=${C%.known.bed}
    # Count chromosomes so the batch barrier below actually fires; without
    # this every job would be started at once.
    I=$((I + 1))
    echo "date"
    echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
    echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
    if [ "$I" -gt 11 ]; then
      echo "date"
      echo "wait"
      I=0
    fi
  done > "$gp.$mz.jobs"
  echo "date" >> "$gp.$mz.jobs"
  echo "wait" >> "$gp.$mz.jobs"

  time sh -x "$gp.$mz.jobs"

  # NOTE(review): only files starting with "c" are concatenated here, unlike
  # pass 1 which takes *.gz. Kept as in the original; confirm it is intended.
  cat exonAA/c*.gz > "$gp.$mz.exonAA.fa.gz"
  cat exonNuc/c*.gz > "$gp.$mz.exonNuc.fa.gz"

  rm -rf exonAA exonNuc knownCanonical

  publish_alignments

  #cd $pd
  #md5sum *.fa.gz > md5sum.txt

  echo "BuildMafGene successfully finished"
} > doMafGene.log < /dev/null 2>&1