9f629656dd1bb546fb3e222ae28e9c68486936cb hiram Wed Dec 23 17:03:31 2020 -0800 ready for wiki summary pages and download README files refs #25864 diff --git src/hg/makeDb/doc/mm39/multiz35way.txt src/hg/makeDb/doc/mm39/multiz35way.txt index 4536e4a..fbe1705 100644 --- src/hg/makeDb/doc/mm39/multiz35way.txt +++ src/hg/makeDb/doc/mm39/multiz35way.txt @@ -1724,140 +1724,149 @@ ############################################################################# # construct download files for 35-way (TBD - 2015-04-15 - Hiram) mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way mkdir /hive/data/genomes/mm39/bed/multiz35way/downloads cd /hive/data/genomes/mm39/bed/multiz35way/downloads mkdir multiz35way phastCons35way phyloP35way ######################################################################### ## create upstream refGene maf files cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way # bash script #!/bin/sh -export geneTbl="refGene" +export geneTbl="ncbiRefSeq" for S in 300 2000 5000 do echo "making upstream${S}.maf" featureBits mm39 ${geneTbl}:upstream:${S} -fa=/dev/null -bed=stdout \ | perl -wpe 's/_up[^\t]+/\t0/' | sort -k1,1 -k2,2n \ | /cluster/bin/$MACHTYPE/mafFrags mm39 multiz35way \ stdin stdout \ -orgs=/hive/data/genomes/mm39/bed/multiz35way/species.list \ | gzip -c > upstream${S}.${geneTbl}.maf.gz echo "done upstream${S}.${geneTbl}.maf.gz" done - +XXX - running - Wed Dec 23 14:36:49 PST 2020 # real 88m40.730s -rw-rw-r-- 1 52659159 Nov 6 11:46 upstream300.ncbiRefSeq.maf.gz -rw-rw-r-- 1 451126665 Nov 6 12:15 upstream2000.ncbiRefSeq.maf.gz -rw-rw-r-- 1 1080533794 Nov 6 12:55 upstream5000.ncbiRefSeq.maf.gz ###################################################################### ## compress the maf files cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way mkdir maf - rsync -a -P 
../../anno/result/ ./maf/ - du -hsc maf/ - # 156G maf + time rsync -a -P ../../maf/ ./maf/ + # real 12m9.290s + + du -hscL maf/ ../../maf/ + # 141G maf/ + # 141G ../../maf/ + cd maf time gzip *.maf & - # real 135m1.784s +XXX - running - Wed Dec 23 14:55:47 PST 2020 + # real 81m10.239s - du -hscL maf ../../anno/result/ + du -hscL maf ../../maf/ # 18G maf cd maf md5sum *.maf.gz *.nh > md5sum.txt mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/maf cd maf ln -s `pwd`/* /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/maf cd -- ln -s `pwd`/*.maf.gz `pwd`/*.nh `pwd`/*.txt \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/ ########################################################################### cd /hive/data/genomes/mm39/bed/multiz35way/downloads/multiz35way grep TREE ../../4d/all.mod | awk '{print $NF}' \ | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \ > mm39.35way.nh - ~/kent/src/hg/utils/phyloTrees/commonNames.sh mm39.35way.nh \ - | ~/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \ - > mm39.35way.commonNames.nh - ~/kent/src/hg/utils/phyloTrees/scientificNames.sh mm39.35way.nh \ - | $HOME/kent/src/hg/utils/phyloTrees/asciiTree.pl /dev/stdin \ + + sed -f ../../db.to.name.sed mm39.35way.nh \ + | sed -e "s#_x_#'#g; s#X__#X._#;" > mm39.35way.commonNames.nh + + sed -f ../../db.to.sciName.sed mm39.35way.nh \ > mm39.35way.scientificNames.nh + time md5sum *.nh *.maf.gz > md5sum.txt # real 0m3.147s ln -s `pwd`/*.maf.gz `pwd`/*.nh \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way - du -hsc ./maf ../../anno/result + du -hscL ./maf ../../maf # 18G ./maf - # 156G ../../anno/result + # 156G ../../maf - # obtain the README.txt from mm39/multiz20way and update for this +XXX + # obtain the README.txt from danRer10/multiz12way and update for this # situation ln -s `pwd`/*.txt \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/multiz35way/ 
##################################################################### cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phastCons35way mkdir mm39.35way.phastCons cd mm39.35way.phastCons ln -s ../../../cons/all/downloads/*.wigFix.gz . md5sum *.gz > md5sum.txt cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phastCons35way ln -s ../../cons/all/phastCons35way.bw ./mm39.phastCons35way.bw ln -s ../../cons/all/all.mod ./mm39.phastCons35way.mod time md5sum *.mod *.bw > md5sum.txt # real 0m20.354s +XXX # obtain the README.txt from mm39/phastCons20way and update for this mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way/mm39.35way.phastCons cd mm39.35way.phastCons ln -s `pwd`/* /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way/mm39.35way.phastCons cd .. # situation ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phastCons35way ##################################################################### cd /hive/data/genomes/mm39/bed/multiz35way/downloads/phyloP35way mkdir mm39.35way.phyloP cd mm39.35way.phyloP ln -s ../../../consPhyloP/all/downloads/*.wigFix.gz . md5sum *.wigFix.gz > md5sum.txt cd .. ln -s ../../consPhyloP/run.phyloP/all.mod mm39.phyloP35way.mod ln -s ../../consPhyloP/all/phyloP35way.bw mm39.phyloP35way.bw md5sum *.mod *.bw > md5sum.txt +XXX # obtain the README.txt from mm39/phyloP20way and update for this mkdir /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way/mm39.35way.phyloP cd mm39.35way.phyloP ln -s `pwd`/* \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way/mm39.35way.phyloP cd .. 
# situation ln -s `pwd`/*.mod `pwd`/*.bw `pwd`/*.txt \ /usr/local/apache/htdocs-hgdownload/goldenPath/mm39/phyloP35way ############################################################################# # hgPal downloads (TBD - 2017-11-06 - Hiram) # FASTA from 35-way for ncbiRefSeq, refGene and knownCanonical @@ -1882,58 +1891,55 @@ dNum=`echo $D | awk '{printf "%03d", int($1/300)}'` mkdir -p exonNuc/${dNum} > /dev/null mkdir -p exonAA/${dNum} > /dev/null echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/${dNum}/$C.exonNuc.fa.gz &" echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/${dNum}/$C.exonAA.fa.gz &" if [ $I -gt 16 ]; then echo "date" echo "wait" I=0 fi done > $gp.jobs echo "date" >> $gp.jobs echo "wait" >> $gp.jobs time (sh -x ./$gp.jobs) > $gp.jobs.log 2>&1 -XXX - running - Tue Dec 22 21:33:28 PST 2020 - # real 79m18.323s + # real 18m43.962s export mz=multiz35way export gp=ncbiRefSeq time find ./exonAA -type f | grep exonAA.fa.gz | xargs zcat \ | gzip -c > $gp.$mz.exonAA.fa.gz - # real 1m28.841s + # real 2m0.962s time find ./exonNuc -type f | grep exonNuc.fa.gz | xargs zcat \ | gzip -c > $gp.$mz.exonNuc.fa.gz - # real 3m56.370s + # real 10m12.351s - # -rw-rw-r-- 1 397928833 Nov 6 18:44 ncbiRefSeq.multiz35way.exonAA.fa.gz - # -rw-rw-r-- 1 580377720 Nov 6 18:49 ncbiRefSeq.multiz35way.exonNuc.fa.gz + # -rw-rw-r-- 1 906052407 Dec 23 16:34 ncbiRefSeq.multiz35way.exonAA.fa.gz + # -rw-rw-r-- 1 1596566489 Dec 23 16:53 ncbiRefSeq.multiz35way.exonNuc.fa.gz export mz=multiz35way export gp=ncbiRefSeq export db=mm39 export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments mkdir -p $pd + md5sum *.fa.gz > md5sum.txt ln -s `pwd`/$gp.$mz.exonAA.fa.gz $pd/$gp.exonAA.fa.gz ln -s `pwd`/$gp.$mz.exonNuc.fa.gz $pd/$gp.exonNuc.fa.gz ln -s `pwd`/md5sum.txt $pd/ - cd $pd - md5sum *.fa.gz > md5sum.txt - rm -rf exonAA exonNuc ############################################################################# 
# wiki page for 35-way (TBD - 2017-11-06 - Hiram) mkdir /hive/users/hiram/bigWays/mm39.35way cd /hive/users/hiram/bigWays echo "mm39" > mm39.35way/ordered.list awk '{print $1}' /hive/data/genomes/mm39/bed/multiz35way/35way.distances.txt \ >> mm39.35way/ordered.list # sizeStats.sh brings the cached measurements required for the data # in the tables up to date. They are usually already mostly done; only new # assemblies will need updates. ./sizeStats.sh mm39.35way/ordered.list # dbDb.sh constructs mm39.35way/XenTro9_35-way_conservation_alignment.html