# 3846c108e2ff699c4d1de31c4b38f7379e1cf9ee braney Sat Apr 6 07:51:15 2019 -0700
# new CDS FASTA downloads in honor of Gencode VM20 release
#
# diff --git src/hg/makeDb/doc/mm10.txt src/hg/makeDb/doc/mm10.txt
# index 2108959..99e7fac 100644
# --- src/hg/makeDb/doc/mm10.txt
# +++ src/hg/makeDb/doc/mm10.txt
# @@ -17731,15 +17731,153 @@ time (doSameSpeciesLiftOver.pl -verbose=2 \
#
# Unchanged hunk context follows.  It is the tail of the preceding
# doSameSpeciesLiftOver.pl command, whose opening lines are not fully visible
# here, so it is kept verbatim as commentary rather than as a runnable command:
#
#     -fileServer=hgwdev \
#     -query2Bit=/hive/data/genomes/mm10/mm10.2bit \
#     -querySizes=/hive/data/genomes/mm10/chrom.sizes \
#     -target2Bit=/hive/data/genomes/GRCm38B/GRCm38B.2bit \
#     -targetSizes=/hive/data/genomes/GRCm38B/chrom.sizes \
#     -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
#     -ooc=/hive/data/genomes/mm10/mm10.11.ooc \
#     mm10 GRCm38B) > doLiftOverToGRCm38B.log 2>&1
#     # real 156m50.777s
#
#     # see if the liftOver menus function in the browser from mm10 to GRCm38B
#
#########################################################################
#############################################################################
# hgPal downloads (rebuilt knownGene and knownCanonical 2019-04-01 braney )

    ssh hgwdev
    mkdir /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
    cd /hive/data/genomes/mm10/bed/multiz60way/pal.ucsc18
    # turn the space-separated species list into one name per line
    tr ' ' '\n' < ../species.list > order.list

    export mz=multiz60way
    export gp=knownGene
    export db=mm10
    export I=0
    mkdir exonAA exonNuc
    # Write a job script with two background mafGene pipelines per chromosome
    # (the -noTrans run goes to exonNuc/, the other to exonAA/).  Chromosome
    # names are taken from chrom.sizes sorted numerically on column 2.
    # After every 7th chromosome a "date" / "wait" barrier is emitted, so at
    # most 14 pipelines are in flight at once.
    for C in $(sort -nk2 ../../../chrom.sizes | cut -f1)
    do
        I=$((I + 1))
        echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
        echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ "$I" -gt 6 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done > "$gp.jobs"
    # final barrier for the last, possibly partial, batch
    echo "date" >> "$gp.jobs"
    echo "wait" >> "$gp.jobs"

    # Run in the foreground: the concatenation steps below read the per-chrom
    # outputs, so they must not start until every job has finished.
    time sh -x "./$gp.jobs" > "$gp.jobs.log" 2>&1
    # real 59m23.279s

    # merge the per-chromosome files into one download file per type
    time zcat exonAA/*.gz | gzip -c > "$gp.$mz.exonAA.fa.gz"
    # real 1m35.590s
    time zcat exonNuc/*.gz | gzip -c > "$gp.$mz.exonNuc.fa.gz"
    # real 7m46.538s
# Link the knownGene exon FASTA files into the download directory.
# `pwd` is used for the link targets, so run this from the directory that
# holds the $gp.$mz.exon*.fa.gz files (the preceding knownGene step's
# working directory).

    export mz=multiz60way
    export gp=knownGene
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    # start from an empty download directory; the links for every gene set
    # are recreated below
    rm -rf "$pd"
    mkdir -p "$pd"
    ln -s "$(pwd)/$gp.$mz.exonAA.fa.gz" "$pd/$gp.exonAA.fa.gz"
    ln -s "$(pwd)/$gp.$mz.exonNuc.fa.gz" "$pd/$gp.exonNuc.fa.gz"

    rm -rf exonAA exonNuc

    # NOTE(review): from here on the work happens in .../pal, not in the
    # .../pal.ucsc18 directory used for knownGene above.  Presumably pal already
    # exists with its own order.list -- confirm before re-running.
    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    export mz=multiz60way
    export gp=ncbiRefSeq
    export db=mm10
    export I=0
    mkdir exonAA exonNuc
    # Write a job script with two background mafGene pipelines per chromosome
    # and a "date" / "wait" barrier after every 7th chromosome.
    for C in $(sort -nk2 ../../../chrom.sizes | cut -f1)
    do
        I=$((I + 1))
        echo "mafGene -chrom=$C -exons -noTrans $db $mz $gp order.list stdout | gzip -c > exonNuc/$C.exonNuc.fa.gz &"
        echo "mafGene -chrom=$C -exons $db $mz $gp order.list stdout | gzip -c > exonAA/$C.exonAA.fa.gz &"
        if [ "$I" -gt 6 ]; then
            echo "date"
            echo "wait"
            I=0
        fi
    done > "$gp.jobs"
    # final barrier for the last, possibly partial, batch
    echo "date" >> "$gp.jobs"
    echo "wait" >> "$gp.jobs"

    time sh -x "$gp.jobs" > "$gp.jobs.log" 2>&1
    # real 126m0.688s

    export mz=multiz60way
    export gp=ncbiRefSeq
    export db=mm10
    time zcat exonAA/*.gz | gzip -c > "$gp.$mz.exonAA.fa.gz"
    # real 2m56.817s
    time zcat exonNuc/*.gz | gzip -c > "$gp.$mz.exonNuc.fa.gz"
    # real 14m8.080s

    rm -rf exonAA exonNuc

    # we're only distributing exons at the moment
    export mz=multiz60way
    export gp=ncbiRefSeq
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    ln -s "$(pwd)/$gp.$mz.exonAA.fa.gz" "$pd/$gp.exonAA.fa.gz"
    ln -s "$(pwd)/$gp.$mz.exonNuc.fa.gz" "$pd/$gp.exonNuc.fa.gz"

    ### And knownCanonical
    cd /hive/data/genomes/mm10/bed/multiz60way/pal
    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    mkdir exonAA exonNuc knownCanonical

    # dump the knownCanonical rows for each chromosome into its own bed file
    time cut -f1 ../../../chrom.sizes | while read -r C
    do
        echo "$C" 1>&2
        hgsql mm10 -N -e "select chrom, chromStart, chromEnd, transcript from knownCanonical where chrom='$C'" > "knownCanonical/$C.known.bed"
    done
    # real 0m15.897s

    # Write one pair of sequential mafGene jobs per chromosome whose bed file
    # is non-empty.
    for F in knownCanonical/*.known.bed
    do
        [ -s "$F" ] || continue
        # knownCanonical/<chrom>.known.bed -> <chrom>
        C=${F#knownCanonical/}
        C=${C%.known.bed}
        echo "date"
        echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons -noTrans $db $mz knownGene order.list stdout | \
            gzip -c > exonNuc/$C.exonNuc.fa.gz"
        echo "mafGene -geneBeds=knownCanonical/$C.known.bed -exons $db $mz knownGene order.list stdout | \
            gzip -c > exonAA/$C.exonAA.fa.gz"
    done > "$gp.$mz.jobs"

    time sh -x "$gp.$mz.jobs" > "$gp.$mz.job.log" 2>&1
    # 267m58.813s

    # the per-chromosome bed files were written under knownCanonical/, so
    # remove that directory rather than *.known.bed in the working directory
    rm -rf knownCanonical
    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    zcat exonAA/c*.gz | gzip -c > "$gp.$mz.exonAA.fa.gz" &
    zcat exonNuc/c*.gz | gzip -c > "$gp.$mz.exonNuc.fa.gz" &
    # about 6 minutes
    # both merges run in the background; let them finish before their inputs
    # are deleted
    wait

    rm -rf exonAA exonNuc

    export mz=multiz60way
    export gp=knownCanonical
    export db=mm10
    export pd=/usr/local/apache/htdocs-hgdownload/goldenPath/$db/$mz/alignments
    mkdir -p "$pd"
    ln -s "$(pwd)/$gp.$mz.exonAA.fa.gz" "$pd/$gp.exonAA.fa.gz"
    ln -s "$(pwd)/$gp.$mz.exonNuc.fa.gz" "$pd/$gp.exonNuc.fa.gz"

    # checksum every published FASTA file
    cd "$pd"
    md5sum *.fa.gz > md5sum.txt