180fdd5b8ed0b1d93cde304e58816ee64eb9f7f0 hiram Mon Aug 17 12:22:20 2020 -0700 have 2bit masked with custom repeat library refs #23367 diff --git src/hg/makeDb/doc/ambMex2/initialBuild.txt src/hg/makeDb/doc/ambMex2/initialBuild.txt index d5a42df..13ce20c 100644 --- src/hg/makeDb/doc/ambMex2/initialBuild.txt +++ src/hg/makeDb/doc/ambMex2/initialBuild.txt @@ -221,69 +221,72 @@ # temporary symlink until masked sequence is available cd /hive/data/genomes/ambMex2 ln -s `pwd`/ambMex2.unmasked.2bit /gbdb/ambMex2/ambMex2.2bit ############################################################################## # cpgIslands on UNMASKED sequence (TBD - 2018-10-11 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/cpgIslandsUnmasked cd /hive/data/genomes/ambMex2/bed/cpgIslandsUnmasked time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \ -tableName=cpgIslandExtUnmasked \ -maskedSeq=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit \ -workhorse=hgwdev -smallClusterHub=ku ambMex2) > do.log 2>&1 XXX - running - Fri Apr 12 23:24:42 PDT 2019 +XXX - something is too large: +MALLOC failure reqesting -2147483648 bytes - aborting + # real 2m11.881s cat fb.ambMex2.cpgIslandExtUnmasked.txt # 27399280 bases of 1055588482 (2.596%) in intersection ############################################################################# # cytoBandIdeo - (DONE - 2019-04-12 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/cytoBand cd /hive/data/genomes/ambMex2/bed/cytoBand makeCytoBandIdeo.csh ambMex2 ############################################################################# -# run up idKeys files for chromAlias/ncbiRefSeq (DONE - 2019-04-12 - Hiram) +# run up idKeys files for chromAlias/ncbiRefSeq (DONE - 2019-04-15 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/idKeys cd /hive/data/genomes/ambMex2/bed/idKeys time (doIdKeys.pl \ -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit \ -buildDir=`pwd` ambMex2) > do.log 2>&1 & -XXX - running - Fri Apr 12 23:26:32 PDT 2019 - # real 0m47.105s + # real 
29m20.505s cat ambMex2.keySignature.txt - # 7850e2d5dabb6134fdc9d7083f1a3a54 + # 72abcdcc8a28b54cad2ff751c3494bed ############################################################################# -# gapOverlap (DONE - 2019-04-12 - Hiram) +# gapOverlap (DONE - 2019-04-15 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/gapOverlap cd /hive/data/genomes/ambMex2/bed/gapOverlap time (doGapOverlap.pl \ -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit ambMex2 ) \ > do.log 2>&1 & -XXX - running - Fri Apr 12 23:26:32 PDT 2019 - # real 1m40.205s + # real 4m30.732s - # results are empty, there are none found. + # only a few: + wc -l bed.tab + # 64 bed.tab cat fb.ambMex2.gapOverlap.txt - # 97216 bases of 2615516299 (0.004%) in intersection + # 16776 bases of 32396387346 (0.000%) in intersection ############################################################################# # tandemDups (DONE - 2019-04-12 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/tandemDups cd /hive/data/genomes/ambMex2/bed/tandemDups time (~/kent/src/hg/utils/automation/doTandemDup.pl \ -twoBit=/hive/data/genomes/ambMex2/ambMex2.unmasked.2bit ambMex2) \ > do.log 2>&1 & XXX - running - Fri Apr 12 23:26:32 PDT 2019 # real 97m29.383s cat fb.ambMex2.tandemDups.txt # 24887623 bases of 1065365425 (2.336%) in intersection bigBedInfo ambMex2.tandemDups.bb | sed -e 's/^/# /;' @@ -461,187 +464,270 @@ searchTable gold shortCircuit 1 termRegex [AN][AC][D0-9_][N0-9][0-9]+(\.[0-9]+)? 
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%' searchPriority 8 # verify searches work in the position box ########################################################################## # running repeat masker (DONE - 2018-04-12 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/repeatMasker cd /hive/data/genomes/ambMex2/bed/repeatMasker time (doRepeatMasker.pl -buildDir=`pwd` \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -smallClusterHub=ku ambMex2) > do.log 2>&1 -XXX - running - Fri Apr 12 23:27:57 PDT 2019 - # real 48m25.181s + # real 216m7.175s cat faSize.rmsk.txt -# 1065365425 bases (9784466 N's 1055580959 real 922186059 upper -# 133394900 lower) in 464 sequences in 1 files -# Total size: mean 2296046.2 sd 14494999.8 min 87 (chrUn_NW_020109844v1) -# max 197608386 (chr1) median 10066 -# %12.52 masked total, %12.64 masked real +# 32396387346 bases (4029676509 N's 28366710837 real 28112571951 upper +# 254138886 lower) in 98071 sequences in 1 files +# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1) +# max 2030161756 (chr7) median 40920 +# %0.78 masked total, %0.90 masked real egrep -i "versi|relea" do.log - # RepeatMasker version open-4.0.7 - # February 01 2017 (open-4-0-7) 1.331 version of RepeatMasker - # CC Dfam_Consensus RELEASE 20170127; * - # CC RepBase RELEASE 20170127; +# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $ +# February 01 2017 (open-4-0-8) 1.332 version of RepeatMasker +# CC Dfam_Consensus RELEASE 20181026; * +# CC RepBase RELEASE 20181026; +XXX - this standard run is useless, note the custom library used next procedure time featureBits -countGaps ambMex2 rmsk # 133395265 bases of 1065365425 (12.521%) in intersection # real 0m4.226s # why is it different than the faSize above ? 
# because rmsk masks out some N's as well as bases, the faSize count above # separates out the N's from the bases, it doesn't show lower case N's # faster way to get the same result on high contig count assemblies: time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' ambMex2 \ | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total" # total 133395265.000000 # real 0m3.198s -########################################################################## -# running simple repeat (DONE - 2019-04-12 - Hiram) +############################################################################### +# running repeat masker (DONE - 2020-06-19 - 2020-08-15 - Hiram) + # using a custom library from Jeramiah Smith they developed with + # RepeatModeler + + mkdir /hive/data/genomes/ambMex2/bed/repeatModeler + cd /hive/data/genomes/ambMex2/bed/repeatModeler + + # note the file used for customLib, this took almost two months running + # time with little interference on the ku kluster + + doRepeatMasker.pl -buildDir=`pwd` -customLib=`pwd`/LTRs_all_repeats.fa \ + -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ + -smallClusterHub=hgwdev ambMex2 + cat run.cluster/run.time +# Completed: 65638 of 65638 jobs +# CPU time in finished jobs: 4047318392s 67455306.53m 1124255.11h 46843.96d 128.340 y +# IO & Wait Time: 11101559s 185025.99m 3083.77h 128.49d 0.352 y +# Average job time: 61830s 1030.51m 17.18h 0.72d +# Longest finished job: 77503s 1291.72m 21.53h 0.90d +# Submission to last job: 4811964s 80199.40m 1336.66h 55.69d + + # continuing after the kluster run is complete: + doRepeatMasker.pl -buildDir=`pwd` -customLib=`pwd`/LTRs_all_repeats.fa \ + -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ + -continue=cat -smallClusterHub=hgwdev ambMex2 + # real 329m25.992s + + # much better result with this custom library: + cat faSize.rmsk.txt +# 32396387346 bases (4029676509 N's 28366710837 real 10003444277 upper +# 18363266560 lower) in 98071 sequences in 1 files +# Total size: mean 
330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1) +# max 2030161756 (chr7) median 40920 +# %56.68 masked total, %64.74 masked real + + egrep -i "versi|relea" do.log +# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $ +# CC Dfam_Consensus RELEASE 20181026; * +# CC RepBase RELEASE 20181026; * + + time featureBits -countGaps ambMex2 rmsk + # 18368951822 bases of 32396387346 (56.701%) in intersection + # real 4m34.562s + + # why is it different than the faSize above ? + # because rmsk masks out some N's as well as bases, the faSize count above + # separates out the N's from the bases, it doesn't show lower case N's + + # faster way to get the same result on high contig count assemblies: + time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' ambMex2 \ + | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total" + # total 18368951822.000000 + # real 2m8.428s + +############################################################################### +# running simple repeat (DONE - 2019-04-15 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/simpleRepeat cd /hive/data/genomes/ambMex2/bed/simpleRepeat time (doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \ -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \ -trf409=6 ambMex2) > do.log 2>&1 -XXX - running - Fri Apr 12 23:28:56 PDT 2019 - # real 58m3.288s + # real 30m12.201s cat fb.simpleRepeat - # 31110690 bases of 1055588482 (2.947%) in intersection + # 1399134851 bases of 32393621946 (4.319%) in intersection cd /hive/data/genomes/ambMex2 - # using the Window Masker result: + # if using the Window Masker result: cd /hive/data/genomes/ambMex2 twoBitMask bed/windowMasker/ambMex2.cleanWMSdust.2bit \ -add bed/simpleRepeat/trfMask.bed ambMex2.2bit # you can safely ignore the warning about fields >= 13 - # add to rmsk after it is done: -# twoBitMask ambMex2.rmsk.2bit \ -# -add bed/simpleRepeat/trfMask.bed ambMex2.2bit + # or using RepeatMasker result add to rmsk after it is 
done: + twoBitMask ambMex2.rmsk.2bit \ + -add bed/simpleRepeat/trfMask.bed ambMex2.2bit # you can safely ignore the warning about fields >= 13 twoBitToFa ambMex2.2bit stdout | faSize stdin > faSize.ambMex2.2bit.txt cat faSize.ambMex2.2bit.txt -# 1065365425 bases (9784466 N's 1055580959 real 829559086 upper -# 226021873 lower) in 464 sequences in 1 files -# Total size: mean 2296046.2 sd 14494999.8 min 87 (chrUn_NW_020109844v1) -# max 197608386 (chr1) median 10066 -# %21.22 masked total, %21.41 masked real +# 32396387346 bases (4029676509 N's 28366710837 real 9998218507 upper +# 18368492330 lower) in 98071 sequences in 1 files +# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1) +# max 2030161756 (chr7) median 40920 +# %56.70 masked total, %64.75 masked real rm /gbdb/ambMex2/ambMex2.2bit ln -s `pwd`/ambMex2.2bit /gbdb/ambMex2/ambMex2.2bit ######################################################################### -# CREATE MICROSAT TRACK (TBD - 2018-10-11 - Hiram) +# CREATE MICROSAT TRACK (DONE - 2020-08-17 - Hiram) ssh hgwdev mkdir /cluster/data/ambMex2/bed/microsat cd /cluster/data/ambMex2/bed/microsat awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \ ../simpleRepeat/simpleRepeat.bed > microsat.bed hgLoadBed ambMex2 microsat microsat.bed - # Read 1745 elements of size 4 from microsat.bed + # Read 56937 elements of size 4 from microsat.bed ########################################################################## ## WINDOWMASKER (DONE - 2019-04-15 - Hiram) - + # Odd result here, WM masked all but 703 bases ? 
mkdir /hive/data/genomes/ambMex2/bed/windowMasker cd /hive/data/genomes/ambMex2/bed/windowMasker time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev ambMex2) > do.log 2>&1 -XXX - running - Mon Apr 15 22:55:39 PDT 2019 - # real 26m58.753s + # real 1747m17.123s # Masking statistics cat faSize.ambMex2.cleanWMSdust.txt -# 1065365425 bases (9784466 N's 1055580959 real 830149186 upper -# 225431773 lower) in 464 sequences in 1 files -# Total size: mean 2296046.2 sd 14494999.8 min 87 (chrUn_NW_020109844v1) -# max 197608386 (chr1) median 10066 -# %21.16 masked total, %21.36 masked real +# 32396387346 bases (4029676509 N's 28366710837 real 703 upper 28366710134 +# lower) in 98071 sequences in 1 files +# Total size: mean 330336.1 sd 20104017.6 min 1033 (chrUn_PGSH01113832v1) +# max 2030161756 (chr7) median 40920 +# %87.56 masked total, %100.00 masked real cat fb.ambMex2.rmsk.windowmaskerSdust.txt - # 86091413 bases of 1065365425 (8.081%) in intersection + # 18368939458 bases of 32396387346 (56.701%) in intersection ########################################################################## # cpgIslands - (TBD - 2018-10-11 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/cpgIslands cd /hive/data/genomes/ambMex2/bed/cpgIslands time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \ -workhorse=hgwdev -smallClusterHub=ku ambMex2) > do.log 2>&1 # real 2m5.105s cat fb.ambMex2.cpgIslandExt.txt # 16395346 bases of 1055588482 (1.553%) in intersection ############################################################################## -# genscan - (TBD - 2018-10-11 - Hiram) +# genscan - (DONE - 2020-08-17 - Hiram) +XXX - waiting for ku to return after power fails - Mon Aug 17 12:11:48 PDT 2020 mkdir /hive/data/genomes/ambMex2/bed/genscan cd /hive/data/genomes/ambMex2/bed/genscan time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ -bigClusterHub=ku ambMex2) > do.log 2>&1 # real 88m34.900s cat fb.ambMex2.genscan.txt # 23911678 bases of 1055588482 (2.265%) 
in intersection cat fb.ambMex2.genscanSubopt.txt # 24521608 bases of 1055588482 (2.323%) in intersection ######################################################################### -# Create kluster run files (TBD - 2018-10-11 - Hiram) +# Create kluster run files (DONE - 2020-08-17 - Hiram) # numerator is ambMex2 gapless bases "real" as reported by: featureBits -noRandom -noHap ambMex2 gap - # 9758843 bases of 1040397755 (0.938%) in intersection + # 2765400 bases of 27505544706 (0.010%) in intersection # ^^^ # denominator is hg19 gapless bases as reported by: # featureBits -noRandom -noHap hg19 gap # 234344806 bases of 2861349177 (8.190%) in intersection # 1024 is threshold used for human -repMatch: - calc \( 1040397755 / 2861349177 \) \* 1024 - # ( 1040397755 / 2861349177 ) * 1024 = 372.330406 + calc \( 27505544706 / 2861349177 \) \* 1024 + # ( 27505544706 / 2861349177 ) * 1024 = 9843.495511 - # ==> use -repMatch=350 according to size scaled down from 1024 for human. - # and rounded down to nearest 50 + # ==> use -repMatch=9000 according to size scaled up from 1024 for human. 
+ # and rounded down to nearest 1000 + # experiment with 9000, 8000, 7000 - using 7000 as it makes a + # reasonable number cd /hive/data/genomes/ambMex2 - blat ambMex2.2bit \ + time blat ambMex2.2bit \ /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/ambMex2.11.ooc \ - -repMatch=350 - # Wrote 18169 overused 11-mers to jkStuff/ambMex2.11.ooc - - # check non-bridged gaps to see what the typical size is: - hgsql -N \ - -e 'select * from gap where bridge="no" order by size;' ambMex2 \ - | sort -k7,7nr | ave -col=7 stdin - # minimum gap size is 10 and produces a reasonable number of lifts - gapToLift -verbose=2 -minGap=10 ambMex2 jkStuff/nonBridged.lft \ - -bedFile=jkStuff/nonBridged.bed - wc -l jkStuff/nonBri* - # 525 jkStuff/nonBridged.bed - # 525 jkStuff/nonBridged.lft + -repMatch=7000 + # real 4m11.198s + + # at repMatch 9000 + # Wrote 9042 overused 11-mers to jkStuff/ambMex2.11.ooc + # at repMatch 8000 + # Wrote 13163 overused 11-mers to jkStuff/ambMex2.11.ooc + # at repMatch 7000 + # Wrote 20332 overused 11-mers to jkStuff/ambMex2.11.ooc + + # there are no non-bridged gaps + hgsql -N -e 'select bridge from gap;' ambMex2 | sort | uniq -c + # 27654 yes + # survey gap sizes: + # all gaps are size 100 + hgsql -N -e 'select size from gap where bridge="yes" order by size;' \ + ambMex2 | ave stdin | sed -e 's/^/# /;' +# Q1 100.000000 +# median 100.000000 +# Q3 100.000000 +# average 100.000000 +# min 100.000000 +# max 100.000000 +# count 27654 +# total 2765400.000000 +# standard deviation 0.000000 + + # minimum gap size is 100: + gapToLift -verbose=2 -minGap=100 ambMex2 jkStuff/ambMex2.100baseGaps.lft \ + -allowBridged -bedFile=jkStuff/ambMex2.100baseGaps.bed + wc -l jkStuff/ambMex* + # 125725 jkStuff/ambMex2.100baseGaps.bed + # 125725 jkStuff/ambMex2.100baseGaps.lft + + # to see the gaps used: + bedInvert.pl chrom.sizes jkStuff/ambMex2.100baseGaps.bed | less + # and their sizes: + bedInvert.pl chrom.sizes jkStuff/ambMex2.100baseGaps.bed \ + | cut -f4 | sort -n | uniq -c 
| less + # 27654 100 ######################################################################## # lastz/chain/net swap human/hg38 (TBD - 2018-10-12 - Hiram) # original alignment cd /hive/data/genomes/hg38/bed/lastzAmbMex2.2018-10-12 cat fb.hg38.chainAmbMex2Link.txt # 154079940 bases of 3095998939 (4.977%) in intersection cat fb.hg38.chainSynAmbMex2Link.txt # 95877644 bases of 3095998939 (3.097%) in intersection cat fb.hg38.chainRBest.AmbMex2.txt # 106665747 bases of 3095998939 (3.445%) in intersection # and for the swap: mkdir /hive/data/genomes/ambMex2/bed/blastz.hg38.swap @@ -692,78 +778,79 @@ cat fb.ambMex2.chainMm10Link.txt # 88539346 bases of 1055588482 (8.388%) in intersection time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` ambMex2 mm10) > rbest.log 2>&1 & # real 94m11.007s cat fb.ambMex2.chainRBest.Mm10.txt # 79474812 bases of 1055588482 (7.529%) in intersection ######################################################################### # GENBANK AUTO UPDATE (TBD - 2018-10-12 - Hiram) ssh hgwdev cd $HOME/kent/src/hg/makeDb/genbank git pull # /cluster/data/genbank/data/organism.lst shows: - # #organism mrnaCnt estCnt refSeqCnt - # Gallus gallus 30708 600485 6392 + # organism mrnaCnt estCnt refSeqCnt + # Ambystoma mexicanum 7749 43323 0 - # edit etc/genbank.conf to add ambMex2 just before galGal5 + # edit etc/genbank.conf to add ambMex2 just before ambMex2 -# ambMex2 (chicken/GCF_000002315.5_GRCg6a) +# ambMex2 (Axolotl - Ambystoma mexicanum) GCA_002915635.2 - 30Gb total ambMex2.serverGenome = /hive/data/genomes/ambMex2/ambMex2.2bit -ambMex2.clusterGenome = /hive/data/genomes/ambMex2/ambMex2.2bit ambMex2.ooc = /hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc -ambMex2.lift = /hive/data/genomes/ambMex2/jkStuff/nonBridged.lft +ambMex2.lift = /hive/data/genomes/ambMex2/jkStuff/ambMex2.100baseGaps.lft ambMex2.perChromTables = no -ambMex2.refseq.mrna.native.pslCDnaFilter = ${finished.refseq.mrna.native.pslCDnaFilter} -ambMex2.refseq.mrna.xeno.pslCDnaFilter 
= ${finished.refseq.mrna.xeno.pslCDnaFilter} -ambMex2.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter} -ambMex2.genbank.mrna.xeno.pslCDnaFilter = ${finished.genbank.mrna.xeno.pslCDnaFilter} -ambMex2.genbank.est.native.pslCDnaFilter = ${finished.genbank.est.native.pslCDnaFilter} -ambMex2.genbank.est.xeno.pslCDnaFilter = ${finished.genbank.est.xeno.pslCDnaFilter} -ambMex2.refseq.mrna.native.load = yes -ambMex2.refseq.mrna.xeno.load = yes -ambMex2.genbank.mrna.xeno.load = yes ambMex2.downloadDir = ambMex2 -# ambMex2.upstreamGeneTbl = refGene -# ambMex2.upstreamMaf = multiz7way /hive/data/genomes/galGal4/bed/multiz7way/species.lst +ambMex2.refseq.mrna.xeno.pslCDnaFilter = ${ordered.refseq.mrna.xeno.pslCDnaFilter} +ambMex2.refseq.mrna.native.pslCDnaFilter = ${ordered.refseq.mrna.native.pslCDnaFilter} +ambMex2.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter} +ambMex2.genbank.mrna.xeno.pslCDnaFilter = ${ordered.genbank.mrna.xeno.pslCDnaFilter} +ambMex2.genbank.est.native.pslCDnaFilter = ${ordered.genbank.est.native.pslCDnaFilter} +ambMex2.genbank.est.xeno.pslCDnaFilter = ${ordered.genbank.est.xeno.pslCDnaFilter} +# defaults yes: genbank.mrna.native.load genbank.mrna.native.loadDesc +# yes: genbank.est.native.load refseq.mrna.native.load +# yes: refseq.mrna.native.loadDesc refseq.mrna.xeno.load +# yes: refseq.mrna.xeno.loadDesc +# defaults no: genbank.mrna.xeno.load genbank.mrna.xeno.loadDesc +# no: genbank.est.native.loadDesc genbank.est.xeno.load +# no: genbank.est.xeno.loadDesc +# DO NOT NEED genbank.mrna.xeno except for human, mouse +# ambMex2.upstreamGeneTbl = ensGene +# ambMex2.upstreamMaf = multiz6way /hive/data/genomes/ambMex2/bed/multiz6way/species.list # verify the files specified exist before checking in the file: grep ^ambMex2 etc/genbank.conf | grep hive | awk '{print $NF}' | xargs ls -og -# -rw-rw-r-- 1 313201328 Oct 11 15:51 /hive/data/genomes/ambMex2/ambMex2.2bit -# -rw-rw-r-- 1 313201328 Oct 
11 15:51 /hive/data/genomes/ambMex2/ambMex2.2bit -# -rw-rw-r-- 1 72684 Oct 11 15:56 /hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc -# -rw-rw-r-- 1 29513 Oct 11 15:57 /hive/data/genomes/ambMex2/jkStuff/nonBridged.lft +-rw-rw-r-- 1 8271637678 Aug 17 10:51 /hive/data/genomes/ambMex2/ambMex2.2bit +-rw-rw-r-- 1 7002521 Aug 17 12:05 /hive/data/genomes/ambMex2/jkStuff/ambMex2.100baseGaps.lft +-rw-rw-r-- 1 81336 Aug 17 11:46 /hive/data/genomes/ambMex2/jkStuff/ambMex2.11.ooc - git commit -m "Added ambMex2; refs #22113" etc/genbank.conf + git commit -m "Added ambMex2; refs #23367" etc/genbank.conf git push # update /cluster/data/genbank/: make etc-update # enable daily alignment and update of hgwdev cd ~/kent/src/hg/makeDb/genbank git pull # add ambMex2 to: # etc/align.dbs etc/hgwdev.dbs - git add etc/align.dbs etc/hgwdev.dbs - git commit -m "Added ambMex2 - chicken refs #22113" etc/hgwdev.dbs + git commit -m "Added ambMex2 refs #23367" etc/hgwdev.dbs etc/align.dbs git push make etc-update - # wait a few days for genbank magic to take place, the tracks will - # appear + # Notify Chris this is ready to go 2020-08-17 ############################################################################# # augustus gene track (TBD - 2018-10-12 - Hiram) mkdir /hive/data/genomes/ambMex2/bed/augustus cd /hive/data/genomes/ambMex2/bed/augustus time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \ -species=chicken -dbHost=hgwdev \ -workhorse=hgwdev ambMex2) > do.log 2>&1 # real 48m48.597s cat fb.ambMex2.augustusGene.txt # 25827925 bases of 1055588482 (2.447%) in intersection ######################################################################### @@ -914,31 +1001,31 @@ # real 307m41.143s ######################################################################### # all.joiner update, downloads and in pushQ - (TBD - 2018-10-17 - Hiram) xyz cd $HOME/kent/src/hg/makeDb/schema # verify all the business is done for release ~/kent/src/hg/utils/automation/verifyBrowser.pl ambMex2 # fixup 
all.joiner until this is a clean output joinerCheck -database=ambMex2 -tableCoverage all.joiner joinerCheck -database=ambMex2 -times all.joiner joinerCheck -database=ambMex2 -keys all.joiner # when clean, check in: - git commit -m 'adding rules for ambMex2 refs #22113' all.joiner + git commit -m 'adding rules for ambMex2 refs #23367' all.joiner git push # run up a 'make alpha' in hg/hgTables to get this all.joiner file # into the hgwdev/genome-test system cd /hive/data/genomes/ambMex2 time (makeDownloads.pl ambMex2) > downloads.log 2>&1 # real 10m7.605s # now ready for pushQ entry mkdir /hive/data/genomes/ambMex2/pushQ cd /hive/data/genomes/ambMex2/pushQ time (makePushQSql.pl -redmineList ambMex2) > ambMex2.pushQ.sql 2> stderr.out # real 9m58.779s # remove the extra chainNet files from the listings: