4deb0c9ffddc09de17d71a387cec95c9a1cbfc14 angie Thu Feb 20 11:30:48 2020 -0800 Oops, need to ensure sortedness of db tables after adding patch sequences - thx Jairo. refs #24648 diff --git src/hg/makeDb/doc/hg19.addChrMT.txt src/hg/makeDb/doc/hg19.addChrMT.txt index 1307af0..c49696b 100644 --- src/hg/makeDb/doc/hg19.addChrMT.txt +++ src/hg/makeDb/doc/hg19.addChrMT.txt @@ -1,462 +1,538 @@ # for emacs: -*- mode: sh; -*- # This file describes how hg19 was extended with the rCRS mitochondrial sequence (NC_012920) as chrMT # Redmine #24648 ############################################################################## # Get NC_012920.1 sequence, rename to chrMT. (DONE - 2020-01-14 - Angie) mkdir /hive/data/genomes/hg19/MT cd /hive/data/genomes/hg19/MT wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Homo_sapiens/current/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Homo_sapiens/current/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_structure/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz zcat chrMT.fna.gz \ | sed -e 's/^>NC_012920.1.*/>chrMT/' \ > chrMT.fa zcat chrMT.comp.agp.gz \ | grep -v ^# \ | sed -e 's/^NC_012920.1/chrMT/' \ > chrMT.agp ############################################################################## # Basic annotations (makeGenomeDb.pl, masking) (DONE - 2020-01-14 - Angie) # For patches, we make a new-patch-only "db" like grcH37P13 and run makeGenomeDb.pl, # doRepeatMasker.pl etc. to generate the usual set of basic annotations. # chrMT is so little, just run the base commands instead of the big cluster run pipelines. # Note, we're not loading the database tables just yet; do that after 2bit and chromInfo. # We need to mask the sequence before we can update the 2bit # Add AGP info to gold table (nothing to add to gap). cd /hive/data/genomes/hg19 hgGoldGapGl -noLoad -noGl -chrom=chrMT hg19 . . ls -l chrMT*tab #-rw-rw-r-- 1 angie genecats 0 Jan 14 13:37 chrMT_gap.tab #-rw-rw-r-- 1 angie genecats 44 Jan 14 13:37 chrMT_gold.tab # RepeatMasker cd /hive/data/genomes/hg19/MT /scratch/data/RepeatMasker/RepeatMasker -engine crossmatch -s -align -species 'Homo sapiens' \ chrMT.fa hgLoadOut -tabFile=chrMT.rmsk.tab hg19 chrMT.fa.out wc -l chrMT.rmsk.tab #5 chrMT.rmsk.tab # SimpleRepeat (no results) trfBig -trf=/cluster/bin/$MACHTYPE/trf \ chrMT.fa /dev/null -bedAt=chrMT.trf.bed -tempDir=/dev/shm ls -l chrMT.trf.bed #-rw-rw-r-- 1 angie genecats 0 Jan 14 14:00 chrMT.trf.bed # WindowMasker & SDust windowmasker -ustat ../bed/windowMasker/windowmasker.counts \ -input chrMT.fa -output windowmasker.intervals perl -wpe 'if (s/^>lcl\|(.*)\n$//) { $chr = $1; } if (/^(\d+) - (\d+)/) { $s=$1; $e=$2+1; s/(\d+) - (\d+)/$chr\t$s\t$e/; }' windowmasker.intervals > windowmasker.bed wc -l windowmasker.bed #28 windowmasker.bed windowmasker -ustat ../bed/windowMasker/windowmasker.counts -sdust true -input chrMT.fa \ -output windowmasker.sdust.intervals perl -wpe 'if (s/^>lcl\|(.*)\n$//) { $chr = $1; $chr =~ s/\s*$//; } if (/^(\d+) - (\d+)/) { $i++; $s=$1; $e=$2+1; s/(\d+) - (\d+)/$chr\t$s\t$e\t$chr.$i/; }' windowmasker.sdust.intervals > windowmasker.sdust.bed wc -l windowmasker.sdust.bed #36 windowmasker.sdust.bed # Make masked and unmasked .2bit (just RepeatMasker, nothing to add from TRF/SimpleRepeat) # chrMT.fa is unmasked, but just to be safe: faToTwoBit -noMask chrMT.fa chrMT.unmasked.2bit twoBitMask chrMT.unmasked.2bit chrMT.fa.out chrMT.masked.2bit # hgGcPercent echo -e "chrMT\t16569" > chrMT.sizes hgGcPercent -wigOut -doGaps -file=stdout -win=5 -verbose=0 hg19 chrMT.unmasked.2bit \ | wigToBigWig stdin chrMT.sizes chrMT.gc5Base.bw # CpG Islands (none) twoBitToFa chrMT.masked.2bit stdout \ | maskOutFa stdin hard chrMT.hardMasked.fa /hive/data/staging/data/cpgIslandExt/cpglh chrMT.hardMasked.fa > chrMT.cpg wc -l chrMT.cpg #0 chrMT.cpg # GenScan has a couple predictions on chrM: hgsql hg19 -NBe 'select count(*) from genscan where chrom = "chrM";' #2 gsBig chrMT.hardMasked.fa chrMT.genscan.gtf -trans=chrMT.genscan.pep \ -subopt=chrMT.genscan.subopt.bed \ -exe=/hive/data/staging/data/genscan/genscan \ -par=/hive/data/staging/data/genscan/HumanIso.smat -tmp=/dev/shm -window=2400000 wc -l chrMT.genscan* # 3 chrMT.genscan.gtf # 8 chrMT.genscan.pep # 4 chrMT.genscan.subopt.bed # Augustus (none for chrM, don't bother) hgsql hg19 -NBe 'select count(*) from augustusGene where chrom = "chrM";' #0 ############################################################################## # Extend main database 2bit, chrom.sizes, chromInfo (TODO - 2020-01-15 - Angie) # main 2bit cd /hive/data/genomes/hg19 time faToTwoBit <(twoBitToFa hg19.2bit stdout) <(twoBitToFa MT/chrMT.masked.2bit stdout) \ hg19.p13.plusMT.2bit #real 2m13.176s # unmasked 2bit time twoBitMask -type=.bed hg19.p13.plusMT.2bit /dev/null hg19.p13.plusMT.unmasked.2bit #real 0m3.220s # chrom.sizes twoBitInfo hg19.p13.plusMT.2bit stdout \ | sort -k 2nr,2nr > chrom.sizes.p13.plusMT # chromInfo cd /hive/data/genomes/hg19/bed/chromInfo awk '{print $1 "\t" $2 "\t/gbdb/hg19/hg19.2bit";}' ../../chrom.sizes.p13.plusMT \ > chromInfo.p13.plusMT.tab wc -l chromInfo*.tab # 298 chromInfo.p13.plusMT.tab # 297 chromInfo.p13.tab # 93 chromInfo.tab # Install cd /hive/data/genomes/hg19 ln -sf hg19.p13.plusMT.2bit hg19.2bit ln -sf hg19.p13.plusMT.unmasked.2bit hg19.unmasked.2bit ln -sf chrom.sizes.p13.plusMT chrom.sizes cd /hive/data/genomes/hg19/bed/chromInfo hgLoadSqlTab hg19 chromInfo $HOME/kent/src/hg/lib/chromInfo.sql chromInfo.p13.plusMT.tab ############################################################################## # Extend main database tables for fileless tracks (DONE - 2020-01-15 - Angie) # Assembly (gold) cd /hive/data/genomes/hg19 hgGoldGapGl -noGl -chrom=chrMT hg19 . . # Oops, it loaded chrMT_{gap,gold}. (gap empty as expected) hgsql hg19 -NBe 'select count(*) from chrMT_gap; select count(*) from chrMT_gold;' #0 #1 hgsql hg19 -e 'insert into gold select * from chrMT_gold; drop table chrMT_gap; drop table chrMT_gold;' cd /hive/data/genomes/hg19/MT # RepeatMasker hgLoadSqlTab -oldTable hg19 rmsk $HOME/kent/src/hg/lib/rmsk.sql chrMT.rmsk.tab # Nothing to load for TRF/simpleRepeat. # hgLoadBed hg19 simpleRepeat chrMT.trf.bed \ # -sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql # WindowMasker/SDust< hgLoadBed -oldTable hg19 windowmaskerSdust windowmasker.sdust.bed # Genscan ldHgGene -oldTable -gtf hg19 genscan chrMT.genscan.gtf hgLoadBed -oldTable hg19 genscanSubopt chrMT.genscan.subopt.bed # Nothing to load for CpG Islands or Augustus ############################################################################## # Extend main database gc5BaseBw.bw (DONE - 2020-01-15 - Angie) cd /hive/data/genomes/hg19/bed/gc5Base/ time (cat <(zcat hg19.p13.gc5Base.wigVarStep.gz) \ <(bigWigToWig ../../MT/chrMT.gc5Base.bw stdout) \ | gzip -c \ > hg19.p13.plusMT.gc5Base.wigVarStep.gz) #real 5m46.441s time wigToBigWig hg19.p13.plusMT.gc5Base.wigVarStep.gz ../../chrom.sizes.p13.plusMT \ hg19.p13.plusMT.gc5Base.bw #real 12m42.190s # Install cd /hive/data/genomes/hg19/bed/gc5Base/ ln -sf hg19.p13.plusMT.gc5Base.wigVarStep.gz hg19.gc5Base.wigVarStep.gz ln -sf hg19.p13.plusMT.gc5Base.bw hg19.gc5Base.bw ln -sf /hive/data/genomes/hg19/bed/gc5Base/hg19.gc5Base.wigVarStep.gz \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg19/gc5Base/hg19.gc5Base.txt.gz ############################################################################# # cytoBandIdeo - (DONE - 2020-01-15 - Angie) # There is no cytoBand info for these (although... could we map??) # so just make a fake cytoBandIdeo to get a blank ideogram. hgsql hg19 -e 'insert into cytoBandIdeo values ("chrMT", 0, 16569, "", "gneg");' ############################################################################## # Extend main database download files (DONE - 2020-01-17 - Angie) cd /hive/data/genomes/hg19/goldenPath/bigZips mkdir p13.plusMT # hg19.2bit and chrom.sizes were already extended above. ln -sf /hive/data/genomes/hg19/hg19.p13.plusMT.2bit p13.plusMT/ ln -sf /hive/data/genomes/hg19/chrom.sizes.p13.plusMT p13.plusMT/hg19.p13.plusMT.chrom.sizes # AGP: cat <(zcat p13/hg19.p13.agp.gz) <(cat ../../MT/chrMT.agp) \ | gzip -c > p13.plusMT/hg19.p13.plusMT.agp.gz # FASTA twoBitToFa ../../hg19.p13.plusMT.2bit stdout \ | gzip -c > p13.plusMT/hg19.p13.plusMT.fa.gz twoBitToFa ../../hg19.p13.plusMT.2bit stdout \ | maskOutFa stdin hard stdout \ | gzip -c > p13.plusMT/hg19.p13.plusMT.fa.masked.gz #TODO # RepeatMasker (don't include header of patch file): cat <(zcat p13/hg19.p13.fa.out.gz) \ <(cat ../../MT/chrMT.fa.out | tail -n +4) \ | gzip -c > p13.plusMT/hg19.p13.plusMT.fa.out.gz # SimpleRepeats/TRF: nothing added, just copy. cp p13/hg19.p13.trf.bed.gz p13.plusMT/hg19.p13.plusMT.trf.bed.gz # hg19 also has download files with the old tar-bundle structure -- update those too. # Per-chrom AGP: rm -rf agp && mkdir agp && cd agp tar xvzf ../p13/hg19.p13.chromAgp.tar.gz mkdir MT cp ../../../MT/chrMT.agp MT/ tar cvzf ../p13.plusMT/hg19.p13.plusMT.chromAgp.tar.gz * cd .. rm -rf agp # Per-chrom soft-masked FASTA: rm -rf chroms && mkdir chroms && cd chroms tar xvzf ../p13/hg19.p13.chromFa.tar.gz twoBitToFa ../../../MT/chrMT.masked.2bit chroms/MT.fa ls -1 chroms | wc -l #298 tar cvzf ../p13.plusMT/hg19.p13.plusMT.chromFa.tar.gz ./chroms cd .. rm -rf chroms # Per-chrom hard-masked FASTA: rm -rf maskedChroms && mkdir maskedChroms && cd maskedChroms tar xvzf ../p13/hg19.p13.chromFaMasked.tar.gz twoBitToFa ../../../MT/chrMT.masked.2bit stdout \ | maskOutFa stdin hard maskedChroms/chrMT.fa.masked ls -1 maskedChroms | wc -l #298 tar cvzf ../p13.plusMT/hg19.p13.plusMT.chromFaMasked.tar.gz ./maskedChroms cd .. rm -rf maskedChroms # Per-chrom RepeatMasker .out: rm -rf out && mkdir out && cd out tar xvzf ../p13/hg19.p13.chromOut.tar.gz mkdir MT cp -p ../../../MT/chrMT.fa.out MT/ ls -1 | wc -l #298 tar cvzf ../p13.plusMT/hg19.p13.plusMT.chromOut.tar.gz * cd .. rm -r out # Per-chrom TRF output: nothing added, just copy. cp p13/hg19.p13.chromTrf.tar.gz p13.plusMT/hg19.p13.plusMT.chromTrf.tar.gz # RepeatMasker .align files: cat <(zcat p13/hg19.p13.fa.align.gz) <(cat ../../MT/chrMT.fa.align) \ | gzip -c > p13.plusMT/hg19.p13.plusMT.fa.align.gz # TODO: regenerate upstream* files for p13.plusMT # Make new md5sum.txt cd p13.plusMT md5sum hg19.* > md5sum.txt # Edit README.txt # Update latest subdir cd /hive/data/genomes/hg19/goldenPath/bigZips mv latest latest.bak mkdir latest cd latest for f in ../p13.plusMT/*; do noV=$(basename $(echo $f | sed -re 's/\.p13\.plusMT//;')) ln -s $f $noV done cd .. rm -rf latest.bak # Install ln -s /hive/data/genomes/hg19/goldenPath/bigZips/p13.plusMT \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg19/bigZips/ rm /usr/local/apache/htdocs-hgdownload/goldenPath/hg19/bigZips/latest ln -s /hive/data/genomes/hg19/goldenPath/bigZips/latest \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg19/bigZips/ # TODO: /hive/data/genomes/hg19/goldenPath/chromosomes/ ? Does anybody use that anymore? ############################################################################# # Don't need to Build perSeqMax file for gfServer (hgBlat) - # but don't forget to get new blat servers for the updated hg19.2bit. ######################################################################### # Regenerate idKeys with extended hg19 (DONE - 2020-01-21 - Angie) mkdir /hive/data/genomes/hg19/bed/idKeys.p13.plusMT cd /hive/data/genomes/hg19/bed/idKeys.p13.plusMT time ($HOME/kent/src/hg/utils/automation/doIdKeys.pl \ -twoBit=/hive/data/genomes/hg19/hg19.p13.plusMT.unmasked.2bit \ -bigClusterHub=ku -smallClusterHub=ku \ -buildDir=`pwd` hg19) > do.log 2>&1 & tail -f do.log #real 1m9.967s cat hg19.keySignature.txt #3dc697674e89bb99099419f6483b96d5 # Install cd /hive/data/genomes/hg19/bed/ rm -f idKeys ln -s idKeys.p13.plusMT idKeys ############################################################################## # UCSC to RefSeq, INSDC, Assembly; chromAlias (DONE - 2020-01-21 - Angie) cd /hive/data/genomes/hg19/bed/ucscToINSDC # ucscToINSDC has GenBank IDs for all sequences except chrM, for which it has RefSeq NC_ ID... # chromAlias's genbankToAssembly.txt file has the GenBank ID for MT, so use GenBank ID here. cp ucscToINSDC.p13.bed ucscToINSDC.p13.plusMT.bed echo -e "chrMT\t0\t16569\tJ01415.2" >> ucscToINSDC.p13.plusMT.bed # Add RefSeq ID for MT to ucscToRefSeq. cp ucscToRefSeq.p13.bed ucscToRefSeq.p13.plusMT.bed echo -e "chrMT\t0\t16569\tNC_012920.1" >> ucscToRefSeq.p13.plusMT.bed # loading tables: export db=hg19 export chrSize=`cut -f1 ucscToINSDC.p13.plusMT.bed | awk '{print length($0)}' | sort -n | tail -1` sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | hgLoadSqlTab ${db} ucscToINSDC stdin ucscToINSDC.p13.plusMT.bed sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | sed -e 's/INSDC/RefSeq/g;' \ | hgLoadSqlTab ${db} ucscToRefSeq stdin ucscToRefSeq.p13.plusMT.bed # ucscToINSDC must be exactly 100% coverage featureBits -countGaps ${db} ucscToINSDC #3234851260 bases of 3234851260 (100.000%) in intersection # ucscToRefSeq must be 100% except for chrM: featureBits -countGaps ${db} ucscToRefSeq #3234834689 bases of 3234851260 (99.999%) in intersection expr 3234851260 - 3234834689 #16571 # construct chromAlias: cd /hive/data/genomes/hg19/bed/chromAlias hgsql -N -e 'select chrom,name from ucscToRefSeq;' ${db} \ | sort -k1,1 > ucsc.refseq.p13.plusMT.tab hgsql -N -e 'select chrom,name from ucscToINSDC;' ${db} \ | sort -k1,1 > ucsc.genbank.p13.plusMT.tab tawk '{print $2, $1;}' ucsc.genbank.p13.plusMT.tab | sort \ | join -t$'\t' -o 1.2,2.2 - genbankToAssembly.txt \ | sort -k1,1 > ucsc.assembly.p13.plusMT.tab ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.p13.plusMT.tab \ | sed -re 's/\.p13.plusMT//;' \ > ${db}.chromAlias.p13.plusMT.tab # verify all there: for t in refseq genbank assembly do c0=`cat ucsc.$t.p13.plusMT.tab | wc -l` c1=`grep $t hg19.chromAlias.p13.plusMT.tab | wc -l` ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done # checking refseq: 297 =? 297 OK # checking genbank: 298 =? 298 OK # checking assembly: 297 =? 297 OK hgLoadSqlTab hg19 chromAlias $HOME/kent/src/hg/lib/chromAlias.sql ${db}.chromAlias.p13.plusMT.tab ############################################################################## # altSeqLiftOver (DONE - 2020-01-21 - Angie) # consider chrM and chrMT to be each other's alts... mkdir /hive/data/genomes/hg19/bed/altSeqLiftOver.p13.plusMT cd /hive/data/genomes/hg19/bed/altSeqLiftOver.p13.plusMT # Add PSL from blat to the p13 alignments from NCBI blat -noHead /hive/data/genomes/hg19/hg19.2bit:{chrM,chrMT} chrMToChrMT.psl pslSwap chrMToChrMT.psl chrMTToChrM.psl cp /hive/data/genomes/hg19/bed/altSeqLiftOver.p13/altSeqLiftOver.psl . cat chrMToChrMT.psl chrMTToChrM.psl >> altSeqLiftOver.psl hgLoadPsl hg19 -table=altSeqLiftOverPsl altSeqLiftOver.psl # We do not need to make a chrM-to-chrMT liftOver file... now we can just put annotations # directly on chrMT! (In addition to lifting over to chrM for backwards compat) ######################################################################### # ncbiRefSeq.p13 Genes (DONE - 2020-01-21 - Angie) mkdir /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-01-21 cd /hive/data/genomes/hg19/bed/ncbiRefSeq.p13.2020-01-21 time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \ refseq vertebrate_mammalian Homo_sapiens \ GCF_000001405.25_GRCh37.p13 hg19) > do.log 2>&1 & tail -f do.log # *** All done ! Elapsed time: 6m42s #real 6m42.029s cat fb.ncbiRefSeq.hg19.txt #93720294 bases of 2991710746 (3.133%) in intersection ######################################################################### # ENCODE tracks don't seem to have anything for chrM (?) ############################################################################## # Extend GTEX GENE (DONE 2020-01-21 - Angie) # Conveniently, GTEx provided annotations on the real MT. Those were plopped # straight onto chrM with no coord adjustments (close enough for nice bar graphs). # So just duplicate the "chrM" rows onto chrMT. cd /hive/data/genomes/hg19/bed/gtex cp gtexGeneV6.plusP13.bed gtexGeneV6.plusP13.plusMT.bed grep ^chrM gtexGeneV6.bed | sed -re 's/chrM\t/chrMT\t/;' >> gtexGeneV6.plusP13.plusMT.bed hgLoadBed -noBin -type=bed6+ -sqlTable=$HOME/kent/src/hg/lib/gtexGeneBed.sql -renameSqlTable \ hg19 gtexGene gtexGeneV6.plusP13.plusMT.bed grep -w chrM gtexGeneModelV6.initial.gp \ | sed -re 's/chrM\t/chrMT\t/' \ > gtexGeneModelV6.MT.gp sort -k2,2 -k3n,3n gtexGeneModelV6.initial.gp gtexGeneModelV6.p13.gp gtexGeneModelV6.MT.gp \ | hgLoadGenePred hg19 gtexGeneModel stdin ############################################################################## # Oops update original wiggle gc5Base (gc5BaseBw is not released for hg19) (DONE 2020-02-18 - Angie) # Redmine #24648 notes 18 & 19 cd /hive/data/genomes/hg19/bed/gc5Base/ wigEncode hg19.p13.plusMT.gc5Base.wigVarStep.gz gc5Base.p13.plusMT.{wig,wib} # Install ln -s `pwd`/gc5Base.p13.plusMT.wib /gbdb/hg19/wib hgLoadWiggle -pathPrefix=/gbdb/hg19/wib hg19 gc5Base gc5Base.p13.plusMT.wig # Also update the gc5Base varStep wiggle download file: cd /usr/local/apache/htdocs-hgdownload/goldenPath/hg19/gc5Base ln -sf /hive/data/genomes/hg19/bed/gc5Base/hg19.p13.plusMT.gc5Base.wigVarStep.gz \ hg19.gc5Base.txt.gz md5sum hg19.gc5Base.txt.gz > md5sum.txt ############################################################################## +# Sort & reload database tables (DONE 2020-02-20 - Angie) + + # Commands run in hgsql: + hgsql hg19 + + # rename table augustusGene to augustusGeneBak; + # create table augustusGene like augustusGeneBak; + # insert into augustusGene select * from augustusGeneBak order by chrom,txStart; +#Query OK, 33620 rows affected (0.63 sec) +#Records: 33620 Duplicates: 0 Warnings: 0 + # show table status like "augustusGene%"; +#(same number of rows; data size dropped a bit, but contents appear unchanged) + # drop table augustusGeneBak; + + # rename table cpgIslandExt to cpgIslandExtBak; + # create table cpgIslandExt like cpgIslandExtBak; + # insert into cpgIslandExt select * from cpgIslandExtBak order by chrom,chromStart; +#Records: 30344 Duplicates: 0 Warnings: 0 + # show table status like "cpgIslandExt%"; + # drop table cpgIslandExtBak; + + # rename table gap to gapBak; + # create table gap like gapBak; + # insert into gap select * from gapBak order by chrom,chromStart; +#Records: 497 Duplicates: 0 Warnings: 0 + # show table status like "gap%"; + # drop table gapBak; + + # rename table genscan to genscanBak; + # create table genscan like genscanBak; + # insert into genscan select * from genscanBak order by chrom,txStart; +#Records: 107250 Duplicates: 0 Warnings: 0 + # show table status like "genscan%"; + # drop table genscanBak; + + # rename table gold to goldBak; + # create table gold like goldBak; + # insert into gold select * from goldBak order by chrom,chromStart; +#Records: 28665 Duplicates: 0 Warnings: 0 + # show table status like "gold%"; + # drop table goldBak; + + # rename table rmsk to rmskBak; + # create table rmsk like rmskBak; + # insert into rmsk select * from rmskBak order by genoName,genoStart; +#Records: 5481341 Duplicates: 0 Warnings: 0 + # show table status like "rmsk%"; + # drop table rmskBak; + + # rename table simpleRepeat to simpleRepeatBak; + # create table simpleRepeat like simpleRepeatBak; + # insert into simpleRepeat select * from simpleRepeatBak order by chrom,chromStart; +#Records: 1000830 Duplicates: 0 Warnings: 0 + # show table status like "simpleRepeat%"; + # drop table simpleRepeatBak; + + # rename table windowmaskerSdust to windowmaskerSdustBak; + # create table windowmaskerSdust like windowmaskerSdustBak; + # insert into windowmaskerSdust select * from windowmaskerSdustBak order by chrom,chromStart; +#Records: 16745130 Duplicates: 0 Warnings: 0 + # show table status like "windowmaskerSdust%"; + # drop table windowmaskerSdustBak; + # quit + + # Double check: + positionalTblCheck hg19 augustusGene + positionalTblCheck hg19 cpgIslandExt + positionalTblCheck hg19 gap + positionalTblCheck hg19 genscan + positionalTblCheck hg19 gold + positionalTblCheck hg19 rmsk + positionalTblCheck hg19 simpleRepeat + positionalTblCheck hg19 windowmaskerSdust + + +##############################################################################