c6768809fed768591851aa1547d1f5e867727a2f hiram Wed Jul 29 12:41:26 2020 -0700 liftOvers from canFam5 to Fam3 Fam4 and vs. vs. refs #25917 diff --git src/hg/makeDb/doc/canFam5/initialBuild.txt src/hg/makeDb/doc/canFam5/initialBuild.txt index f4384cf..6c82b4d 100644 --- src/hg/makeDb/doc/canFam5/initialBuild.txt +++ src/hg/makeDb/doc/canFam5/initialBuild.txt @@ -366,112 +366,111 @@ ############################################################################# # cytoBandIdeo - (DONE - 2020-07-17 - Hiram) mkdir /hive/data/genomes/canFam5/bed/cytoBand cd /hive/data/genomes/canFam5/bed/cytoBand makeCytoBandIdeo.csh canFam5 ############################################################################# # run up idKeys files for chromAlias/ncbiRefSeq (DONE - 2020-07-17 - Hiram) mkdir /hive/data/genomes/canFam5/bed/idKeys cd /hive/data/genomes/canFam5/bed/idKeys time (doIdKeys.pl \ -twoBit=/hive/data/genomes/canFam5/canFam5.unmasked.2bit \ -buildDir=`pwd` canFam5) > do.log 2>&1 & -XXX - running - Fri Jul 17 17:01:13 PDT 2020 - # real 3m22.298s + # real 1m28.736s cat canFam5.keySignature.txt - # 174191aae5515d1114a9d6320b152b1a + # 20a742890810f31eac281ae06bc3d170 ############################################################################# # gapOverlap (DONE - 2020-07-17 - Hiram) mkdir /hive/data/genomes/canFam5/bed/gapOverlap cd /hive/data/genomes/canFam5/bed/gapOverlap time (doGapOverlap.pl \ -twoBit=/hive/data/genomes/canFam5/canFam5.unmasked.2bit canFam5 ) \ > do.log 2>&1 & -XXX - running - Fri Jul 17 16:56:55 PDT 2020 # real 1m49.489s # there are only nine: wc -l bed.tab # 9 bed.tab cut -f2- bed.tab chr1 41008264 41010364 chr1:41008265-41010364 1000 + 41008264 41010364 0 2 1000,1000 0,1100 chr17 58049274 58051374 chr17:58049275-58051374 1000 + 58049274 58051374 0 2 1000,1000 0,1100 ... etc ... 
chrX 45160089 45162189 chrX:45160090-45162189 1000 + 45160089 45162189 0 2 1000,1000 0,1100 cat fb.canFam5.gapOverlap.txt # 16158 bases of 2482000080 (0.001%) in intersection ############################################################################# # tandemDups (TBD - 2020-03-31 - Hiram) mkdir /hive/data/genomes/canFam5/bed/tandemDups cd /hive/data/genomes/canFam5/bed/tandemDups time (~/kent/src/hg/utils/automation/doTandemDup.pl \ -twoBit=/hive/data/genomes/canFam5/canFam5.unmasked.2bit canFam5) \ > do.log 2>&1 & -XXX - running - Fri Jul 17 16:57:18 PDT 2020 - # real 188m34.598s + # real 96m40.950s cat fb.canFam5.tandemDups.txt - # 155315479 bases of 3044872214 (5.101%) in intersection + # 38911424 bases of 2343218756 (1.661%) in intersection bigBedInfo canFam5.tandemDups.bb | sed -e 's/^/# /;' # version: 4 # fieldCount: 13 # hasHeaderExtension: yes # isCompressed: yes # isSwapped: 0 # extraIndexCount: 0 -# itemCount: 2,822,307 -# primaryDataSize: 72,710,994 -# primaryIndexSize: 292,560 -# zoomLevels: 9 -# chromCount: 5335 -# basesCovered: 1,635,503,835 -# meanDepth (of bases covered): 14.396921 +# itemCount: 587,116 +# primaryDataSize: 15,889,460 +# primaryIndexSize: 62,440 +# zoomLevels: 8 +# chromCount: 543 +# basesCovered: 1,405,259,423 +# meanDepth (of bases covered): 4.102433 # minDepth: 1.000000 -# maxDepth: 381.000000 -# std of depth: 29.341113 +# maxDepth: 178.000000 +# std of depth: 5.480960 ######################################################################### # ucscToINSDC and ucscToRefSeq table/track (DONE - 2020-07-17 - Hiram) # construct idKeys for the genbank sequence mkdir /hive/data/genomes/canFam5/genbank/idKeys cd /hive/data/genomes/canFam5/genbank/idKeys faToTwoBit ../GCA_*1_genomic.fna.gz canFam5.genbank.2bit time (doIdKeys.pl -buildDir=`pwd` \ -twoBit=`pwd`/canFam5.genbank.2bit genbankCanFam5) > do.log 2>&1 & - # real 3m30.599s + # real 1m30.193s cat genbankCanFam5.keySignature.txt - # 174191aae5515d1114a9d6320b152b1a + # 
20a742890810f31eac281ae06bc3d170 mkdir /hive/data/genomes/canFam5/bed/chromAlias cd /hive/data/genomes/canFam5/bed/chromAlias join -t$'\t' ../idKeys/canFam5.idKeys.txt \ ../../genbank/idKeys/genbankCanFam5.idKeys.txt | cut -f2- \ | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \ | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \ | sort -k1,1 -k2,2n > ucscToINSDC.bed +XXX + # should be same line counts throughout: wc -l * ../../chrom.sizes # 2198 ucscToINSDC.bed # 2198 ../../chrom.sizes export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1` echo $chrSize # 23 # use the $chrSize in this sed sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | hgLoadSqlTab canFam5 ucscToINSDC stdin ucscToINSDC.bed # should be quiet for all OK checkTableCoords canFam5 @@ -545,210 +544,239 @@ query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%' searchPriority 8 # verify searches work in the position box git commit -m 'adding search rule for gold/assembly track refs #25917' \ trackDb.ra ########################################################################## # running repeat masker (DONE - 2020-07-17 - Hiram) mkdir /hive/data/genomes/canFam5/bed/repeatMasker cd /hive/data/genomes/canFam5/bed/repeatMasker time (doRepeatMasker.pl -buildDir=`pwd` \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -smallClusterHub=ku canFam5) > do.log 2>&1 -XXX - running - Fri Jul 17 16:57:56 PDT 2020 - # real 293m51.353s + # real 827m31.483s cat faSize.rmsk.txt -# 2482000080 bases (58500 N's 2481941580 real 1403544550 upper -# 1078397030 lower) in 2198 sequences in 1 files -# Total size: mean 1129208.4 sd 8542765.0 min 13084 (chrUn_JAAHUQ010000994v1) -# max 124992030 (chrX) median 43246 -# %43.45 masked total, %43.45 masked real +# 2343218756 bases (6087522 N's 2337131234 real 1361455376 upper +# 975675858 lower) in 794 sequences in 1 files +# Total size: mean 2951157.1 sd 13874454.0 min 1091 (chrUn_REHQ01000052v1) +# max 
122894117 (chr1) median 13386 +# %41.64 masked total, %41.75 masked real + egrep -i "versi|relea" do.log # RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $ -# grep version of RepeatMasker$ /hive/data/staging/data/RepeatMasker/RepeatMasker -# February 01 2017 (open-4-0-8) 1.332 version of RepeatMasker -# grep RELEASE /hive/data/staging/data/RepeatMasker/Libraries/RepeatMaskerLib.embl +# CC Dfam_Consensus RELEASE 20181026; * +# CC RepBase RELEASE 20181026; + + sed -e 's/^/# /;' versionInfo.txt +# The repeat files provided for this assembly were generated using RepeatMasker. +# Smit, AFA, Hubley, R & Green, P., +# RepeatMasker Open-4.0. +# 1996-2010 <http://www.repeatmasker.org>. +# +# VERSION: +# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $ +# Search Engine: Crossmatch [ 1.090518 ] +# Master RepeatMasker Database: /hive/data/staging/data/RepeatMasker181121/Libraries/RepeatMaskerLib.embl ( Complete Database: dc20181026-rb20181026 ) +# +# +# RepeatMasker version development-$Id: RepeatMasker,v 1.332 2017/04/17 19:01:11 rhubley Exp $ # CC Dfam_Consensus RELEASE 20181026; * # CC RepBase RELEASE 20181026; * +# # RepeatMasker engine: -engine crossmatch -s +# # RepeatMasker library options: -species 'Canis lupus familiaris' +# +# PARAMETERS: +# /hive/data/staging/data/RepeatMasker/RepeatMasker -engine crossmatch -s -align -species 'Canis lupus familiaris' time featureBits -countGaps canFam5 rmsk - # 1078398935 bases of 2482000080 (43.449%) in intersection - # real 0m35.578s + # 975676256 bases of 2343218756 (41.638%) in intersection + # real 0m33.765s # why is it different than the faSize above ? 
# because rmsk masks out some N's as well as bases, the faSize count above # separates out the N's from the bases, it doesn't show lower case N's # faster way to get the same result on high contig count assemblies: time hgsql -N -e 'select genoName,genoStart,genoEnd from rmsk;' canFam5 \ | bedSingleCover.pl stdin | ave -col=4 stdin | grep "^total" - # total 1078398935.000000 - # real 0m22.013s + # total 975676256.000000 + # real 0m20.267s ########################################################################## # running simple repeat (DONE - 2020-07-17 - Hiram) mkdir /hive/data/genomes/canFam5/bed/simpleRepeat cd /hive/data/genomes/canFam5/bed/simpleRepeat time (doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \ -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \ -trf409=6 canFam5) > do.log 2>&1 # real 7m53.400s cat fb.simpleRepeat # 42156507 bases of 2337131234 (1.804%) in intersection -XXX - ready for masking - 2020-07-17 cd /hive/data/genomes/canFam5 # if using the Window Masker result: cd /hive/data/genomes/canFam5 # twoBitMask bed/windowMasker/canFam5.cleanWMSdust.2bit \ # -add bed/simpleRepeat/trfMask.bed canFam5.2bit # you can safely ignore the warning about fields >= 13 # add to rmsk after it is done: twoBitMask canFam5.rmsk.2bit \ -add bed/simpleRepeat/trfMask.bed canFam5.2bit # you can safely ignore the warning about fields >= 13 twoBitToFa canFam5.2bit stdout | faSize stdin > faSize.canFam5.2bit.txt cat faSize.canFam5.2bit.txt -# 2482000080 bases (58500 N's 2481941580 real 1401386884 upper -# 1080554696 lower) in 2198 sequences in 1 files -# Total size: mean 1129208.4 sd 8542765.0 min 13084 (chrUn_JAAHUQ010000994v1) -# max 124992030 (chrX) median 43246 -# %43.54 masked total, %43.54 masked real +# 2343218756 bases (6087522 N's 2337131234 real 1359905780 upper +# 977225454 lower) in 794 sequences in 1 files +# Total size: mean 2951157.1 sd 13874454.0 min 1091 (chrUn_REHQ01000052v1) +# max 122894117 (chr1) median 13386 +# %41.70 masked total, 
%41.81 masked real rm /gbdb/canFam5/canFam5.2bit ln -s `pwd`/canFam5.2bit /gbdb/canFam5/canFam5.2bit ######################################################################### -# CREATE MICROSAT TRACK (TBD - 2020-03-31 - Hiram) +# CREATE MICROSAT TRACK (DONE - 2020-07-28 - Hiram) ssh hgwdev mkdir /cluster/data/canFam5/bed/microsat cd /cluster/data/canFam5/bed/microsat awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \ ../simpleRepeat/simpleRepeat.bed > microsat.bed hgLoadBed canFam5 microsat microsat.bed - # Read 65981 elements of size 4 from microsat.bed + # Read 57870 elements of size 4 from microsat.bed ########################################################################## -## WINDOWMASKER (TBD - 2020-03-31 - Hiram) +## WINDOWMASKER (DONE - 2020-07-28 - Hiram) mkdir /hive/data/genomes/canFam5/bed/windowMasker cd /hive/data/genomes/canFam5/bed/windowMasker time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev canFam5) > do.log 2>&1 - # real 90m16.169s + # real 88m35.943s # Masking statistics cat faSize.canFam5.cleanWMSdust.txt -# 2482000080 bases (58500 N's 2481941580 real 1630728232 upper 851213348 lower) -# in 2198 sequences in 1 files -# Total size: mean 1129208.4 sd 8542765.0 min 13084 (chrUn_JAAHUQ010000994v1) -# max 124992030 (chrX) median 43246 -# %34.30 masked total, %34.30 masked real +# 2343218756 bases (6087522 N's 2337131234 real 1573472737 upper +# 763658497 lower) in 794 sequences in 1 files +# Total size: mean 2951157.1 sd 13874454.0 min 1091 (chrUn_REHQ01000052v1) +# max 122894117 (chr1) median 13386 +# %32.59 masked total, %32.68 masked real cat fb.canFam5.rmsk.windowmaskerSdust.txt - # 598271411 bases of 2482000080 (24.104%) in intersection + # 514628122 bases of 2343218756 (21.962%) in intersection ########################################################################## -# cpgIslands - (TBD - 2020-04-02 - Hiram) +# cpgIslands - (DONE - 2020-07-28 - Hiram) 
mkdir /hive/data/genomes/canFam5/bed/cpgIslands cd /hive/data/genomes/canFam5/bed/cpgIslands time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \ -workhorse=hgwdev -smallClusterHub=ku canFam5) > do.log 2>&1 - # real 3m29.034s + # real 3m21.080s cat fb.canFam5.cpgIslandExt.txt - # 47618882 bases of 2481941580 (1.919%) in intersection + # 45080636 bases of 2337131234 (1.929%) in intersection ############################################################################## -# genscan - (TBD - 2020-04-02 - Hiram) +# genscan - (DONE - 2020-07-28 - Hiram) mkdir /hive/data/genomes/canFam5/bed/genscan cd /hive/data/genomes/canFam5/bed/genscan time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ -bigClusterHub=ku canFam5) > do.log 2>&1 - # real 8m19.775s + # real 43m47.630s - # two jobs broken: +# four jobs failed, running manually on hgwdev: ./runGsBig2M.csh chr22 000 gtf/000/chr22.gtf pep/000/chr22.pep subopt/000/chr22.bed & -./runGsBig2M.csh chr34 000 gtf/000/chr34.gtf pep/000/chr34.pep subopt/000/chr34.bed +./runGsBig2M.csh chr15 000 gtf/000/chr15.gtf pep/000/chr15.pep subopt/000/chr15.bed & +./runGsBig2M.csh chr20 000 gtf/000/chr20.gtf pep/000/chr20.pep subopt/000/chr20.bed & +./runGsBig2M.csh chr3 000 gtf/000/chr3.gtf pep/000/chr3.pep subopt/000/chr3.bed wait - # real 14m27.845s +XXX - running - Wed Jul 29 12:20:47 PDT 2020 time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ -continue=makeBed -bigClusterHub=ku canFam5) > makeBed.log 2>&1 # real 0m45.365s cat fb.canFam5.genscan.txt # 57650331 bases of 2481941580 (2.323%) in intersection cat fb.canFam5.genscanSubopt.txt # 50129491 bases of 2481941580 (2.020%) in intersection ######################################################################### # Create kluster run files (TBD - 2020-04-02 - Hiram) # numerator is canFam5 gapless bases "real" as reported by: featureBits -noRandom -noHap canFam5 gap - # 36700 bases of 2353522726 (0.002%) in intersection + # 6036826 bases of 2320309602 
(0.260%) in intersection # ^^^ # denominator is hg19 gapless bases as reported by: # featureBits -noRandom -noHap hg19 gap # 234344806 bases of 2861349177 (8.190%) in intersection # 1024 is threshold used for human -repMatch: - calc \( 2353522726 / 2861349177 \) \* 1024 - # ( 2353522726 / 2861349177 ) * 1024 = 842.262556 + calc \( 2320309602 / 2861349177 \) \* 1024 + # ( 2320309602 / 2861349177 ) * 1024 = 830.376471 # ==> use -repMatch=800 according to size scaled down from 1024 for human. # and rounded down to nearest 50 cd /hive/data/genomes/canFam5 time blat canFam5.2bit \ /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/canFam5.11.ooc \ -repMatch=800 - # Wrote 34718 overused 11-mers to jkStuff/canFam5.11.ooc - # real 0m21.985s + # Wrote 28510 overused 11-mers to jkStuff/canFam5.11.ooc + # real 0m20.727s + + # canFam4 at repMatch=800: + # Wrote 34718 overused 11-mers to jkStuff/canFam4.11.ooc # canFam3 at repMatch=900: # Wrote 24788 overused 11-mers to jkStuff/canFam3.11.ooc # real 1m11.629s # there are no non-bridged gaps hgsql -N \ - -e 'select * from gap where bridge="no" order by size;' canFam5 \ - - # HOWEVER, every gap in this assembly is the same 'within scaffold' - # at size 100: - hgsql -N -e 'select size from gap where bridge="yes" order by size;' - canFam5 | sort | uniq -c - # 585 100 - - # using these gaps to make a lift file - # minimum gap size is 100 and produces a reasonable number of lifts - gapToLift -verbose=2 -minGap=100 canFam5 jkStuff/canFam5.nonBridged.lft \ - -bedFile=jkStuff/canFam5.nonBridged.bed - wc -l jkStuff/canFam5.nonBri* - # 2198 jkStuff/canFam5.nonBridged.bed - # 2198 jkStuff/canFam5.nonBridged.lft + -e 'select * from gap where bridge="no" order by size;' canFam5 + + # survey gap sizes: + hgsql -N -e 'select size from gap where bridge="yes" order by size;' \ + canFam5 | ave stdin | sed -e 's/^/# /;' +# Q1 100.000000 +# median 5000.000000 +# Q3 5000.000000 +# average 6081.440559 +# min 4.000000 +# max 144464.000000 +# count 1001 
+# total 6087522.000000 +# standard deviation 11814.767347 + + # using ordinary gaps to make a lift file + # minimum gap size at 10000 produces a reasonable number of lifts + gapToLift -verbose=2 -minGap=10000 canFam5 jkStuff/canFam5.10Kgaps.lft \ + -bedFile=jkStuff/canFam5.10Kgaps.bed + wc -l jkStuff/*10K* + # 794 jkStuff/canFam5.10Kgaps.bed + # 794 jkStuff/canFam5.10Kgaps.lft ######################################################################## # lastz/chain/net swap human/hg38 (TBD - 2020-04-10 - Hiram) # original alignment cd /hive/data/genomes/hg38/bed/lastzCanFam5.2020-04-02 cat fb.hg38.chainCanFam5Link.txt # 1549397508 bases of 3110768607 (49.808%) in intersection cat fb.hg38.chainSynCanFam5Link.txt # 1488468205 bases of 3110768607 (47.849%) in intersection time (doRecipBest.pl -load -workhorse=hgwdev -buildDir=`pwd` \ hg38 canFam5) > rbest.log 2>&1 & # real 310m32.196s @@ -928,43 +956,60 @@ # XXX 2019-11-20 - ready for this after genbank runs featureBits -enrichment canFam5 refGene ncbiRefSeq # refGene 0.402%, ncbiRefSeq 3.148%, both 0.402%, cover 99.90%, enrich 31.73x featureBits -enrichment canFam5 ncbiRefSeq refGene # ncbiRefSeq 3.148%, refGene 0.402%, both 0.402%, cover 12.76%, enrich 31.73x featureBits -enrichment canFam5 ncbiRefSeqCurated refGene # ncbiRefSeqCurated 0.401%, refGene 0.402%, both 0.400%, cover 99.66%, enrich 247.79x featureBits -enrichment canFam5 refGene ncbiRefSeqCurated # refGene 0.402%, ncbiRefSeqCurated 0.401%, both 0.400%, cover 99.33%, enrich 247.79x ######################################################################### -# LIFTOVER TO canFam3 (TBD - 2020-04-02 - Hiram) +# LIFTOVER TO canFam4 (DONE - 2020-07-28 - Hiram) + ssh hgwdev + mkdir /hive/data/genomes/canFam5/bed/blat.canFam4.2020-07-28 + cd /hive/data/genomes/canFam5/bed/blat.canFam4.2020-07-28 + doSameSpeciesLiftOver.pl -verbose=2 \ + -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ + -ooc=/hive/data/genomes/canFam5/jkStuff/canFam5.11.ooc \ + canFam5 
canFam4 + time (doSameSpeciesLiftOver.pl -verbose=2 \ + -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ + -ooc=/hive/data/genomes/canFam5/jkStuff/canFam5.11.ooc \ + canFam5 canFam4) > doLiftOverToCanFam4.log 2>&1 + # real 299m34.538s + + # see if the liftOver menus function in the browser from canFam5 to canFam4 + +######################################################################### +# LIFTOVER TO canFam3 (DONE - 2020-07-28 - Hiram) ssh hgwdev - mkdir /hive/data/genomes/canFam5/bed/blat.canFam3.2020-04-02 - cd /hive/data/genomes/canFam5/bed/blat.canFam3.2020-04-02 + mkdir /hive/data/genomes/canFam5/bed/blat.canFam3.2020-07-28 + cd /hive/data/genomes/canFam5/bed/blat.canFam3.2020-07-28 doSameSpeciesLiftOver.pl -verbose=2 \ -debug -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam5/jkStuff/canFam5.11.ooc \ canFam5 canFam3 time (doSameSpeciesLiftOver.pl -verbose=2 \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -ooc=/hive/data/genomes/canFam5/jkStuff/canFam5.11.ooc \ canFam5 canFam3) > doLiftOverToCanFam3.log 2>&1 - # real 1100m17.743s + # real 278m52.252s # see if the liftOver menus function in the browser from canFam5 to canFam3 ######################################################################### # BLATSERVERS ENTRY (TBD - 2020-04-02 - Hiram) # After getting a blat server assigned by the Blat Server Gods, ssh hgwdev hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("canFam5", "blat1b", "17904", "1", "0"); \ INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("canFam5", "blat1b", "17905", "0", "1");' \ hgcentraltest # test it with some sequence @@ -1078,31 +1123,30 @@ # when clean, check in: git commit -m 'adding rules for canFam5 refs #25917' all.joiner git push # run up a 'make alpha' in hg/hgTables to get this all.joiner file # into the hgwdev/genome-test system cd /hive/data/genomes/canFam5 time (makeDownloads.pl canFam5) > downloads.log 2>&1 # real 16m11.233s # now 
ready for pushQ entry mkdir /hive/data/genomes/canFam5/pushQ cd /hive/data/genomes/canFam5/pushQ time ($HOME/kent/src/hg/utils/automation/makePushQSql.pl -redmineList canFam5) > canFam5.pushQ.sql 2> stderr.out # real 15m2.385s -XXXX # remove the tandemDups and gapOverlap from the file list: sed -i -e "/tandemDups/d" redmine.canFam5.table.list sed -i -e "/Tandem Dups/d" redmine.canFam5.releaseLog.txt sed -i -e "/gapOverlap/d" redmine.canFam5.table.list sed -i -e "/Gap Overlaps/d" redmine.canFam5.releaseLog.txt # check for errors in stderr.out, some are OK, e.g.: # WARNING: canFam5 does not have ucscToRefSeq # WARNING: hgwdev does not have /gbdb/canFam5/ncbiRefSeq/ncbiRefSeqVersion.txt # WARNING: hgwdev does not have /gbdb/canFam5/ncbiRefSeq/ncbiRefSeqOther.bb # WARNING: hgwdev does not have /gbdb/canFam5/ncbiRefSeq/ncbiRefSeqOther.ix # WARNING: hgwdev does not have /gbdb/canFam5/ncbiRefSeq/ncbiRefSeqOther.ixx # WARNING: hgwdev does not have /gbdb/canFam5/ncbiRefSeq/seqNcbiRefSeq.rna.fa # WARNING: canFam5 does not have seq