2df46316a4bbb699e3c24bf57485b216e3c32af4 hiram Fri May 3 12:06:07 2024 -0700 making liftOver big files for track diff --git src/hg/utils/automation/doBlastzChainNet.pl src/hg/utils/automation/doBlastzChainNet.pl index b42db8a..7e36792 100755 --- src/hg/utils/automation/doBlastzChainNet.pl +++ src/hg/utils/automation/doBlastzChainNet.pl @@ -984,30 +984,43 @@ my $whatItDoes = "It generates nets (without repeat/gap stats -- those are added later on $dbHost) from chains, and generates axt, maf and .over.chain from the nets."; my $bossScript = new HgRemoteScript("$runDir/netChains.csh", $workhorse, $runDir, $whatItDoes, $DEF); $bossScript->add(<<_EOF_ # Make nets ("noClass", i.e. without rmsk/class stats which are added later): chainPreNet $inclHap $chain $defVars{SEQ1_LEN} $defVars{SEQ2_LEN} stdout \\ | chainNet $inclHap stdin -minSpace=1 $defVars{SEQ1_LEN} $defVars{SEQ2_LEN} stdout /dev/null \\ | netSyntenic stdin noClass.net # Make liftOver chains: netChainSubset -verbose=0 noClass.net $chain stdout \\ | chainStitchId stdin stdout | gzip -c > $tDb.$qDb.over.chain.gz +hgLoadChain -test -noBin -tIndex $tDb chainLiftOver$QDb $tDb.$qDb.over.chain.gz +wget --no-check-certificate -O bigChain.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigChain.as' +wget --no-check-certificate -O bigLink.as 'http://genome-source.soe.ucsc.edu/gitlist/kent.git/raw/master/src/hg/lib/bigLink.as' +sed 's/.000000//' chain.tab | awk 'BEGIN {OFS="\\t"} {print \$2, \$4, \$5, \$11, 1000, \$8, \$3, \$6, \$7, \$9, \$10, \$1}' > chainLiftOver${QDb}.tab +bedToBigBed -type=bed6+6 -as=bigChain.as -tab chainLiftOver${QDb}.tab $defVars{SEQ1_LEN} chainLiftOver${QDb}.bb +awk 'BEGIN {OFS="\\t"} {print \$1, \$2, \$3, \$5, \$4}' link.tab | sort -k1,1 -k2,2n > chainLiftOver${QDb}Link.tab +bedToBigBed -type=bed4+1 -as=bigLink.as -tab chainLiftOver${QDb}Link.tab $defVars{SEQ1_LEN} chainLiftOver${QDb}Link.bb +set totalBases = `ave -col=2 $defVars{SEQ1_LEN} | grep "^total" | awk '{printf "%d", \$2}'` +set basesCovered = `bedSingleCover.pl chainLiftOver${QDb}Link.tab | ave -col=4 stdin | grep "^total" | awk '{printf "%d", \$2}'` +set percentCovered = `echo \$basesCovered \$totalBases | awk '{printf "%.3f", 100.0*\$1/\$2}'` +printf "%d bases of %d (%s%%) in intersection\\n" "\$basesCovered" "\$totalBases" "\$percentCovered" > ../fb.$tDb.chainLiftOver${QDb}Link.txt +rm -f link.tab chain.tab bigChain.as bigLink.as chainLiftOver${QDb}Link.tab chainLiftOver${QDb}.tab + _EOF_ ); my $seq1Dir = $defVars{'SEQ1_DIR'}; my $seq2Dir = $defVars{'SEQ2_DIR'}; if ($splitRef) { $bossScript->add(<<_EOF_ # Make axtNet for download: one .axt per $tDb seq. netSplit noClass.net net cd .. mkdir -p axtNet foreach f (axtChain/net/*.net) netToAxt \$f axtChain/chain/\$f:t:r.chain \\ $seq1Dir $seq2Dir stdout \\ | axtSort stdin stdout \\ | gzip -c > axtNet/\$f:t:r.$tDb.$qDb.net.axt.gz