21ecc89a7989c1f60d6da19c6e9eeb2dbe65525d braney Wed Sep 1 12:07:30 2021 -0700 update to blastTabs to mm39 from mm10 diff --git src/hg/utils/otto/knownGene/buildLoadOther.sh src/hg/utils/otto/knownGene/buildLoadOther.sh new file mode 100755 index 0000000..e3dafda --- /dev/null +++ src/hg/utils/otto/knownGene/buildLoadOther.sh @@ -0,0 +1,123 @@ +#!/bin/sh -ex +cd $dir + +{ +# Run nice Perl script to make all protein blast runs for +# Gene Sorter and Known Genes details page. Takes about +# 45 minutes to run. +rm -rf $dir/hgNearBlastp +mkdir $dir/hgNearBlastp +cd $dir/hgNearBlastp +# make sure all the fasta is there +ls -l $tempFa +ls -l $xdbFa +ls -l $ratFa +ls -l $fishFa +ls -l $flyFa +ls -l $wormFa +ls -l $yeastFa + +( cat << ThisEnd +# Latest human vs. other Gene Sorter orgs: +# mouse, rat, zebrafish, worm, yeast, fly + +targetGenesetPrefix known +targetDb $tempDb +queryDbs $xdb $ratDb $fishDb $flyDb $wormDb $yeastDb + +${tempDb}Fa $tempFa +${xdb}Fa $xdbFa +${ratDb}Fa $ratFa +${fishDb}Fa $fishFa +${flyDb}Fa $flyFa +${wormDb}Fa $wormFa +${yeastDb}Fa $yeastFa + +buildDir $dir/hgNearBlastp +scratchDir $scratchDir/brHgNearBlastp +ThisEnd +) > config.ra + +rm -rf $scratchDir/brHgNearBlastp +doHgNearBlastp.pl -noLoad -clusterHub=ku -distrHost=hgwdev -dbHost=hgwdev -workhorse=hgwdev config.ra + +# Load self +cd $dir/hgNearBlastp/run.$tempDb.$tempDb +# builds knownBlastTab +./loadPairwise.csh + +# Load human and rat +cd $dir/hgNearBlastp/run.$tempDb.$xdb +hgLoadBlastTab $tempDb $xBlastTab -maxPer=1 out/*.tab +cd $dir/hgNearBlastp/run.$tempDb.$ratDb +hgLoadBlastTab $tempDb $rnBlastTab -maxPer=1 out/*.tab + +# Remove non-syntenic hits for human and rat +# Takes a few minutes +mkdir -p /gbdb/$tempDb/liftOver +rm -f /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz +ln -s $genomes/$db/bed/liftOver/${db}To$RatDb.over.chain.gz \ + /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz +ln -s $genomes/$db/bed/liftOver/${db}To${Xdb}.over.chain.gz \ + /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz + +# delete non-syntenic genes from rat and mouse blastp tables +cd $dir/hgNearBlastp +synBlastp.csh $tempDb $xdb +# old number of unique query values: 62743 +# old number of unique target values 27998 +# new number of unique query values: 54818 +# new number of unique target values 26309 + +synBlastp.csh $tempDb $ratDb knownGene ensGene +# old number of unique query values: 63123 +# old number of unique target values 21163 +# new number of unique query values: 57012 +# new number of unique target values 20298 + +# Make reciprocal best subset for the blastp pairs that are too +# Far for synteny to help + +# Us vs. fish +cd $dir/hgNearBlastp +export aToB=run.$tempDb.$fishDb +export bToA=run.$fishDb.$tempDb +cat $aToB/out/*.tab > $aToB/all.tab +cat $bToA/out/*.tab > $bToA/all.tab +blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab +hgLoadBlastTab $tempDb drBlastTab $aToB/recipBest.tab + +# Us vs. fly +cd $dir/hgNearBlastp +export aToB=run.$tempDb.$flyDb +export bToA=run.$flyDb.$tempDb +cat $aToB/out/*.tab > $aToB/all.tab +cat $bToA/out/*.tab > $bToA/all.tab +blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab +hgLoadBlastTab $tempDb dmBlastTab $aToB/recipBest.tab + +# Us vs. worm +cd $dir/hgNearBlastp +export aToB=run.$tempDb.$wormDb +export bToA=run.$wormDb.$tempDb +cat $aToB/out/*.tab > $aToB/all.tab +cat $bToA/out/*.tab > $bToA/all.tab +blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab +hgLoadBlastTab $tempDb ceBlastTab $aToB/recipBest.tab + +# Us vs. yeast +cd $dir/hgNearBlastp +export aToB=run.$tempDb.$yeastDb +export bToA=run.$yeastDb.$tempDb +cat $aToB/out/*.tab > $aToB/all.tab +cat $bToA/out/*.tab > $bToA/all.tab +blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab +hgLoadBlastTab $tempDb scBlastTab $aToB/recipBest.tab + +# Clean up +cd $dir/hgNearBlastp +cat run.$tempDb.$tempDb/out/*.tab | gzip -c > run.$tempDb.$tempDb/all.tab.gz +gzip run.*/all.tab + +echo "BuildBlast successfully finished" +} > doBlast.log 2>&1