66b1030888d2fbc504ed737350256b20442367c7
braney
  Fri Sep 3 10:43:22 2021 -0700
ongoing work on knownGene automation

diff --git src/hg/utils/otto/knownGene/buildBlast.sh src/hg/utils/otto/knownGene/buildBlast.sh
index e3dafda..f72f856 100755
--- src/hg/utils/otto/knownGene/buildBlast.sh
+++ src/hg/utils/otto/knownGene/buildBlast.sh
@@ -1,123 +1,122 @@
 #!/bin/sh -ex
 cd $dir
 
 {
 # Run nice Perl script to make all protein blast runs for
 # Gene Sorter and Known Genes details page.  Takes about
 # 45 minutes to run.
 rm -rf   $dir/hgNearBlastp
 mkdir  $dir/hgNearBlastp
 cd $dir/hgNearBlastp
 # make sure all the fasta is there
 ls -l $tempFa
 ls -l $xdbFa
 ls -l $ratFa
 ls -l $fishFa
 ls -l $flyFa
 ls -l $wormFa
 ls -l $yeastFa
 
 ( cat << ThisEnd
 # Latest human vs. other Gene Sorter orgs:
 # mouse, rat, zebrafish, worm, yeast, fly
 
 targetGenesetPrefix known
 targetDb $tempDb
 queryDbs $xdb $ratDb $fishDb $flyDb $wormDb $yeastDb
 
 ${tempDb}Fa $tempFa
 ${xdb}Fa $xdbFa
 ${ratDb}Fa $ratFa
 ${fishDb}Fa $fishFa
 ${flyDb}Fa $flyFa
 ${wormDb}Fa $wormFa
 ${yeastDb}Fa $yeastFa
 
 buildDir $dir/hgNearBlastp
 scratchDir $scratchDir/brHgNearBlastp
 ThisEnd
 ) > config.ra
 
 rm -rf  $scratchDir/brHgNearBlastp
 doHgNearBlastp.pl -noLoad -clusterHub=ku -distrHost=hgwdev -dbHost=hgwdev -workhorse=hgwdev config.ra 
 
 # Load self
 cd $dir/hgNearBlastp/run.$tempDb.$tempDb
 # builds knownBlastTab
 ./loadPairwise.csh
 
-# Load human and rat
 cd $dir/hgNearBlastp/run.$tempDb.$xdb
 hgLoadBlastTab $tempDb $xBlastTab -maxPer=1 out/*.tab
 cd $dir/hgNearBlastp/run.$tempDb.$ratDb
 hgLoadBlastTab $tempDb $rnBlastTab -maxPer=1 out/*.tab
 
 # Remove non-syntenic hits for human and rat
 # Takes a few minutes
 mkdir -p /gbdb/$tempDb/liftOver
 rm -f /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz
 ln -s $genomes/$db/bed/liftOver/${db}To$RatDb.over.chain.gz \
     /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz
 ln -s $genomes/$db/bed/liftOver/${db}To${Xdb}.over.chain.gz \
     /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz
 
 # delete non-syntenic genes from rat and mouse blastp tables
 cd $dir/hgNearBlastp
 synBlastp.csh $tempDb $xdb
 # old number of unique query values: 62743
 # old number of unique target values 27998
 # new number of unique query values: 54818
 # new number of unique target values 26309
 
 synBlastp.csh $tempDb $ratDb knownGene ensGene
 # old number of unique query values: 63123
 # old number of unique target values 21163
 # new number of unique query values: 57012
 # new number of unique target values 20298
 
 # Make reciprocal best subset for the blastp pairs that are too
 # Far for synteny to help
 
 # Us vs. fish
 cd $dir/hgNearBlastp
 export aToB=run.$tempDb.$fishDb
 export bToA=run.$fishDb.$tempDb
 cat $aToB/out/*.tab > $aToB/all.tab
 cat $bToA/out/*.tab > $bToA/all.tab
 blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
 hgLoadBlastTab $tempDb drBlastTab $aToB/recipBest.tab
 
 # Us vs. fly
 cd $dir/hgNearBlastp
 export aToB=run.$tempDb.$flyDb
 export bToA=run.$flyDb.$tempDb
 cat $aToB/out/*.tab > $aToB/all.tab
 cat $bToA/out/*.tab > $bToA/all.tab
 blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
 hgLoadBlastTab $tempDb dmBlastTab $aToB/recipBest.tab
 
 # Us vs. worm
 cd $dir/hgNearBlastp
 export aToB=run.$tempDb.$wormDb
 export bToA=run.$wormDb.$tempDb
 cat $aToB/out/*.tab > $aToB/all.tab
 cat $bToA/out/*.tab > $bToA/all.tab
 blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
 hgLoadBlastTab $tempDb ceBlastTab $aToB/recipBest.tab
 
 # Us vs. yeast
 cd $dir/hgNearBlastp
 export aToB=run.$tempDb.$yeastDb
 export bToA=run.$yeastDb.$tempDb
 cat $aToB/out/*.tab > $aToB/all.tab
 cat $bToA/out/*.tab > $bToA/all.tab
 blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
 hgLoadBlastTab $tempDb scBlastTab $aToB/recipBest.tab
 
 # Clean up
 cd $dir/hgNearBlastp
 cat run.$tempDb.$tempDb/out/*.tab | gzip -c > run.$tempDb.$tempDb/all.tab.gz
 gzip run.*/all.tab
 
 echo "BuildBlast successfully finished"
 } > doBlast.log 2>&1