66b1030888d2fbc504ed737350256b20442367c7
braney
  Fri Sep 3 10:43:22 2021 -0700
ongoing work on knownGene automation

diff --git src/hg/utils/otto/knownGene/buildLoadOther.sh src/hg/utils/otto/knownGene/buildLoadOther.sh
index e3dafda..8384b15 100755
--- src/hg/utils/otto/knownGene/buildLoadOther.sh
+++ src/hg/utils/otto/knownGene/buildLoadOther.sh
@@ -1,123 +1,32 @@
 #!/bin/sh -ex
 cd $dir
 
 {
-# Run nice Perl script to make all protein blast runs for
-# Gene Sorter and Known Genes details page.  Takes about
-# 45 minutes to run.
-rm -rf   $dir/hgNearBlastp
-mkdir  $dir/hgNearBlastp
-cd $dir/hgNearBlastp
-# make sure all the fasta is there
-ls -l $tempFa
-ls -l $xdbFa
-ls -l $ratFa
-ls -l $fishFa
-ls -l $flyFa
-ls -l $wormFa
-ls -l $yeastFa
-
-( cat << ThisEnd
-# Latest human vs. other Gene Sorter orgs:
-# mouse, rat, zebrafish, worm, yeast, fly
-
-targetGenesetPrefix known
-targetDb $tempDb
-queryDbs $xdb $ratDb $fishDb $flyDb $wormDb $yeastDb
-
-${tempDb}Fa $tempFa
-${xdb}Fa $xdbFa
-${ratDb}Fa $ratFa
-${fishDb}Fa $fishFa
-${flyDb}Fa $flyFa
-${wormDb}Fa $wormFa
-${yeastDb}Fa $yeastFa
-
-buildDir $dir/hgNearBlastp
-scratchDir $scratchDir/brHgNearBlastp
-ThisEnd
-) > config.ra
-
-rm -rf  $scratchDir/brHgNearBlastp
-doHgNearBlastp.pl -noLoad -clusterHub=ku -distrHost=hgwdev -dbHost=hgwdev -workhorse=hgwdev config.ra 
-
-# Load self
-cd $dir/hgNearBlastp/run.$tempDb.$tempDb
-# builds knownBlastTab
-./loadPairwise.csh
-
-# Load human and rat
-cd $dir/hgNearBlastp/run.$tempDb.$xdb
-hgLoadBlastTab $tempDb $xBlastTab -maxPer=1 out/*.tab
-cd $dir/hgNearBlastp/run.$tempDb.$ratDb
-hgLoadBlastTab $tempDb $rnBlastTab -maxPer=1 out/*.tab
-
-# Remove non-syntenic hits for human and rat
-# Takes a few minutes
-mkdir -p /gbdb/$tempDb/liftOver
-rm -f /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz
-ln -s $genomes/$db/bed/liftOver/${db}To$RatDb.over.chain.gz \
-    /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz
-ln -s $genomes/$db/bed/liftOver/${db}To${Xdb}.over.chain.gz \
-    /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz
-
-# delete non-syntenic genes from rat and mouse blastp tables
-cd $dir/hgNearBlastp
-synBlastp.csh $tempDb $xdb
-# old number of unique query values: 62743
-# old number of unique target values 27998
-# new number of unique query values: 54818
-# new number of unique target values 26309
-
-synBlastp.csh $tempDb $ratDb knownGene ensGene
-# old number of unique query values: 63123
-# old number of unique target values 21163
-# new number of unique query values: 57012
-# new number of unique target values 20298
-
-# Make reciprocal best subset for the blastp pairs that are too
-# Far for synteny to help
-
-# Us vs. fish
-cd $dir/hgNearBlastp
-export aToB=run.$tempDb.$fishDb
-export bToA=run.$fishDb.$tempDb
-cat $aToB/out/*.tab > $aToB/all.tab
-cat $bToA/out/*.tab > $bToA/all.tab
-blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
-hgLoadBlastTab $tempDb drBlastTab $aToB/recipBest.tab
+# Finally, once testing is complete, update the databases of the other organisms
+# with blastTabs
 
-# Us vs. fly
+# Load blastTabs
 cd $dir/hgNearBlastp
-export aToB=run.$tempDb.$flyDb
-export bToA=run.$flyDb.$tempDb
-cat $aToB/out/*.tab > $aToB/all.tab
-cat $bToA/out/*.tab > $bToA/all.tab
-blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
-hgLoadBlastTab $tempDb dmBlastTab $aToB/recipBest.tab
-
-# Us vs. worm
-cd $dir/hgNearBlastp
-export aToB=run.$tempDb.$wormDb
-export bToA=run.$wormDb.$tempDb
-cat $aToB/out/*.tab > $aToB/all.tab
-cat $bToA/out/*.tab > $bToA/all.tab
-blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
-hgLoadBlastTab $tempDb ceBlastTab $aToB/recipBest.tab
-
-# Us vs. yeast
-cd $dir/hgNearBlastp
-export aToB=run.$tempDb.$yeastDb
-export bToA=run.$yeastDb.$tempDb
-cat $aToB/out/*.tab > $aToB/all.tab
-cat $bToA/out/*.tab > $bToA/all.tab
-blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab
-hgLoadBlastTab $tempDb scBlastTab $aToB/recipBest.tab
+hgLoadBlastTab $xdb $blastTab run.$xdb.$tempDb/out/*.tab
+echo Loaded $xdb.$blastTab
+hgLoadBlastTab $ratDb $blastTab run.$ratDb.$tempDb/out/*.tab
+echo Loaded $ratDb.$blastTab
+hgLoadBlastTab $flyDb $blastTab run.$flyDb.$tempDb/recipBest.tab
+echo Loaded $flyDb.$blastTab
+hgLoadBlastTab $wormDb $blastTab run.$wormDb.$tempDb/recipBest.tab
+echo Loaded $wormDb.$blastTab
+hgLoadBlastTab $yeastDb $blastTab run.$yeastDb.$tempDb/recipBest.tab
+echo Loaded $yeastDb.$blastTab
+hgLoadBlastTab $fishDb $blastTab run.$fishDb.$tempDb/recipBest.tab
+echo Loaded $fishDb.$blastTab
+
+# Remove non-syntenic hits (synBlastp.csh) from the mouse/human and rat blastTabs
+synBlastp.csh $xdb $db
+
+synBlastp.csh $ratDb $db ensGene knownGene
 
 # Clean up
-cd $dir/hgNearBlastp
-cat run.$tempDb.$tempDb/out/*.tab | gzip -c > run.$tempDb.$tempDb/all.tab.gz
-gzip run.*/all.tab
+#rm -r run.*/out
 
-echo "BuildBlast successfully finished"
-} > doBlast.log 2>&1
+echo "LoadOther successfully finished"
+} > doLoadOther.log 2>&1