66b1030888d2fbc504ed737350256b20442367c7 braney Fri Sep 3 10:43:22 2021 -0700 ongoing work on knownGene automation diff --git src/hg/utils/otto/knownGene/buildLoadOther.sh src/hg/utils/otto/knownGene/buildLoadOther.sh index e3dafda..8384b15 100755 --- src/hg/utils/otto/knownGene/buildLoadOther.sh +++ src/hg/utils/otto/knownGene/buildLoadOther.sh @@ -1,123 +1,32 @@ #!/bin/sh -ex cd $dir { -# Run nice Perl script to make all protein blast runs for -# Gene Sorter and Known Genes details page. Takes about -# 45 minutes to run. -rm -rf $dir/hgNearBlastp -mkdir $dir/hgNearBlastp -cd $dir/hgNearBlastp -# make sure all the fasta is there -ls -l $tempFa -ls -l $xdbFa -ls -l $ratFa -ls -l $fishFa -ls -l $flyFa -ls -l $wormFa -ls -l $yeastFa - -( cat << ThisEnd -# Latest human vs. other Gene Sorter orgs: -# mouse, rat, zebrafish, worm, yeast, fly - -targetGenesetPrefix known -targetDb $tempDb -queryDbs $xdb $ratDb $fishDb $flyDb $wormDb $yeastDb - -${tempDb}Fa $tempFa -${xdb}Fa $xdbFa -${ratDb}Fa $ratFa -${fishDb}Fa $fishFa -${flyDb}Fa $flyFa -${wormDb}Fa $wormFa -${yeastDb}Fa $yeastFa - -buildDir $dir/hgNearBlastp -scratchDir $scratchDir/brHgNearBlastp -ThisEnd -) > config.ra - -rm -rf $scratchDir/brHgNearBlastp -doHgNearBlastp.pl -noLoad -clusterHub=ku -distrHost=hgwdev -dbHost=hgwdev -workhorse=hgwdev config.ra - -# Load self -cd $dir/hgNearBlastp/run.$tempDb.$tempDb -# builds knownBlastTab -./loadPairwise.csh - -# Load human and rat -cd $dir/hgNearBlastp/run.$tempDb.$xdb -hgLoadBlastTab $tempDb $xBlastTab -maxPer=1 out/*.tab -cd $dir/hgNearBlastp/run.$tempDb.$ratDb -hgLoadBlastTab $tempDb $rnBlastTab -maxPer=1 out/*.tab - -# Remove non-syntenic hits for human and rat -# Takes a few minutes -mkdir -p /gbdb/$tempDb/liftOver -rm -f /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz -ln -s $genomes/$db/bed/liftOver/${db}To$RatDb.over.chain.gz \ - /gbdb/$tempDb/liftOver/${tempDb}To$RatDb.over.chain.gz -ln -s $genomes/$db/bed/liftOver/${db}To${Xdb}.over.chain.gz \ - /gbdb/$tempDb/liftOver/${tempDb}To$Xdb.over.chain.gz - -# delete non-syntenic genes from rat and mouse blastp tables -cd $dir/hgNearBlastp -synBlastp.csh $tempDb $xdb -# old number of unique query values: 62743 -# old number of unique target values 27998 -# new number of unique query values: 54818 -# new number of unique target values 26309 - -synBlastp.csh $tempDb $ratDb knownGene ensGene -# old number of unique query values: 63123 -# old number of unique target values 21163 -# new number of unique query values: 57012 -# new number of unique target values 20298 - -# Make reciprocal best subset for the blastp pairs that are too -# Far for synteny to help - -# Us vs. fish -cd $dir/hgNearBlastp -export aToB=run.$tempDb.$fishDb -export bToA=run.$fishDb.$tempDb -cat $aToB/out/*.tab > $aToB/all.tab -cat $bToA/out/*.tab > $bToA/all.tab -blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab -hgLoadBlastTab $tempDb drBlastTab $aToB/recipBest.tab +# # Finally, need to wait until after testing, but update databases in other organisms +# with blastTabs -# Us vs. fly +# Load blastTabs cd $dir/hgNearBlastp -export aToB=run.$tempDb.$flyDb -export bToA=run.$flyDb.$tempDb -cat $aToB/out/*.tab > $aToB/all.tab -cat $bToA/out/*.tab > $bToA/all.tab -blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab -hgLoadBlastTab $tempDb dmBlastTab $aToB/recipBest.tab - -# Us vs. worm -cd $dir/hgNearBlastp -export aToB=run.$tempDb.$wormDb -export bToA=run.$wormDb.$tempDb -cat $aToB/out/*.tab > $aToB/all.tab -cat $bToA/out/*.tab > $bToA/all.tab -blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab -hgLoadBlastTab $tempDb ceBlastTab $aToB/recipBest.tab - -# Us vs. yeast -cd $dir/hgNearBlastp -export aToB=run.$tempDb.$yeastDb -export bToA=run.$yeastDb.$tempDb -cat $aToB/out/*.tab > $aToB/all.tab -cat $bToA/out/*.tab > $bToA/all.tab -blastRecipBest $aToB/all.tab $bToA/all.tab $aToB/recipBest.tab $bToA/recipBest.tab -hgLoadBlastTab $tempDb scBlastTab $aToB/recipBest.tab +hgLoadBlastTab $xdb $blastTab run.$xdb.$tempDb/out/*.tab +echo Loaded $xdb.$blastTab +hgLoadBlastTab $ratDb $blastTab run.$ratDb.$tempDb/out/*.tab +echo Loaded $ratDb.$blastTab +hgLoadBlastTab $flyDb $blastTab run.$flyDb.$tempDb/recipBest.tab +echo Loaded $flyDb.$blastTab +hgLoadBlastTab $wormDb $blastTab run.$wormDb.$tempDb/recipBest.tab +echo Loaded $wormDb.$blastTab +hgLoadBlastTab $yeastDb $blastTab run.$yeastDb.$tempDb/recipBest.tab +echo Loaded $yeastDb.$blastTab +hgLoadBlastTab $fishDb $blastTab run.$fishDb.$tempDb/recipBest.tab +echo Loaded $fishDb.$blastTab + +# Do synteny on mouse/human/rat +synBlastp.csh $xdb $db + +synBlastp.csh $ratDb $db ensGene knownGene # Clean up -cd $dir/hgNearBlastp -cat run.$tempDb.$tempDb/out/*.tab | gzip -c > run.$tempDb.$tempDb/all.tab.gz -gzip run.*/all.tab +#rm -r run.*/out -echo "BuildBlast successfully finished" -} > doBlast.log 2>&1 +echo "LoadOther successfully finished" +} > doLoadOther.log 2>&1