#!/bin/bash
#
# makePair - run buildPairAssembly on one reference/query sequence pair and
# package the results as a single-file track hub under ./hub.
#
# This text is the post-commit state of src/hg/utils/buildPairAssembly/makePair
# as of kent commit 70e4f023e0cd98a096dfe959142bbada59c4e7a9
# (braney, Mon May 20 17:28:31 2024 -0700, "ongoing work on pair browser").
#
# Usage:
#   makePair pairRefDb pairRefSeq pairRefDb2bit \
#            pairQueryDb pairQuerySeq pairQueryDb2bit pairChain
#
# Side effects: writes many intermediate files into the current directory
# (out.fa, *.psl, *.bed, *.chain, chain.tab, link.tab, tmp, tmp2, tmp3, foo, ...)
# and removes and recreates ./hub.
#
# bash is required: the ${var:offset:length} expansions below are not POSIX sh.
# Options are set here rather than on the shebang so they survive
# "bash makePair ...".
set -ex
set -o pipefail

if [ "$#" -lt 7 ]; then
  echo "usage: $0 pairRefDb pairRefSeq pairRefDb2bit pairQueryDb pairQuerySeq pairQueryDb2bit pairChain" >&2
  exit 1
fi

pairRefDb=$1
pairRefSeq=$2
pairRefDb2bit=$3
pairQueryDb=$4
pairQuerySeq=$5
pairQueryDb2bit=$6
pairChain=$7

# Directory holding the bigChain.as / bigLink.as autoSql files.
kentLib=$HOME/kent/src/hg/lib
# Local mirror of the hubs tree that is served from hubsUrl.
hubsRoot=/usr/local/apache/htdocs/hubs
hubsUrl=https://hgdownload.soe.ucsc.edu/hubs

#######################################
# Chain a PSL against the pair assembly and emit bigChain + bigLink bigBeds.
# Arguments:
#   $1 - input PSL
#   $2 - 2bit of the other (non-pair) genome, passed to axtChain
#   $3 - chain file to write
#   $4 - intermediate bigChain text file to write
#   $5 - output bigChain .bb
#   $6 - output bigLink .bb
# Side effects: overwrites chain.tab, link.tab and bigChain.bigLink in cwd.
#######################################
buildChainTracks() {
  local psl=$1 other2bit=$2 chain=$3 bigChainTxt=$4 chainBb=$5 linkBb=$6

  axtChain -psl -linearGap=medium "$psl" hub/buildPair.2bit "$other2bit" "$chain"
  # This invocation is followed by reads of chain.tab and link.tab; the
  # "hg38" database name is kept exactly as in the original script.
  hgLoadChain -noBin -test hg38 bigChain "$chain"
  sed 's/\.000000//' chain.tab \
    | awk 'BEGIN {OFS="\t"} {print $2, $4, $5, $11, 1000, $8, $3, $6, $7, $9, $10, $1}' \
    > "$bigChainTxt"
  bedToBigBed -type=bed6+6 -as="$kentLib/bigChain.as" -tab \
    "$bigChainTxt" hub/buildPair.sizes.txt "$chainBb"
  # LC_ALL=C gives a byte-wise chrom ordering.
  awk 'BEGIN {OFS="\t"} {print $1, $2, $3, $5, $4}' link.tab \
    | LC_ALL=C sort -k1,1 -k2,2n > bigChain.bigLink
  bedToBigBed -type=bed4+1 -as="$kentLib/bigLink.as" -tab \
    bigChain.bigLink hub/buildPair.sizes.txt "$linkBb"
}

#######################################
# Print the path (relative to hubsRoot) of the bbi file(s) matching a glob
# inside the query genome's bbi directory. Like the original backtick
# lookup, an unmatched glob is printed literally; a warning goes to stderr
# so a bogus hub URL is noticed.
# Globals:   hubsRoot, bbiDir (read)
# Arguments: $1 - file-name glob, e.g. '*rmsk.bb'
#######################################
hubBbiPath() {
  local pattern=$1 found
  # The unquoted expansion is deliberate: the shell must expand the glob.
  # shellcheck disable=SC2086
  found=$(cd "$hubsRoot" && echo $bbiDir/$pattern) || found=$bbiDir/$pattern
  case $found in
    *'*'*) echo "warning: nothing matched $hubsRoot/$found" >&2 ;;
  esac
  printf '%s\n' "$found"
}

# Restrict the input chains to the requested target/query sequence pair.
filterChain=$pairRefSeq.$pairQuerySeq.$pairChain
chainFilter -t="$pairRefSeq" -q="$pairQuerySeq" "$pairChain" > "$filterChain"

# NOTE(review): baseQuery.bed is named here but never used later in this script.
buildPairAssembly "$filterChain" "$pairRefSeq" "$pairRefDb2bit" \
  "$pairQuerySeq" "$pairQueryDb2bit" \
  out.fa query.psl target.psl map.bed dup.bed miss.bed baseTarget.bed baseQuery.bed

rm -rf hub
mkdir hub

faToTwoBit out.fa hub/buildPair.2bit
twoBitInfo hub/buildPair.2bit hub/buildPair.sizes.txt

bedToBigBed -type=bed9+4 map.bed hub/buildPair.sizes.txt hub/map.bb

buildChainTracks query.psl "$pairQueryDb2bit" query.chain \
  query.bigChain hub/queryChain.bb hub/queryChain.link.bb
buildChainTracks target.psl "$pairRefDb2bit" target.chain \
  targetChain.bigChain hub/targetChain.bb hub/targetChain.link.bb

chainSwap query.chain swap.query.chain
chainSwap target.chain swap.target.chain

# Lift both halves of each dup.bed record through the swapped chains.
awk '{print $1,$2,$3,$7}' dup.bed \
  | liftOver -minMatch=0.001 stdin swap.query.chain tmp foo
awk '{print $4,$5,$6,$7}' dup.bed \
  | liftOver -minMatch=0.001 stdin swap.target.chain tmp2 foo
# bedToBigBed needs chrom, then numeric start order; a plain "sort" compares
# whole lines as text and would put start 100 before start 20.
LC_ALL=C sort -k1,1 -k2,2n tmp tmp2 > tmp3
bedToBigBed -type=bed4 -tab tmp3 hub/buildPair.sizes.txt hub/dups.bb

bedToBigBed -type=bed4 miss.bed hub/buildPair.sizes.txt hub/mismatch.bb
bedToBigBed -type=bed4+1 baseTarget.bed hub/buildPair.sizes.txt hub/baseTarget.bb

# Split the query db name into the directory levels used under hubsRoot
# (presumably a GCA_/GCF_-style accession - verify against callers).
genome=$pairQueryDb
gcX="${genome:0:3}"
d0="${genome:4:3}"
d1="${genome:7:3}"
d2="${genome:10:3}"
bbiDir=$gcX/$d0/$d1/$d2/$genome/bbi

ensGeneBb=$hubsUrl/$(hubBbiPath '*ens*.bb')
rmskBb=$hubsUrl/$(hubBbiPath '*rmsk.bb')
simpleBb=$hubsUrl/$(hubBbiPath '*simpleRepeat.bb')

# Track groups referenced by the "group" lines in hub.txt.
cat << _EOF_ > hub/groups.txt
name map
label Mapping
priority 2
defaultIsClosed 0

name target
label From $pairRefDb
priority 3
defaultIsClosed 0

name query
label From $pairQueryDb
priority 3
defaultIsClosed 0
_EOF_

# Single-file hub definition; the shell expands the \$variables below.
# NOTE(review): ncbiRefSeqInsta points at a hard-coded /gbdb/hg38 file even
# though the reference db is a parameter - confirm this is intended.
cat << _EOF_ > hub/hub.txt
hub buildPair
longLabel buildPair
email braney@ucsc.edu
shortLabel buildPair
useOneFile on

genome $pairRefSeq-$pairQuerySeq
defaultPos buildPair:1-20000
organism $pairRefDb-$pairQueryDb
twoBitPath buildPair.2bit
groups groups.txt

track map
type bigBed 9
bigDataUrl map.bb
shortLabel Origin
longLabel Origin
itemRgb on
group map

track queryChain
type bigChain $pairQueryDb
bigDataUrl queryChain.bb
shortLabel $pairQueryDb Alignment
longLabel $pairQueryDb Alignment
group map

track targetChain
type bigChain $pairRefDb
bigDataUrl targetChain.bb
shortLabel $pairRefDb Alignment
longLabel $pairRefDb Alignment
group map

track ncbiRefSeqInsta
searchIndex name
color 12,12,120
altColor 120,12,12
shortLabel RefSeq All
type bigGenePred
bigDataUrl /gbdb/hg38/ncbiRefSeq/ncbiRefSeq.bb
longLabel NCBI RefSeq genes, curated and predicted sets (NM_*, XM_*, NR_*, XR_*, NP_* or YP_*)
baseColorUseCds given
baseColorDefault genomicCodons
priority 1
quickLiftUrl targetChain.bb
canPack 1
group target

track qdups
type bigBed 4
bigDataUrl dups.bb
shortLabel $pairQueryDb Dups
longLabel $pairQueryDb Dups
group map

track baseTarget
type bigBed 4
bigDataUrl baseTarget.bb
shortLabel baseTarget
longLabel baseTarget

track mismatch
type bigBed 4
bigDataUrl mismatch.bb
shortLabel Mismatches
longLabel Mismatches between $pairRefDb and $pairQueryDb
group map

#track trmsk
#type bigBed 9
#itemRgb on
#shortLabel Rmsk
#longLabel $pairRefDb Rmsk
#bigDataUrl $rmskBb
#quickLiftUrl targetChain.bb
#group target

track qrmsk
type bigBed 9
itemRgb on
shortLabel Rmsk
longLabel $pairQueryDb Rmsk
bigDataUrl $rmskBb
quickLiftUrl queryChain.bb
group query

track qsimple
type bigBed 12
shortLabel simpleRepeat
longLabel $pairQueryDb simpleRepeat
bigDataUrl $simpleBb
quickLiftUrl queryChain.bb
group query

track ensGeneQuery
type bigGenePred
shortLabel Ensembl
longLabel Ensembl
bigDataUrl $ensGeneBb
quickLiftUrl queryChain.bb
group query
_EOF_