6fe242bade666e4fcc0b23d6da503633727a6863 braney Wed Aug 14 11:39:42 2019 -0700 adding instructions to build knownToMupit diff --git src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh index 169592f..b5b973a 100755 --- src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh +++ src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh @@ -1001,15 +1001,40 @@ $cd .. # hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db ensGene knownGene knownToEnsembl -noLoad #awk '{print $2,$1}' ../knownToEnsembl.tab | sort | uniq > ensTransUcsc.tab $hgsql $db -e "select value,name from knownToEnsembl" | sort | uniq > ensTransUcsc.tab $echo "select transcript,protein from ensGtp" | hgsql hg38 | sort | uniq | awk '{if (NF==2) print}' > ensTransProt.tab $join ensTransUcsc.tab ensTransProt.tab | awk '{if (NF==3)print $3, $2}' | sort | uniq > ensProtToUc.tab $join ensProtToUc.tab ensToTreefam.tab | sort -u | awk 'BEGIN {OFS="\t"} {print $2,$3}' | sort -u > knownToTreefam.tab $hgLoadSqlTab $tempDb knownToTreefam $kent/src/hg/lib/knownTo.sql knownToTreefam.tab #end section not done # make bigKnownGene.bb cd $dir makeBigKnown hg38 rm -f /gbdb/hg38/knownGene29.bb ln -s `pwd`/hg38.knownGene.bb /gbdb/hg38/knownGene29.bb + +# Build knownToMupit + +mkdir mupit +cd mupit + +# mupit-pdbids.txt was emailed from Kyle Moad (kmoad@insilico.us.com) +# wc -l mupit-pdbids.txt +for db in "hg38" "hg19" "hg18"; do \ + # get knownGene IDs and associated PDB IDS + # the extDb{Ref} parts come from hg/hgGene/domains.c:domainsPrint() + hgsql -Ne "select kgID, extAcc1 from $db.kgXref x \ + inner join sp180404.extDbRef sp on x.spID = sp.acc \ + inner join sp180404.extDb e on sp.extDb=e.id \ + where x.spID != '' and e.val='PDB' order by kgID" \ + > $db.knownToPdb.txt; + # filter out pdbIds not found in mupit + cat mupit-pdbids.txt | tr '[a-z]' '[A-Z]' | \ + grep -Fwf - $db.knownToPdb.txt > $db.knownToMupit.txt; + # check that it filtered correctly: + # cut -f2 $db.knownToMupit.txt | 
sort -u | wc -l; + # load new table for hgGene/hgc + hgLoadSqlTab $db knownToMupit ~/kent/src/hg/lib/knownTo.sql $db.knownToMupit.txt +done +