6fe242bade666e4fcc0b23d6da503633727a6863
braney
  Wed Aug 14 11:39:42 2019 -0700
adding instructions to build knownToMupit

diff --git src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh
index 169592f..b5b973a 100755
--- src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh
+++ src/hg/makeDb/doc/ucscGenes/hg38.ucscGenes19.sh
@@ -1001,15 +1001,40 @@
 $cd ..
 # hgMapToGene -exclude=abGenes.txt -tempDb=$tempDb $db ensGene knownGene knownToEnsembl -noLoad
 #awk '{print $2,$1}' ../knownToEnsembl.tab | sort | uniq > ensTransUcsc.tab
 $hgsql $db -e "select value,name from knownToEnsembl" | sort | uniq > ensTransUcsc.tab
 $echo "select transcript,protein from ensGtp" | hgsql hg38 | sort | uniq | awk '{if (NF==2) print}'  > ensTransProt.tab
 $join ensTransUcsc.tab ensTransProt.tab | awk '{if (NF==3)print $3, $2}' | sort | uniq  > ensProtToUc.tab
 $join ensProtToUc.tab ensToTreefam.tab | sort -u | awk 'BEGIN {OFS="\t"} {print $2,$3}' | sort -u > knownToTreefam.tab
 $hgLoadSqlTab $tempDb knownToTreefam $kent/src/hg/lib/knownTo.sql knownToTreefam.tab
 #end section not done
 
 # make bigKnownGene.bb
 cd $dir
 makeBigKnown hg38
 rm -f /gbdb/hg38/knownGene29.bb
 ln -s `pwd`/hg38.knownGene.bb /gbdb/hg38/knownGene29.bb
+
+# Build knownToMupit
+
+mkdir -p mupit
+cd mupit
+
+# mupit-pdbids.txt was emailed from Kyle Moad (kmoad@insilico.us.com)
+# wc -l mupit-pdbids.txt
+for mupitDb in hg38 hg19 hg18; do
+    # loop variable is mupitDb rather than db: this file uses $db elsewhere and must not be clobbered
+    # get knownGene IDs and associated PDB IDs; the extDb{Ref} joins come from
+    # hg/hgGene/domains.c:domainsPrint()
+    hgsql -Ne "select kgID, extAcc1 from $mupitDb.kgXref x \
+        inner join sp180404.extDbRef sp on x.spID = sp.acc \
+        inner join sp180404.extDb e on sp.extDb=e.id \
+        where x.spID != '' and e.val='PDB' order by kgID" \
+        > "$mupitDb.knownToPdb.txt"
+    # filter out pdbIds not found in mupit (IDs are upper-cased, then matched as fixed whole words)
+    tr 'a-z' 'A-Z' < mupit-pdbids.txt \
+        | grep -Fwf - "$mupitDb.knownToPdb.txt" > "$mupitDb.knownToMupit.txt"
+    # to check that it filtered correctly: cut -f2 $mupitDb.knownToMupit.txt | sort -u | wc -l
+    # load new table for hgGene/hgc
+    hgLoadSqlTab "$mupitDb" knownToMupit ~/kent/src/hg/lib/knownTo.sql "$mupitDb.knownToMupit.txt"
+done
+