src/hg/near/makeNear.doc 1.42
1.42 2009/10/16 17:35:33 kent
Adding update procedure for hg19 P2P tracks.
Index: src/hg/near/makeNear.doc
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/near/makeNear.doc,v
retrieving revision 1.41
retrieving revision 1.42
diff -b -B -U 4 -r1.41 -r1.42
--- src/hg/near/makeNear.doc 14 Oct 2009 23:37:40 -0000 1.41
+++ src/hg/near/makeNear.doc 16 Oct 2009 17:35:33 -0000 1.42
@@ -267,9 +267,9 @@
#vidal
cat nature04209-s17.xls | gawk '{print $1 "\t" $3 "\t" "1.0"}' > humanVidal.p2p
-hgNetDist humanVidal.p2p hg18 humanVidalP2P -threshold=2 -sqlRemap="select distinct locusLinkID, kgID from refLink, kgXref where refLink.mrnaAcc = kgXref.mRNA"
+hgLoadNetDist humanVidal.p2p hg18 humanVidalP2P -threshold=2 -sqlRemap="select distinct locusLinkID, kgID from refLink, kgXref where refLink.mrnaAcc = kgXref.mRNA"
#Added to hgNearData/Human/hg18/columnDb.ra
#-------------
name vidalP2p
@@ -432,38 +432,52 @@
---
#----------------------------------------------------------
-## (kg3 hg18 upgrade done galt 2007-03-29)
-# kg3 hg18 upgrade of Human p2p Protein-to-protein network - P2P columns
-# I used the hgNetDist program to calculate network-distances for all gene pairs from
-# the interaction.p2p files and created *.pathLengths files as output.
-# These were then read by hgLoadNetDist to remap the ids and create the
-# actual mysql tables used. Note that hgLoadNetDist was split off as a
-# separate program from hgNetDist to make the planned incremental kg3 upgrades
-# much easier and faster, since only the hgLoadNetDist has to be run
-# once the pathLengths files have been created.
+## (kent 2009-10-16)
+# HPRD p2p update used in hg19
+
+# First go to http://www.hprd.org, follow the download link, fill in the information they
+# request for academic users, and download HPRD_SINGLE_PSIMI_070609.xml.tar.gz into
+# /hive/data/outside/hprd/070609, and then unpack it with
+ cd /hive/data/outside/hprd/070609
+ tar -zxvf HPRD_SINGLE_PSIMI_070609.xml.tar.gz
+
+# Now run the hprdXmlToTab program, which was largely generated by autoDtd/autoXml.
+ hprdXmlToTab HPRD_SINGLE_PSIMI_070609.xml p2p.tab complex.tab
+# interaction count = 40075
+
+# Now use hgNetDist to generate the pathLengths file. This takes an hour or two.
+ hgNetDist -verbose=2 -weighted -threshold=2 p2p.tab hprd.pathLengths
+
+#----------------------------------------------------------
+
+## (kg3 hg19 upgrade done kent 2009-10-13)
+# kg3 hg19 creation of Human p2p Protein-to-protein network - P2P columns
+# Note: we can just reuse the pathLengths files calculated in the hg18 build, since
+# they do not depend on the assembly.
+
+# Copy the p2p directory in from the hg18 build (NOTE: p2p is a directory, so cp presumably needs -r)
+ cp /hive/data/genomes/hg18/p2p /hive/data/genomes/hg19
#hprd
- hgNetDist -verbose=2 -weighted -threshold=2 \
- /cluster/data/hg18/p2p/hprd/hprd.p2p \
- /cluster/data/hg18/p2p/hprd/hprd.pathLengths
- hgLoadNetDist /cluster/data/hg18/p2p/hprd/hprd.pathLengths hg18 humanHprdP2P \
+ hgLoadNetDist /hive/data/outside/hprd/070609/hprd.pathLengths hg19 humanHprdP2P \
-sqlRemap="select distinct value, name from knownToHprd"
+ # hgLoadNetDist 86 id-remapping misses, see missing.tab
+
#vidal
- hgNetDist -verbose=2 -skipFirst -threshold=2 \
- /cluster/data/hg18/p2p/vidal/humanVidal.p2p \
- /cluster/data/hg18/p2p/vidal/humanVidal.pathLengths
- hgLoadNetDist /cluster/data/hg18/p2p/vidal/humanVidal.pathLengths hg18 humanVidalP2P \
+ hgLoadNetDist /hive/data/genomes/hg19/p2p/vidal/humanVidal.pathLengths hg19 humanVidalP2P \
-sqlRemap="select distinct locusLinkID, kgID from refLink, kgXref where refLink.mrnaAcc = kgXref.mRNA"
+ # hgLoadNetDist 22 id-remapping misses, see missing.tab
+
#wanker
- hgNetDist -verbose=2 -skipFirst -threshold=2 \
- /cluster/data/hg18/p2p/wanker/humanWanker.p2p \
- /cluster/data/hg18/p2p/wanker/humanWanker.pathLengths
- hgLoadNetDist /cluster/data/hg18/p2p/wanker/humanWanker.pathLengths hg18 humanWankerP2P \
+ hgLoadNetDist /hive/data/genomes/hg19/p2p/wanker/humanWanker.pathLengths hg19 humanWankerP2P \
-sqlRemap="select distinct locusLinkID, kgID from refLink, kgXref where refLink.mrnaAcc = kgXref.mRNA"
+ # hgLoadNetDist 54 id-remapping misses, see missing.tab
+
+
###############################################################
# Affy All Exon GeneSorter column. (DONE Andy, 2008-03-17)
# NOTE - in future doing this in genome database rather than