696e68b89c07237e19c04ed974d0e0ee90dbc784
lrnassar
  Wed Sep 11 13:56:30 2024 -0700
Adding the doPublicCrawl which is part of the public hub search index process into the kent tree, refs #34415

diff --git src/utils/qa/doPublicCrawl src/utils/qa/doPublicCrawl
new file mode 100755
index 0000000..90154ad
--- /dev/null
+++ src/utils/qa/doPublicCrawl
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Needs bash (e.g. the "function" keyword).  First check: stop if the RR's hubPublic has URLs that hgwdev's lacks.
+hgsql -e "select hubUrl from hubPublic" hgcentraltest | sort -u > dev.URLs
+hgsql -h genome-centdb hgcentral -e "select hubUrl from hubPublic" | sort -u > rr.URLs
+comm -2 -3 rr.URLs dev.URLs   # list the URLs found only in rr.URLs
+check=$(comm -2 -3 rr.URLs dev.URLs | wc -l)
+if [ "$check" -gt 0 ]; then
+  printf '\n%s\n' "The URL(s) above differs in hgcentral on the RR compared with what is on hgwdev's hubPublic table."
+  echo "When there are mismatching URLs the search files about to be built will fail to work."
+  printf '\n%s\n' "Please update hubPublic on dev, beta, and the RR to have the same correct URL for the above hub(s)."
+  exit 1
+fi
+# The two URL lists are only removed when the check passes.
+rm -f rr.URLs dev.URLs
+# This section starts the building of the index files.
+rm -f publicHubCrawl.txt   # -f: stay quiet when there is no previous output, e.g. on a first run
+
+# Added 6/21/17 to support new public hub search mechanism
+workingDir=$(pwd)
+# Remove this run's job file and numbered crawl outputs; both are named after our PID ($$).
+function cleanUp {
+  rm -f -- "$workingDir/$$.jobFile" "$workingDir"/$$.*.txt
+}
+# Run cleanUp on every exit path; workingDir is assigned above so it is never unset here.
+trap cleanUp EXIT
+i=0   # counter used to number the per-hub output files
+# end added 6/21/17
+#
+hgsql -e "select hubUrl from hubPublic" hgcentraltest | tail -n +2 |
+while read -r url; do
+    # Queue one hubCrawl job per hub, each writing to its own $$.<n>.txt file.
+    # The URL is passed as a %q argument instead of being pasted into the format
+    # string: printf would otherwise eat "%" sequences such as %20, and the shell
+    # that runs the job would act on characters like "&", "?" or ";".
+    printf 'hubCrawl -udcDir=/hive/groups/browser/hubCrawl/udcCache %q > %s || true\n' \
+        "$url" "$$.$i.txt" >> $$.jobFile
+    i=$((i + 1))
+done
+# Added 6/21/17 to support new public hub search mechanism
+# Run up to 25 jobs in parallel.  -d '\n' keeps xargs from stripping the quoting
+# written above, and bash (not sh) runs each job because %q emits bash quoting.
+xargs -a $$.jobFile -d '\n' -P 25 -I % bash -c '%'
+cat $$.*.txt > publicHubCrawl.txt
+cleanUp
+# Get directory and file permissions in order before pushing.  The path is the -udcDir passed to
+# hubCrawl; -exec (unlike "| xargs") copes with odd file names.  A failed load stops the script.
+find /hive/groups/browser/hubCrawl/udcCache -type d -exec chmod 777 {} +
+find /hive/groups/browser/hubCrawl/udcCache -type f -exec chmod 666 {} +
+hgLoadSqlTab hgcentraltest hubSearchText ~/kent/src/hg/lib/hubSearchText.sql publicHubCrawl.txt || exit 1
+echo "Now ask the admins to push the /hive/groups/browser/hubCrawl/udcCache directory from here to hgnfs1,"
+echo "euro, and asia at /usr/local/apache/userdata/hgHubConnect/,"
+echo "and also ask them to push the hubSearchText table from hgcentraltest to hgcentralbeta/rr/etc."
+echo "Don't forget to test!"
+# end added 6/21/17
+# ixIxx publicHubCrawl.txt public.ix public.ixx
+
+#rm -f /gbdb/hubs/public.ix /gbdb/hubs/public.ixx
+#ln -s `pwd`/public.ix /gbdb/hubs
+#ln -s `pwd`/public.ixx /gbdb/hubs