997f530b3abf1e33b636be6255a30243205796d1
mspeir
Tue Nov 22 13:22:59 2016 -0800
Adding new script that scrapes public hubs on the RR for contact emails and places them on a genecats webpage, refs #18316
diff --git src/utils/qa/getPubHubContact.sh src/utils/qa/getPubHubContact.sh
new file mode 100755
index 0000000..3a97a2a
--- /dev/null
+++ src/utils/qa/getPubHubContact.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+base="/usr/local/apache/htdocs-genecats/qa/test-results/publicHubContactInfo"
+contactFile="$base/publicHubContact.html"
+
+# Check if old contact file exists and if it does move it to archive file
+if [ -e $contactFile ]
+then
+ mv $contactFile $contactFile.old
+fi
+
+# Make header for html file
+echo -e "
+
+" >> $contactFile
+
+while read url label
+do
+ # Save hub shortLabel as a file so we can use it in a filename later
+ label=$(echo $label | sed -e 's/ /_/g')
+
+ # Make name for hub.txt output file that includes hub shortLabel
+ hubFile="$base/hubFiles/$label.hub.txt"
+ wget -t 5 -O $hubFile $url &> /dev/null
+
+ # Extract email from hub.txt file we saved
+ email=$(egrep "^email" $hubFile)
+
+ # If email is empty, then wget failed or hub is down
+ if [[ $email == "" ]]
+ then
+ # Attempt to get hub.txt file w/ curl
+ curl --retry 5 $url -o $hubFile &> /dev/null
+ # Extract email from hub.txt file we saved
+ email=$(grep "^email" $hubFile)
+
+ # If email is still empty, hub is likely down and
+ # we want to use the last email we have as contact email
+ if [[ $email == "" ]] && [ -e $contactFile.old ]
+ then
+ email=$(grep "$url" $contactFile.old | awk '{print $4" "$5}')
+ fi
+ fi
+
+ # Create hyperlinks to hub.txt files
+ hubLink="$label"
+
+ # Write contact email + hubUrl to file
+ echo -e "$hubLink $email
" >> $contactFile
+
+done<<<"$(hgsql -h genome-centdb -Ne "select hubUrl,shortLabel from hubPublic" hgcentral)"