#!/bin/bash
#
# getPubHubContact.sh - scrape contact emails for the public track hubs.
#
# Reads every (hubUrl, shortLabel) pair from the hubPublic table of hgcentral,
# downloads each hub.txt, extracts its "email" line and writes one line per
# hub to an HTML page under the genecats QA test-results directory.  When a
# hub.txt cannot be fetched, the contact recorded for that hub on the previous
# run (kept as publicHubContact.html.old) is carried forward instead.
#
# Usage: getPubHubContact.sh        (no arguments)
# Needs: hgsql, wget, curl, and write access to $base.
#
# Provenance: added as src/utils/qa/getPubHubContact.sh in commit
# 997f530b3abf1e33b636be6255a30243205796d1 (mspeir, Tue Nov 22 13:22:59 2016
# -0800), refs #18316.
#
# NOTE(review): in the copy of this script that was reviewed, the HTML tags
# inside the quoted strings were missing (the header printed only blank lines
# and the "hyperlink" was just the label, so the URL that the fallback lookup
# searches for was never written to the page).  The markup below is a minimal
# reconstruction - confirm it against the intended page layout.

# Deliberately no "set -e": failed downloads and greps that match nothing are
# expected for hubs that are down and are handled explicitly below.
set -u

base="/usr/local/apache/htdocs-genecats/qa/test-results/publicHubContactInfo"
contactFile="$base/publicHubContact.html"

#######################################
# Escape the characters that are special in HTML text and attribute values.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
html_escape() {
    # '&' must be handled first so the entities produced by the later
    # substitutions are not escaped a second time.
    printf '%s\n' "$1" \
        | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'
}

#######################################
# Print the first line starting with "email" from a downloaded hub.txt.
# Arguments: $1 - path to the saved hub.txt
# Outputs:   the email line (without any carriage return), or nothing when
#            the file is missing/unreadable or has no such line
#######################################
hub_email() {
    local hubTxt=$1
    [[ -r "$hubTxt" ]] || return 0
    # Only the first match is used: a record on the contact page must stay on
    # a single line for the fallback lookup to find it again.
    grep -E '^email' -- "$hubTxt" | head -n 1 | tr -d '\r'
}

#######################################
# Look up the contact text written for a hub on a previous run.
# Arguments: $1 - hub URL (unescaped)
#            $2 - path to the previous contact page
# Outputs:   the already HTML-escaped text that followed the hub's link, or
#            nothing when the page or the hub's record does not exist
#######################################
previous_email() {
    local url=$1 oldPage=$2 escapedUrl
    [[ -r "$oldPage" ]] || return 0
    # The page stores the URL escaped, so search for the escaped form, and
    # as a fixed string: URLs are full of regex metacharacters.
    escapedUrl=$(html_escape "$url")
    grep -F -- "$escapedUrl" "$oldPage" \
        | head -n 1 \
        | sed -n -e 's/<br>$//' -e 's/^.*<\/a> //p'
}

#######################################
# Build the contact page.
# Globals:   base, contactFile (read)
# Returns:   0 on success, 1 if the hub list could not be obtained or the
#            output location could not be prepared
#######################################
main() {
    local hubs url label hubFile email entry

    if ! command -v hgsql > /dev/null; then
        printf '%s: hgsql not found in PATH\n' "${0##*/}" >&2
        return 1
    fi

    # Query before touching any file: if the database is unreachable the
    # existing page and its .old copy must survive for the next run.
    if ! hubs=$(hgsql -h genome-centdb -Ne "select hubUrl,shortLabel from hubPublic" hgcentral); then
        printf '%s: could not read hubPublic from hgcentral\n' "${0##*/}" >&2
        return 1
    fi

    mkdir -p -- "$base/hubFiles" || return 1

    # Keep the previous page so contacts of unreachable hubs can be reused.
    if [[ -e "$contactFile" ]]; then
        mv -- "$contactFile" "$contactFile.old" || return 1
    fi

    # Make header for html file
    printf '<html>\n<body>\n' > "$contactFile" || return 1

    # hgsql -N prints one tab-separated row per line.
    while IFS=$'\t' read -r url label; do
        # An empty result still yields one empty line from the here-string.
        [[ -n "$url" ]] || continue

        # Spaces become underscores so the label is a single token on the
        # page; slashes are additionally replaced for the file name only.
        label=${label// /_}
        hubFile="$base/hubFiles/${label//\//_}.hub.txt"

        wget -t 5 -O "$hubFile" "$url" &> /dev/null
        email=$(hub_email "$hubFile")

        # If email is empty, wget failed or the hub is down: retry with curl.
        if [[ -z "$email" ]]; then
            curl --retry 5 "$url" -o "$hubFile" &> /dev/null
            email=$(hub_email "$hubFile")
        fi

        if [[ -n "$email" ]]; then
            entry=$(html_escape "$email")
        else
            # Hub is likely down: reuse the last contact we have.  The text
            # read back from the old page is already escaped.
            entry=$(previous_email "$url" "$contactFile.old")
        fi

        # Write hyperlink to hub.txt + contact email to file
        printf '<a href="%s">%s</a> %s<br>\n\n' \
            "$(html_escape "$url")" "$(html_escape "$label")" "$entry" \
            >> "$contactFile"
    done <<< "$hubs"

    printf '</body>\n</html>\n' >> "$contactFile"
}

main "$@"