57de18ab529e4d49f08d9e9ca5e937ce2b1ceec6
kuhn
  Thu Oct 17 12:28:47 2013 -0700
Working with Matt.  Removed checking of local UCSC links, especially the top-bar pulldowns.  Links are now checked one by one instead of all at once (previously via the checkLinks option of htmlCheck) so that timeouts on slow links can be fixed later.  Now checks on beta if an RR machine is specified, so it doesn't hammer the RR unnecessarily.  Renamed some variables so the script is easier to read.  Dropped hgw7 and hgw8 from the RR list.  Merged description.html into the tables list, even though it is served by a different CGI.  refs #11809
diff --git src/utils/qa/checkTrackUiLinks.csh src/utils/qa/checkTrackUiLinks.csh
index 36e5134..e7f378e 100755
--- src/utils/qa/checkTrackUiLinks.csh
+++ src/utils/qa/checkTrackUiLinks.csh
@@ -3,37 +3,39 @@
 
 ################################
 #  
 #  01-24-07
 #  Robert Kuhn
 #
 #  checks all links on trackUi pages for a track
 #
 ################################
 
 onintr cleanup
 
 set tableinput=""
 set tables=""
 set machine="hgwbeta"
+set host=""          # becomes "-h $sqlbeta" when the "all" table list is read from beta
 set rr="false"
 set baseUrl=""
 set target=""
+set cgi=""           # cgi name plus query string of the page being checked
 set hgsid=""
 set db=""
-set errorCount=0
-set totalCount=0
+set pgsWErrors=0     # number of pages reported with link errors
+set totalPgs=0       # number of pages examined
 
 if ( $#argv < 2 || $#argv > 3 ) then
   echo
   echo "  checks all links on trackUi pages for a track."
   echo
   echo "    usage:  database tablelist [machine]"
   echo '           tablelist may also be single table or "all"'
   echo "           machine defaults to hgwbeta"
   echo
   echo '    note: includes assembly description.html page if "all"'
   echo
   exit
 else
   set db=$argv[1]
   set tableinput=$argv[2]
@@ -67,88 +69,81 @@
 endif
 
 # check if it is a file or a tablename and set list
 file $tableinput | egrep "ASCII text" > /dev/null
 if (! $status) then
   set tables=`cat $tableinput`
 else
   set tables=$tableinput
 endif
 
 # set hgsid so don't fill up sessionDb table
 set baseUrl="http://$machine.cse.ucsc.edu"
 set hgsid=`htmlCheck  getVars $baseUrl/cgi-bin/hgGateway | grep hgsid \
   | head -1 | awk '{print $4}'`
 
-echo "hgw1 hgw2 hgw3 hgw4 hgw5 hgw6 hgw7 hgw8 " | grep $machine
+echo "hgw1 hgw2 hgw3 hgw4 hgw5 hgw6" | grep -w $machine > /dev/null  # quiet whole-word match; NOTE(review): $status is nonzero on NO match, so the rr test below looks inverted - confirm
 if ( $status ) then 
   set rr="true"
 endif
 
-# process "all" choice
+# for "all": list every trackDb table that has an html page, plus description.html
 if ( "all" == $tableinput ) then
-  set tables=`getField.csh $db trackDb tableName $machine \
-     | grep -v tableName`
-  set target="$baseUrl/cgi-bin/hgGateway?hgsid=$hgsid&db=$db"
-  # check description page if doing all of an assembly
-  htmlCheck checkLinks "$target" >& error$$
-  if ( `wc -w error$$ | awk '{print $1}'` != 0 ) then
-    echo
-    echo "description.html page:"
-    echo "======================"
-    cat error$$
-    @ errorCount = $errorCount + 1
+  if ( "hgwdev" == $machine ) then
+    set tables=`hgsql -Ne "SELECT tableName FROM trackDb WHERE html != ''" $db`
+  else
+    set tables=`hgsql -h $sqlbeta -Ne "SELECT tableName FROM trackDb WHERE html != ''" $db`
+    set host="-h $sqlbeta"  # NOTE(review): host is not read anywhere in the visible script - confirm it is needed
   endif
-  @ totalCount = $totalCount + 1
-  rm -f error$$
+  set tables="description.html $tables"  # pseudo-table name; the loop maps it to the hgGateway page
 endif
 
+# check the outside (non-ucsc) links on each page, one link at a time
 foreach table ( $tables )
-  # check to see if the table exists on the machine
-  getField.csh $db trackDb tableName $machine | grep -w $table > /dev/null
-  if ( $status ) then
-    echo "no such track"
-    continue
-  endif
-  set target="$baseUrl/cgi-bin/hgTrackUi?hgsid=$hgsid&db=$db&g=$table"
-  htmlCheck checkLinks "$target" >& error$$
-  # trap internal same-page anchors and discard
-  cat error$$ | egrep -v "doesn't exist" > error2$$
-  mv error2$$ error$$
-  # slow it down if hitting the RR
-  if ( "true" == $rr ) then
-    sleep 2
-  endif
-  if ( `wc -w error$$ | awk '{print $1}'` != 0 ) then
-    if ( `cat error$$` != "403 from http://hgwbeta.cse.ucsc.edu/cgi-bin/" ) then
-      echo
+  rm -f errorsOnPg$$     # start each page with an empty per-page report
+  if ( $table == "description.html" ) then
+    set cgi="hgGateway?hgsid=$hgsid&db=$db"
+  else
+    set cgi="hgTrackUi?hgsid=$hgsid&db=$db&g=$table"
+  endif
+  set target="$baseUrl/cgi-bin/$cgi"
+  htmlCheck getLinks "$target" | egrep "ftp:|http:" | grep -v ucsc > $db.$table.outsideLinks$$
+  if ( ! -z  $db.$table.outsideLinks$$ ) then
+    foreach link ( "`cat $db.$table.outsideLinks$$`" )
+      echo "$link" >>& errorsOnPg$$     # quoted, like the list above, so ? and * in a URL are not globbed
+      htmlCheck ok "$link" >>& errorsOnPg$$
+    end
+    egrep -B1 -v "ftp:|http" errorsOnPg$$ > /dev/null  # any line that is not an echoed link is htmlCheck output
+    if ( ! $status ) then
       echo $table
-      echo "============="
-      cat error$$
-      @ errorCount = $errorCount + 1
+      echo "=============="
+      egrep -B1 -v "ftp:|http" errorsOnPg$$  # -B1 also prints the link line preceding each message
+      echo
+      @ pgsWErrors = $pgsWErrors + 1
     endif
   endif
-  @ totalCount = $totalCount + 1
-  rm -f error$$
+  @ totalPgs = $totalPgs + 1
+  rm -f errorsOnPg$$
+  rm -f $db.$table.outsideLinks$$
 end
-echo
+
 echo "Summary"
-echo "======="
-if ( $totalCount == 1 ) then
-  echo $totalCount "page checked"
+echo "======= ======="  # NOTE(review): underline is wider than the Summary heading - confirm this is intended
+if ( $totalPgs == 1 ) then
+  echo $totalPgs "page checked"  # singular wording
 else
-  echo $totalCount "pages checked"
+  echo $totalPgs "pages checked"  # plural wording
 endif
-if ( $errorCount > 0) then
-  if ( $errorCount == 1) then
-    echo $errorCount "page with error(s) found"
+if ( $pgsWErrors > 0 ) then
+  if ( $pgsWErrors == 1 ) then
+    echo $pgsWErrors "page with error(s) found"
   else
-    echo $errorCount "pages with errors found"
+    echo $pgsWErrors "pages with errors found"
   endif
 else
   echo "No errors found!"
 endif
 echo
 
 cleanup:
 rm -f error$$
-rm -f error2$$
+rm -f errorsOnPg$$ $db.$table.outsideLinks$$  # temp files written by the link loop; also reached via onintr