src/utils/qa/GS1.csh 5adcf6bc2904690de7b7b30a83ec8a7a0996abe9

5adcf6bc2904690de7b7b30a83ec8a7a0996abe9
galt
  Tue Aug 21 00:01:25 2018 -0700
changing cse subdomain to soe

diff --git src/utils/qa/GS1.csh src/utils/qa/GS1.csh
index 48adf99..2c99949 100755
--- src/utils/qa/GS1.csh
+++ src/utils/qa/GS1.csh
@@ -1,316 +1,316 @@
 #!/bin/tcsh
 # Load the shared QA settings from the qaConfig.csh found on the PATH.
 # NOTE(review): $sqlbeta, used further down as the beta MySQL host, is not
 # set anywhere in this script, so it is presumably defined there -- confirm.
 source `which qaConfig.csh`
 
 ############################
 #  GS1.csh: Gene Sort (GS) table checks for one assembly.
 #  Compares the tables listed in ../fbTablesAll between the local
 #  database and beta, and prints a report to stdout.
 #
 #  03-11-04
 #  last updated
 #  04-09-04
 # 
 #  added command line db
 # 
 ############################
 
 # Defaults.  newRows stays empty until a row-count difference is computed.
 set newRows=""
 set db=""
 set betaDb=""
 
 # One or two arguments are required: the database, and optionally the
 # name of the previous assembly on beta to compare against.
 if ($#argv < 1 || $#argv > 2) then
   echo
   echo "  runs Gene Sort testing"
   echo "  list of tables, fbTablesAll, is expected up one directory"
   echo
   echo "    usage: database, [oldDb]"
   echo "      where oldDb is name of previous assembly on beta."
   echo
   # A bare "exit" here would report success even though nothing was
   # tested; return a non-zero status so wrappers can detect the misuse.
   exit 1
 else
   set db="$argv[1]"
 endif
 # Base hgTracks URL on hgwdev for this database; position links are built from it.
-set url1="http://hgwdev.cse.ucsc.edu/cgi-bin/hgTracks?db=$db"
+set url1="http://hgwdev.soe.ucsc.edu/cgi-bin/hgTracks?db=$db"
 
 # Compare against the same-named database on beta unless a second
 # argument names a different one.
 set betaDb=$db
 if ($#argv == 2) set betaDb=$argv[2]
 
 echo $db $betaDb
 
 # The list of tables lives one directory up; work on a local copy.
 # "then" is required here: without it tcsh parses a one-line if with no
 # command ("Empty if") and the else/endif below are left dangling.
 if (-e ../fbTablesAll) then
   cp ../fbTablesAll .
   set tablelist="fbTablesAll"
 else
   echo
   echo " sorry, need file:  ../fbTablesAll "
   echo " this is the file of tables specific to this assembly's GS"
   echo
   # non-zero status: no checks were run
   exit 1
 endif
 
 echo "using:"
 echo "db = $db, betaDb = $betaDb"
 echo "tablelist=$tablelist"
 echo
 
 # -------------------------------------------------
 # check update times against beta:
 
 
 echo "check update times against beta:"
 # betaDb always has a value by this point (it defaults to $db), so testing
 # it against "" was always true.  The note only applies when a different
 # database was named on the command line.
 if ( "$betaDb" != "$db" ) then
   echo "\n note: time comparison against other db is irrelevant.\n"
 endif
 
 # updateTimes.csh is an external helper; it is given the database and the
 # table list only.
 updateTimes.csh $db $tablelist 
 
 
 # -------------------------------------------------
 # check rowcounts against beta:
 
 echo
 echo "check rowcounts against beta"
 echo "old version, beta ($betaDb),  listed first"
 foreach table (`cat $tablelist`)
   echo
   echo $table
   echo "================="
   # NOTE(review): nothing visible in this script creates "junk"; the
   # removal is kept as it was.
   rm -f junk 
   set old=`hgsql -h $sqlbeta -N  -e "SELECT COUNT(*) FROM $table" $betaDb`
   set new=`hgsql -N  -e "SELECT COUNT(*) FROM $table" $db`
   # The counts are quoted because a failed query leaves the variable empty,
   # and an unquoted empty operand is an expression syntax error in tcsh.
   # newRows is reset on every path so a table with no counterpart does not
   # report the difference computed for the previous table.
   if ("$old" == "") then
     set newRows=""
     set percent="no old table"
   else if ("$new" == "") then
     set newRows=""
     set percent="no new table"
   else
     set newRows=`expr $new - $old`
     # an empty old table would otherwise divide by zero
     set percent=`echo $newRows $old | awk '{if ($2 == 0) print "n/a"; else printf "%.2f\n", ($1/$2)*100}'`
   endif
   echo "   old:  $old"
   echo "   new:  $new"
   echo "   more: $newRows, ($percent)%"
 end
 echo
 
 
 # -------------------------------------------------
 # dump the DESCRIBE output of every table on beta and locally, then diff
 # the two dumps so that only schema differences are printed:
 
 echo 
 echo "compare description of all tables with previous (shows diffs):"
 foreach tbl (`cat $tablelist`)
   set betaDesc=$betaDb.beta.$tbl.desc
   set devDesc=$db.dev.$tbl.desc
   echo $tbl
   hgsql -h $sqlbeta -N  -e "DESCRIBE $tbl" $betaDb >  $betaDesc
   hgsql -N  -e "DESCRIBE $tbl" $db > $devDesc
   diff $betaDesc $devDesc
   echo
 end
 echo
 
 # -------------------------------------------------
 # show two sample records from knownCanonical and from knownIsoforms:
 
 foreach gsTable (knownCanonical knownIsoforms)
   echo
   # braces keep the trailing colon from being read as a variable modifier
   echo "SELECT two records FROM ${gsTable}:"
   echo "================="
   hgsql -t -e "SELECT * FROM $gsTable limit 2" $db
   echo
 
 end
 
 # -------------------------------------------------
 # check that all knownGenes are in knownIsoforms:
 
 echo
 echo "check that all knownGenes are in knownIsoforms:"
 # Dump each ID column once and derive the sorted, de-duplicated list from
 # that dump.  (knownGene used to be queried a second time just to rebuild
 # the .uniq file that had been written one line earlier.)
 hgsql -N -e "SELECT name FROM knownGene" $db > $db.KG.name
 sort $db.KG.name | uniq > $db.KG.name.uniq
 hgsql -N -e "SELECT transcript FROM knownCanonical" $db > $db.knCanonical.transcript
 hgsql -N -e "SELECT transcript FROM knownIsoforms" $db  > $db.knIsoforms.transcript
 sort $db.knCanonical.transcript | uniq > $db.knCanonical.transcript.uniq
 sort $db.knIsoforms.transcript  | uniq > $db.knIsoforms.transcript.uniq
 # report the size of each de-duplicated list
 wc -l $db.knCanonical.transcript.uniq 
 wc -l $db.knIsoforms.transcript.uniq 
 wc -l $db.KG.name.uniq 
 echo
 
 # -------------------------------------------------
 # count the IDs shared by knownGene.name and knownIsoforms.transcript
 # (every knownGene name is expected to be present):
 
 set kgIds=$db.KG.name.uniq
 set isoIds=$db.knIsoforms.transcript.uniq
 set canonIds=$db.knCanonical.transcript.uniq
 
 echo
 echo  "names in common between KG.name and knIsoforms.transcript (expect all):"
 comm -12 $kgIds $isoIds | wc -l
 echo
 
 
 # -------------------------------------------------
 # count the transcripts shared by knownCanonical and knownIsoforms:
 
 echo
 echo "transcripts in common between knIsoforms and knCanonical"
 echo "   (expect one list to be found entirely within other):"
 comm -12 $canonIds $isoIds | wc -l
 echo
 
 # -------------------------------------------------
 # build the same ID lists from beta so the line counts can be compared
 # (the new release is expected to have more):
 
 echo
 echo
 echo "check that there are more in new release:"
 set betaKg=beta.$betaDb.KG.name
 hgsql -h $sqlbeta -N -e "SELECT name FROM knownGene" $betaDb > $betaKg
 sort $betaKg | uniq > $betaKg.uniq
 # the two transcript tables differ only in the Canonical/Isoforms suffix
 foreach kind (Canonical Isoforms)
   set betaTx=beta.$betaDb.kn$kind.transcript
   hgsql -h $sqlbeta -N -e "SELECT transcript FROM known$kind" $betaDb \
     > $betaTx
   sort $betaTx | uniq > $betaTx.uniq
 end
 echo
 wc -l *transcript* | grep -v total
 echo
 wc -l *KG* | grep -v total
 echo
 
 
 
 echo "-------------------------------------------------"
 # make list of dupes:
 
 echo
 echo "make list of dupes"
 # Count how often each clusterID occurs, most frequent first.
 # -N drops the column header so it is not counted as a clusterID.
 hgsql -N -e "SELECT clusterID from knownIsoforms" $db | sort | uniq -c \
    | sort -nr > $db.knIsoforms.clusterID.sort
 # Keep only the clusterIDs found more than once.  uniq -c separates the
 # count from the value with a space, so filter on the count field itself
 # instead of matching a literal "1<tab>", which never occurs.
 awk '$1 > 1' $db.knIsoforms.clusterID.sort \
    > $db.knIsoforms.clusterID.dupes
 echo "number of duplicated knownIsoforms.clusterID:"
 wc -l $db.knIsoforms.clusterID.dupes
 echo
 echo "most common duplicated knownIsoforms.clusterID:"
 head $db.knIsoforms.clusterID.dupes 
 echo
 # make links for the three biggest:
 # NOTE(review): the message below names $db.knIsoforms.mostCommon, but no
 # visible line writes that file; the links are only printed.
 echo "see $db.knIsoforms.mostCommon and check that each \
          clusterID refers to a set of overlapping transcripts \
          in the Browser:"
 foreach cluster ( `head -3 $db.knIsoforms.clusterID.dupes | awk '{print $2}'` )
   # turn "chrom<tab>start<tab>end" into "chrom:start-end"
   set pos=`hgsql -Ne 'SELECT chrom, chromStart, chromEnd FROM knownCanonical \
     WHERE clusterId = "'$cluster'"' $db | sed "s/\t/:/" | sed "s/\t/-/"`
   echo "$url1&position=$pos"
 end
 
 echo
 echo "check these coordinate blocks manually in GB and click-through to GS."
 echo "the number of transcripts in the window might not match the number \
      expected because the canonical may be smaller than one of the isoforms. \
      zoom out a little to get them all"
 echo
 
 echo "-------------------------------------------------"
 # look for knownIsoforms rows that occur more than once:
 
 set isoRows=$db.knownIsoforms.record
 
 echo
 echo "check for dupes in knownIsoforms"
 hgsql -N -e "SELECT * FROM knownIsoforms" $db | sort > $isoRows
 uniq $isoRows > $isoRows.uniq
 wc -l *record* | grep -v total
 echo "difference is number of dupes"
 echo "  dupes:"
 # rows left over after pairing the sorted dump with its de-duplicated
 # copy are the extra copies
 comm -23 $isoRows $isoRows.uniq > $isoRows.dupes
 head $isoRows.dupes
 echo
 
  
 # -------------------------------------------------
 # checking off chrom end:
 
 echo
 echo "checking off chrom end:"
 # table is also read by the start/end checks that follow this section
 set table=knownCanonical
 
 # For each chrom: chrom size minus the largest chromEnd in $table.
 # A negative value means a record extends past the end of the chrom.
 hgsql -e "SELECT chromInfo.chrom, chromInfo.size - MAX($table.chromEnd) AS dist_from_end FROM chromInfo, $table WHERE chromInfo.chrom = $table.chrom GROUP BY chromInfo.chrom" $db >  $db.GS.tx.offEnd
 
 # The filter selects negative distances, so the heading says "< 0"; the
 # chrom is printed with the distance so a hit can be located directly.
 echo "lines from $db.GS.tx.offEnd < 0:"
 awk '{if($2<0) {print $1, $2} }' $db.GS.tx.offEnd
 echo "expect blank or check file $db.GS.tx.offEnd"
 echo
 
 # -------------------------------------------------
 # list records whose start is not below their end, or whose start is
 # negative:
 
 echo
 echo "checking start < end and start < 0:"
 foreach badCoord ("chromStart >= chromEnd" "chromStart < 0")
   hgsql -N -e "SELECT * FROM $table WHERE $badCoord" $db
 end
 echo "expect nothing"
 echo
 
 
 # -------------------------------------------------
 # show indices:
 
 # echo
 # echo "show indices:"
 # foreach table (`cat $tablelist`)
 #   hgsql -t -e "SHOW INDEX FROM $table" $db
 #   echo
 # end
 # echo 
 
 
 echo "-------------------------------------------------"
 # get one record from each table:
 
 echo "get one record from each table:"
 echo
 
 
 # only the knownTo* tables from the table list are examined here
 grep knownTo $tablelist > $db.knownTo
 # The knownGene count does not depend on the table being examined, so it
 # is queried once rather than once per knownTo table.
 set kgNames=`hgsql -N -e  'SELECT name FROM knownGene' $db \
     | sort | uniq | wc -l`
 foreach table (`cat $db.knownTo`)
   echo $table
   echo "============="
   echo "one record:"
   hgsql -t -e "SELECT * FROM $table LIMIT 1" $db
   # knownToSuper is counted by its "gene" column; every other knownTo
   # table by "name".  The beta and local queries are otherwise identical.
   set idCol=name
   if ($table == "knownToSuper") set idCol=gene
   set old=`hgsql -h $sqlbeta -N -e "SELECT $idCol FROM $table" $betaDb \
       | sort | uniq | wc -l`
   set this=`hgsql -N -e "SELECT $idCol FROM $table" $db | sort | uniq | wc -l`
   echo "number of uniq names in the tables"
   echo "beta       = "$old
   echo "this table = "$this
   echo "knownGene  = "$kgNames
   echo
 end
 
 
 # closing reminders, then remove the files matching *desc (the DESCRIBE
 # dumps are written with that suffix)
 set rule=" -------------------------------------------------"
 
 echo "$rule"
 echo "            now run GS2.csh           "
 echo "$rule"
 echo
 
 echo "$rule"
 echo "            don't forget to change gdbPdb entry for this \
                  assembly as tables are pushed to beta and RR."
 echo "$rule"
 echo
 
 echo "end"
 rm -f *desc