d4d7d316841c2aeced33a4aaa088dd40741e942d rhead Fri Aug 24 17:21:27 2012 -0700 Changed the check for genomeClade so that it matches the whole word from the dbDb.genome field instead of using mysql LIKE. This was causing problems for pairs of organisms with similar names, such as tree shrew and shrew and guinea pig and pig. diff --git src/utils/qa/checkMetaData.csh src/utils/qa/checkMetaData.csh index 0188c87..d4e91f3 100755 --- src/utils/qa/checkMetaData.csh +++ src/utils/qa/checkMetaData.csh @@ -179,45 +179,37 @@ hgsql $host1 -Ne 'SELECT * FROM liftOverChain WHERE fromDb = "'$db'" \ or toDb = "'$db'"' $centdb1 | sort \ > $metatable.$db.$out1 hgsql $host2 -Ne 'SELECT * FROM liftOverChain WHERE fromDb = "'$db'" \ or toDb = "'$db'"' $centdb2 | sort \ > $metatable.$db.$out2 # check genomeClade # get genome name for the assembly to query genomeClade table. set genome=`hgsql -N -e 'SELECT genome FROM dbDb WHERE name = "'$db'"' \ hgcentraltest` -# pull out last word of the find, if in the format "G. species" -# and use LIKE to query genomeClade. -set secondWord=`echo $genome | gawk -F" " '{print $2}'` -if ( $secondWord != "" ) then - set genome=$secondWord -endif - set metatable="genomeClade" # get lookup for clade check -# filter out "/" when it appears in genome name - to avoid e.g, Dog/Human -hgsql $host1 -Ne 'SELECT * FROM genomeClade WHERE genome LIKE "%'$genome'"' \ - $centdb1 | grep -v "/" | sort > $metatable.$db.$out1 -hgsql $host2 -Ne 'SELECT * FROM genomeClade WHERE genome LIKE "%'$genome'"' \ - $centdb2 | grep -v "/" | sort > $metatable.$db.$out2 +hgsql $host1 -Ne "SELECT * FROM genomeClade WHERE genome='$genome'" \ + $centdb1 | sort > $metatable.$db.$out1 +hgsql $host2 -Ne "SELECT * FROM genomeClade WHERE genome='$genome'" \ + $centdb2 | sort > $metatable.$db.$out2 set metatable="" # compare and print results # should replace with commTiro.csh, but for now simply sorting genomeClade above foreach table ( `echo $metatables` ) comm -23 $table.$db.$out1 $table.$db.$out2 > $table.$db.${out1}Only comm -13 $table.$db.$out1 $table.$db.$out2 > $table.$db.${out2}Only comm -12 $table.$db.$out1 $table.$db.$out2 > $table.$db.common wc -l $table.$db.${out1}Only $table.$db.${out2}Only $table.$db.common \ | gawk '{ printf("%3d %-45s\n", $1, $2) }' \ | grep -v "total" echo end