81ed6a428f3655d2f32f1cc523993632955e2e6f chmalee Fri Jul 24 17:04:40 2020 -0700 Fix up LOVD otto script after code review, refs #25925 diff --git src/hg/utils/otto/lovd/checkAndLoad.sh src/hg/utils/otto/lovd/checkAndLoad.sh index a6a58b2..d5770fc 100755 --- src/hg/utils/otto/lovd/checkAndLoad.sh +++ src/hg/utils/otto/lovd/checkAndLoad.sh @@ -1,78 +1,63 @@ #!/bin/sh -e # Do not modify this script, modify the source tree copy: # src/utils/otto/lovd/check.sh # This script is used via a cron job umask 002 WORKDIR="/hive/data/outside/otto/lovd" TAWK=/cluster/bin/scripts/tawk KENTBIN=/cluster/bin/x86_64/ if [ ! -d "${WORKDIR}" ]; then echo "ERROR in lovd release, Can not find the directory: ${WORKDIR}" exit 255 fi cd "${WORKDIR}" today=`date +%F` cd $today # count columns and make sure new files have 6 columns new38Cc=`cat lovd.hg38.bed | $TAWK '(NF!=6)' | wc -l` new19Cc=`cat lovd.hg19.bed | $TAWK '(NF!=6)' | wc -l` -#new18Cc=`cat lovd.hg18.bed | $TAWK '(NF!=6)' | wc -l` if [ "$new38Cc" -ne "0" ]; then echo LVOD hg38 $today: found rows with not six columns, quitting exit 255 fi if [ "$new19Cc" -ne "0" ]; then echo LVOD hg19 $today: found rows with not six columns, quitting exit 255 fi -#if [ "$new18Cc" -ne "0" ]; then -# echo LVOD hg18 $today: found rows with not six columns, quitting -# exit 255 -#fi - -# compare old and new line counts and abort if no increase -$KENTBIN/bedClip lovd.hg38.bed /cluster/data/hg38/chrom.sizes stdout | sed -e 's/^chrM/chrMT/g' > lovd.hg38.clipped.bed -$KENTBIN/bedClip lovd.hg19.bed /cluster/data/hg19/chrom.sizes stdout | sed -e 's/^chrM/chrMT/g' > lovd.hg19.clipped.bed -echo -cat lovd.hg38.clipped.bed | awk '(($3-$2)<50)' > lovd.hg38.short.bed -cat lovd.hg19.clipped.bed | awk '(($3-$2)<50)' > lovd.hg19.short.bed -echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.hg38.short.bed -echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.hg19.short.bed -cat lovd.hg38.clipped.bed | awk '(($3-$2)>=50)' > lovd.hg38.long.bed -cat lovd.hg19.clipped.bed | awk '(($3-$2)>=50)' > lovd.hg19.long.bed -echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.hg38.long.bed -echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.hg19.long.bed -old38ShortLc=`$KENTBIN/hgsql hg38 -e "SELECT COUNT(*) from lovdShort" -NB` -old38LongLc=`$KENTBIN/hgsql hg38 -e "SELECT COUNT(*) from lovdLong" -NB` -new38ShortLc=`wc -l lovd.hg38.short.bed | cut -d' ' -f1 ` -new38LongLc=`wc -l lovd.hg38.long.bed | cut -d' ' -f1 ` -old19ShortLc=`$KENTBIN/hgsql hg19 -e "SELECT COUNT(*) from lovdShort" -NB` -old19LongLc=`$KENTBIN/hgsql hg19 -e "SELECT COUNT(*) from lovdLong" -NB` -new19ShortLc=`wc -l lovd.hg19.short.bed | cut -d' ' -f1 ` -new19LongLc=`wc -l lovd.hg19.long.bed | cut -d' ' -f1 ` - -echo hg38 short rowcount: old $old38ShortLc new: $new38ShortLc -echo hg38 long rowcount: old $old38LongLc new: $new38LongLc -echo hg19 short rowcount: old $old19ShortLc new: $new19ShortLc -echo hg19 long rowcount: old $old19LongLc new: $new19LongLc +for db in hg38 hg19 +do + if [ ${db} == "hg19" ] + then + $KENTBIN/bedClip lovd.${db}.bed /cluster/data/${db}/chrom.sizes stdout | sed -e 's/^chrM/chrMT/g' > lovd.${db}.clipped.bed + else + $KENTBIN/bedClip lovd.${db}.bed /cluster/data/${db}/chrom.sizes lovd.${db}.clipped.bed + fi -echo $old38ShortLc $new38ShortLc | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on hg38 LOVD short failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}' -echo $old38LongLc $new38LongLc | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on hg38 LOVD long failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}' -echo $old19ShortLc $new19ShortLc | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on hg19 LOVD short failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}' -echo $old19LongLc $new19LongLc | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on hg19 LOVD long failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}' + cat lovd.${db}.clipped.bed | awk '(($3-$2)<50)' > lovd.${db}.short.bed + cat lovd.${db}.clipped.bed | awk '(($3-$2)>=50)' > lovd.${db}.long.bed + if [ ${db} == "hg19" ] + then + echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.${db}.short.bed + echo -e 'chrM\t0\t16571\tCheck chrMT\tPlease look at chrMT, not chrM, for LOVD annotations.\t' >> lovd.${db}.long.bed + fi + oldShortLc=`$KENTBIN/hgsql ${db} -e "SELECT COUNT(*) from lovdShort" -NB` + oldLongLc=`$KENTBIN/hgsql ${db} -e "SELECT COUNT(*) from lovdLong" -NB` + newShortLc=`wc -l lovd.${db}.short.bed | cut -d' ' -f1 ` + newLongLc=`wc -l lovd.${db}.long.bed | cut -d' ' -f1 ` + echo ${db} short rowcount: old $oldShortLc new: $newShortLc + echo ${db} long rowcount: old $oldLongLc new: $newLongLc + echo $oldShortLc $newShortLc | awk -v d=${db} '{if (($2-$1)/$1 > 0.1) {printf "validate on %s LOVD short failed: old count: %d, new count: %d\n", d,$1,$2; exit 1;}}' + echo $oldLongLc $newLongLc | awk -v d=${db} '{if (($2-$1)/$1 > 0.1) {printf "validate on %s LOVD long failed: old count: %d, new count: %d\n", d,$1,$2; exit 1;}}' # bedDetail4.sql was generated like this: # egrep -v 'score|strand|thick|reserved|block|chromStarts' /cluster/home/max/kent/src/hg/lib/bedDetail.sql > bedDetail4.sql -# need to use bedClip as current files include invalid coords which LOVD won't fix. -#$KENTBIN/bedClip lovd.hg19.bed /cluster/data/hg19/chrom.sizes lovd.hg19.clipped.bed -cat lovd.hg38.short.bed | $KENTBIN/hgLoadBed hg38 lovdShort stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin -cat lovd.hg38.long.bed | $KENTBIN/hgLoadBed hg38 lovdLong stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin -cat lovd.hg19.short.bed | $KENTBIN/hgLoadBed hg19 lovdShort stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin -cat lovd.hg19.long.bed | $KENTBIN/hgLoadBed hg19 lovdLong stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin -#$KENTBIN/bedClip lovd.hg18.bed /cluster/data/hg18/chrom.sizes stdout | $KENTBIN/hgLoadBed hg18 lovd stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin + cat lovd.${db}.short.bed | $KENTBIN/hgLoadBed ${db} lovdShort stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin + cat lovd.${db}.long.bed | $KENTBIN/hgLoadBed ${db} lovdLong stdin -tab -sqlTable=../bedDetail4.sql -renameSqlTable -noBin +done + echo LOVD update: OK