8891db7a328c48a8cbacec4679514c0caec6fec9
kuhn
  Sun Feb 24 10:28:56 2013 -0800
script to make an ideogram for orgs with no cytology
diff --git src/utils/qa/makeCytoBandIdeo.csh src/utils/qa/makeCytoBandIdeo.csh
new file mode 100755
index 0000000..eb921d3
--- /dev/null
+++ src/utils/qa/makeCytoBandIdeo.csh
@@ -0,0 +1,101 @@
+#!/bin/tcsh
+source `which qaConfig.csh`
+
+set db=""
+set sql=~/kent/src/hg/lib/cytoBandIdeo.sql
+
+if ( $#argv == 0 || $#argv > 2 ) then
+  # no command line args
+  echo
+  echo "  make a cytoBandIdeo table for navigation if no real cytology available."
+  echo "  checks for existing cytoBandIdeo table and optionally overwrites."
+  echo "  note: the script may break on load step if chromNames are too long."
+  echo
+  echo "    usage:  database [overwrite]"
+  echo
+  exit
+else
+  set db=$argv[1]
+endif
+
+if ( $#argv == 2 ) then
+  if ( $argv[2] == "overwrite"  ) then
+    hgsql -e "DROP TABLE cytoBandIdeo" $db
+  else
+    echo
+    echo 'second argument must be "overwrite"'
+    echo 'which will overwrite existing cytoBandIdeo table'
+    echo
+    exit
+  endif
+endif
+
+if ( `hgsql -N -e "SHOW TABLES LIKE 'cytoBandIdeo'" $db` == "cytoBandIdeo" ) then
+  set ideoCount=`hgsql -N -e "SELECT COUNT(*) FROM cytoBandIdeo" $db` 
+  echo
+  echo "$db cytoBandIdeo table has $ideoCount rows"
+  echo 'run program with "overwrite" argument to continue'
+  $0
+  exit
+endif 
+
+# get chroms and sizes
+hgsql -N -e 'SELECT chrom, size FROM chromInfo' $db > $db.chroms
+# find all chromnames with centromeres and the cen coords
+hgsql -N -e 'SELECT chrom, chromStart, chromEnd FROM gap WHERE type = "centromere"' $db \
+  | sort > $db.cens
+
+file $db.cens | grep empty > /dev/null
+if ( $status ) then # process cens
+  # get the names only
+  cat $db.cens | awk '{print $1}' > $db.cenNames
+
+  # process cens into pieces
+  rm -f $db.splitChroms
+  foreach cenchrom (`cat $db.cenNames`)
+    set   pEnd=`cat $db.cens   | grep -w $cenchrom | awk '{print $2}'`
+    set qStart=`cat $db.cens   | grep -w $cenchrom | awk '{print $3}'`
+    set   qEnd=`cat $db.chroms | grep -w $cenchrom | awk '{print $2}'`
+    # make pArm
+    if ( $pEnd != 0 ) then # no pArm if acrocentric
+      echo $cenchrom 0 $pEnd p gneg | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5}' \
+        >> $db.splitChroms
+    endif
+    # make qArm
+    if ( $qStart != $qEnd ) then # no qArm if acrocentric (yes, it happens: see ornAna1 chrX2)
+      echo $cenchrom $qStart $qEnd q gneg | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5}' \
+        >> $db.splitChroms
+    endif
+  
+    # split the centromere into halves for making triangles in ideogram
+    set cen1start=$pEnd
+    set   cen2end=$qStart
+    @ censize = $cen2end - $cen1start
+    @ cen1end = $cen1start + $censize / 2
+    @ cen2start = $cen1end
+    echo $cenchrom $cen1start $cen1end cen acen | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5}' \
+      >> $db.splitChroms
+    echo $cenchrom $cen2start $cen2end cen acen | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5}' \
+      >> $db.splitChroms
+    cat $db.splitChroms > $db.cytoBand
+  end
+  # remove cen chroms from full list of chroms and format for cytoBandIdeo table
+  cat $db.chroms | grep -w -v -f $db.cenNames | sort > $db.chromsNoCens
+  cat $db.chromsNoCens | awk '{print $1"\t"0"\t"$2"\t""\t""gneg"}' >> $db.cytoBand
+else
+  cat $db.chroms | awk '{print $1"\t"0"\t"$2"\t""\t""gneg"}' > $db.cytoBand
+endif
+bedSort $db.cytoBand $db.cytoBand
+
+hgLoadSqlTab $db cytoBandIdeo $sql $db.cytoBand
+if ( $status ) then
+  set char=`hgsql -N -e "SELECT MAX(LENGTH(chrom)) FROM chromInfo" $db`
+  echo
+  echo "Load failed.  Possibly due to long chrom names."
+  echo "Edit kent/src/hg/lib/cytoBandIdeo.sql to increase chrom KEY size:"
+  grep  PRIMARY ~/kent/src/hg/lib/cytoBandIdeo.sql
+  grep  UNIQUE  ~/kent/src/hg/lib/cytoBandIdeo.sql
+  echo "Your longest chromName is $char long"
+  echo 
+endif
+