199b2d4113d9498515030061888af66cbf7e8246
hiram
  Thu Mar 26 17:40:17 2020 -0700
adding some of the scripts that do the comparisons refs #24547

diff --git src/hg/makeDb/doc/assemblyEquivalence/runOne src/hg/makeDb/doc/assemblyEquivalence/runOne
new file mode 100755
index 0000000..897c424
--- /dev/null
+++ src/hg/makeDb/doc/assemblyEquivalence/runOne
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+set -beEu -o pipefail
+
+if [ $# -ne 4 ]; then
+  printf "usage: runOne <sourceAuth> <sourceId> <destAuth> <destId>\n" 1>&2
+  printf "example:\n" 1>&2
+  printf "./runOne refseq GCF_009858895.2_ASM985889v3 \\
+    ensembl Caenorhabditis_elegans.WBcel235\n" 1>&2
+  exit 255
+fi
+
+##########################################################################
+### translate a GCA/GCF assembly ID into a directory path
+##########################################################################
+function gcDir() {
+local asmId=$1
+
+local gcX=`echo "$asmId" | cut -c1-3`
+local d0=`echo "$asmId" | cut -c5-7`
+local d1=`echo "$asmId" | cut -c8-10`
+local d2=`echo "$asmId" | cut -c11-13`
+
+local outDir="${gcX}/${d0}/${d1}/${d2}"
+
+printf "%s" "${outDir}"
+
+}
+
+export sourceAuth=$1
+export sourceId=$2
+export destAuth=$3
+export destId=$4
+
+function idKeys() {
+export buildDir="notFound"
+export auth=$1
+export id=$2
+case "${auth}" in
+  ucsc) buildDir="/hive/data/genomes/${id}/bed/idKeys"
+        ;;
+  ensembl) speciesDir=`echo $id | cut -d. -f1`
+   buildDir="/hive/data/outside/ensembl/genomes/release-99/idKeys/$speciesDir"
+        ;;
+  genbank) gca=`gcDir $id`
+   buildDir="/hive/data/genomes/asmHubs/genbankBuild/$gca/$id/idKeys"
+        ;;
+  refseq)  gcf=`gcDir $id`
+   buildDir="/hive/data/genomes/asmHubs/refseqBuild/$gcf/$id/idKeys"
+        ;;
+  *) printf "ERROR: can not find source authority, must be one of:\n" 1>&2
+     printf "ucsc ensembl genbank refseq\n" 1>&2
+     exit 255;
+        ;;
+esac
+printf "%s" "${buildDir}/${id}.idKeys.txt"
+}
+
+export sourceKeys=`idKeys $sourceAuth $sourceId`
+export destKeys=`idKeys $destAuth $destId`
+
+if [ ! -s "${sourceKeys}" ]; then
+  printf "ERROR: can not find source keys: %s\n" "${sourceKeys}" 1>&2
+  exit 255
+fi
+if [ ! -s "${destKeys}" ]; then
+  printf "ERROR: can not find dest keys: %s\n" "${destKeys}" 1>&2
+  exit 255
+fi
+
+## ls -og "${sourceKeys}" "${destKeys}"
+
+export aDupCount=`cat ${sourceKeys} | wc -l`
+export bDupCount=`cat ${destKeys} | wc -l`
+export aCount=`cut -f1 ${sourceKeys} | sort -u | wc -l`
+export bCount=`cut -f1 ${destKeys} | sort -u | wc -l`
+export joinCount=`join -t$'\t' <(sort -k1,1 -u $sourceKeys) <(sort -k1,1 -u $destKeys) | wc -l`
+export diffA=`echo $aCount $joinCount | awk '{printf "%d", $1-$2}'`
+export diffB=`echo $bCount $joinCount | awk '{printf "%d", $1-$2}'`
+export dupsA=`echo $aDupCount $aCount | awk '{printf "%d", $1-$2}'`
+export dupsB=`echo $bDupCount $bCount | awk '{printf "%d", $1-$2}'`
+export missedA=`echo $aDupCount $joinCount | awk '{printf "%d", $1-$2}'`
+export missedB=`echo $bDupCount $joinCount | awk '{printf "%d", $1-$2}'`
+export missedAperCent=`echo $aDupCount $joinCount | awk '{printf "%d", 100*($1-$2)/$1}'`
+export missedBperCent=`echo $bDupCount $joinCount | awk '{printf "%d", 100*($1-$2)/$1}'`
+if [ "${missedAperCent}" -lt 10 -a "${missedBperCent}" -lt 10 ]; then
+  printf "%s\t%s\t%s\t%s\t%d\t%d\t%d\tcount A %d, count B %d, diffA %d, diffB %d, dupsA %d, dupsB %d, missA %d, missB %d, AperCent %d, BperCent %d\n" "${sourceId}" "${destId}" "${sourceAuth}" "${destAuth}" "${joinCount}" "${aDupCount}" "${bDupCount}" "${aCount}" "${bCount}" "${diffA}" "${diffB}" "${dupsA}" "${dupsB}" "${missedA}" "${missedB}" "${missedAperCent}" "${missedBperCent}"
+else
+  printf "%s\t%s\t%s\t%s\t%d\t%d\t%d\tcount A %d, count B %d, diffA %d, diffB %d, dupsA %d, dupsB %d, missA %d, missB %d, AperCent %d, BperCent %d\n" "${sourceId}" "${destId}" "${sourceAuth}" "${destAuth}" "${joinCount}" "${aDupCount}" "${bDupCount}" "${aCount}" "${bCount}" "${diffA}" "${diffB}" "${dupsA}" "${dupsB}" "${missedA}" "${missedB}" "${missedAperCent}" "${missedBperCent}" 1>&2
+fi