199b2d4113d9498515030061888af66cbf7e8246
hiram
  Thu Mar 26 17:40:17 2020 -0700
adding some of the scripts that do the comparisons refs #24547

diff --git src/hg/makeDb/doc/assemblyEquivalence/A.vs.B.sh src/hg/makeDb/doc/assemblyEquivalence/A.vs.B.sh
new file mode 100755
index 0000000..a5c73ab
--- /dev/null
+++ src/hg/makeDb/doc/assemblyEquivalence/A.vs.B.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Creates nearMiss.<a>.<b>.run.list and nearMiss.<b>.<a>.run.list in the current dir.
+if [ $# -ne 2 ]; then
+  printf "usage: ./A.vs.B.sh <a> <b>\n" 1>&2
+  printf "where <a> <b> is a pair from:\n" 1>&2
+  printf "ucsc, genbank, refseq, ensembl\n" 1>&2
+  printf "creates files: nearMiss.a.b.run.list nearMiss.b.a.run.list\n" 1>&2
+  exit 255
+fi
+# A and B are exported, so child processes started below inherit them.
+export A="$1"
+export B="$2"
+# Report sizes of run lists left by a previous run (wc complains if none exist).
+printf "# existing list size:\n"
+wc -l "nearMiss.${A}.${B}.run.list" "nearMiss.${B}.${A}.run.list"
+# Column 1 of each ../<x>.<y>.exact.txt, de-duplicated -> <x>.<y>.done.list
+cut -f1 "../${A}.${B}.exact.txt" | sort -u > "${A}.${B}.done.list"
+cut -f1 "../${B}.${A}.exact.txt" | sort -u > "${B}.${A}.done.list"
+# Column 2 of each keySignatures.txt, de-duplicated -> <x>.full.list
+cut -f2 "../${B}/${B}.keySignatures.txt"  | sort -u > "${B}.full.list"
+cut -f2 "../${A}/${A}.keySignatures.txt"  | sort -u > "${A}.full.list"
+# comm -13 keeps lines present only in the full list (i.e. not in the done list).
+comm -13 "${B}.${A}.done.list" "${B}.full.list" > "${B}.${A}.toMatch.list"
+comm -13 "${A}.${B}.done.list" "${A}.full.list" > "${A}.${B}.toMatch.list"
+# Join toMatch entries against column 2 of keySignatures; print fields 4,1,3.
+join -t$'\t' -2 2 "${B}.${A}.toMatch.list" \
+    <(sort -k2,2 "../${B}/${B}.keySignatures.txt") \
+      | awk '{printf "%d\t%s\t%d\n", $4,$1,$3}' | sort -n \
+         > "uniqueCounts.${B}.${A}.txt"
+# Same join with the roles of A and B swapped.
+join -t$'\t' -2 2 "${A}.${B}.toMatch.list" \
+  <(sort -k2,2 "../${A}/${A}.keySignatures.txt") \
+      | awk '{printf "%d\t%s\t%d\n", $4,$1,$3}' | sort -n \
+         > "uniqueCounts.${A}.${B}.txt"
+# Reference path: /cluster/home/hiram/kent/src/hg/makeDb/doc/assemblyEquivalence
+# (kept as a comment: a bare directory path here is run as a command and fails)
+~/kent/src/hg/makeDb/doc/assemblyEquivalence/createNearMissRunList.pl 10 \
+   "uniqueCounts.${B}.${A}.txt" "uniqueCounts.${A}.${B}.txt" \
+     > "nearMiss.${A}.${B}.run.list"
+
+~/kent/src/hg/makeDb/doc/assemblyEquivalence/createNearMissRunList.pl 10 \
+   "uniqueCounts.${A}.${B}.txt" "uniqueCounts.${B}.${A}.txt" \
+     > "nearMiss.${B}.${A}.run.list"
+
+printf "# newly created list size:\n"
+wc -l "nearMiss.${A}.${B}.run.list" "nearMiss.${B}.${A}.run.list"