dc0ff6b7cda41fce2d04b9cce778901be0c760bf hiram Tue Sep 1 10:57:30 2020 -0700 add duplicate verification refs #26145 diff --git src/hg/makeDb/doc/assemblyEquivalence/update.2020-08-31.txt src/hg/makeDb/doc/assemblyEquivalence/update.2020-08-31.txt index bd298f1..23eba08 100644 --- src/hg/makeDb/doc/assemblyEquivalence/update.2020-08-31.txt +++ src/hg/makeDb/doc/assemblyEquivalence/update.2020-08-31.txt @@ -149,25 +149,34 @@ join hgFixed.asmEquivalent.tsv existing.2020-08-31.tsv | wc 5100 66300 702561 ### probably should *not* be losing any from before: join -v 2 hgFixed.asmEquivalent.tsv existing.2020-08-31.tsv | wc -l 0 # if not 0, investigate. Sometimes a new assembly is now an # exact match to something where it was a near match before to # a previous assembly of that organism. ### there should be some new ones join -v 1 hgFixed.asmEquivalent.tsv existing.2020-08-31.tsv | wc 20 140 1784 +### There should be no duplicate equivalents (the highest count, listed first, should be 1): +cut -f1,2 hgFixed.asmEquivalent.tsv | sort | uniq -c | sort -rn | head + 1 zonAlb1 Zonotrichia_albicollis.Zonotrichia_albicollis-1.0.1 + 1 zonAlb1 GCF_000385455.1_Zonotrichia_albicollis-1.0.1 + 1 zonAlb1 GCA_000385455.1_Zonotrichia_albicollis-1.0.1 + 1 xipMac1 GCA_000241075.1_Xiphophorus_maculatus-4.4.2 + 1 xenTro9 Xenopus_tropicalis.Xenopus_tropicalis_v9.1 +... etc ... + #### To load up new table contents: hgLoadSqlTab hgFixed asmEquivalent ~/kent/src/hg/lib/asmEquivalent.sql \ hgFixed.asmEquivalent.tsv hgsql -N -e 'select * from asmEquivalent;' hgFixed \ | sort > updated.2020-08-31.tsv wc -l updated.2020-08-31.tsv existing.2020-08-31.tsv 2344 updated.2020-08-31.tsv 2320 existing.2020-08-31.tsv