f47753d62c5f4905a2ffa14547117e322d19c699
hiram
  Fri Mar 12 12:50:55 2021 -0800
added ucscToEnsembl table and update chromAlias table refs #27194

diff --git src/hg/makeDb/doc/mm39/initialBuild.txt src/hg/makeDb/doc/mm39/initialBuild.txt
index 03ac9d4..ec8c4ce 100644
--- src/hg/makeDb/doc/mm39/initialBuild.txt
+++ src/hg/makeDb/doc/mm39/initialBuild.txt
@@ -599,30 +599,73 @@
      ok="ERROR"
   fi
   printf "# checking $t: $c0 =? $c1 $ok\n"
 done
 # checking assembly: 61 =? 61 OK
 # checking genbank: 61 =? 61 OK
 
     # verify chrM is here properly:
     grep chrM mm39.chromAlias.tab
 # AY172335.1      chrM    genbank
 # MT      chrM    assembly
 # NC_005089.1     chrM    refseq
 
     hgLoadSqlTab mm39 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         mm39.chromAlias.tab
+
+    # Adding Ensembl names 2021-03-12 upon release of Ensembl v103
+    # (read from the ucscToEnsembl table); refseq names exist now too.
+
+    cd /hive/data/genomes/mm39/bed/chromAlias
+    hgsql -N -e 'select * from ucscToEnsembl;' mm39 > ucsc.ensembl.tab
+
+    join -t$'\t' ../idKeys/mm39.idKeys.txt \
+ /hive/data/genomes/asmHubs/refseqBuild/GCF/000/001/635/GCF_000001635.27_GRCm39/idKeys/GCF_000001635.27_GRCm39.idKeys.txt \
+      | cut -f2-3 | sort > ucsc.refseq.tab
+
+    mv mm39.chromAlias.tab mm39.chromAlias.tab.0
+
+     ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
+         > mm39.chromAlias.tab
+# working: assembly
+# working: ensembl
+# working: genbank
+# working: refseq
+
+for t in assembly ensembl genbank refseq
+do
+  c0=`cat ucsc.$t.tab | wc -l`
+  c1=`grep $t mm39.chromAlias.tab | wc -l`
+  ok="OK"
+  if [ "$c0" -ne "$c1" ]; then
+     ok="ERROR"
+  fi
+  printf "# checking $t: $c0 =? $c1 $ok\n"
+done
+# checking assembly: 61 =? 61 OK
+# checking ensembl: 61 =? 61 OK
+# checking genbank: 61 =? 61 OK
+# checking refseq: 61 =? 61 OK
+
+    # verify chrM is here properly:
+    grep chrM mm39.chromAlias.tab
+# AY172335.1      chrM    genbank
+# MT      chrM    assembly,ensembl
+# NC_005089.1     chrM    refseq
+
+    hgLoadSqlTab mm39 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
+        mm39.chromAlias.tab
 
 #########################################################################
 # fixup search rule for assembly track/gold table (DONE - 2020-07-27 - Hiram)
     cd ~/kent/src/hg/makeDb/trackDb/mouse/mm39
     # preview prefixes and suffixes:
     hgsql -N -e "select frag from gold;" mm39 \
       | sed -e 's/[0-9.]\+//;' | sort | uniq -c | sed -e 's/^/# /;'
 #   15228 AC
 #     816 AEKQ
 #       8 AEKR
 #       1 AF
 #    3876 AL
 #       1 AY
 #     844 BX
 #     191 CAAA
@@ -1344,15 +1387,42 @@
 # update 2020-10-27 (DONE - Hiram - 2020-10-27)
 
   mkdir /hive/data/genomes/mm39/bed/ncbiRefSeq.2020-10-27
   cd /hive/data/genomes/mm39/bed/ncbiRefSeq.2020-10-27
 
   time (~/kent/src/hg/utils/automation/doNcbiRefSeq.pl -buildDir=`pwd` \
       -bigClusterHub=ku -dbHost=hgwdev \
       -fileServer=hgwdev -smallClusterHub=hgwdev -workhorse=hgwdev \
       GCF_000001635.27_GRCm39 mm39) > do.log 2>&1 &
   # real    10m2.220s
 
   cat fb.ncbiRefSeq.mm39.txt
   # 128640844 bases of 2654624157 (4.846%) in intersection
 
 #############################################################################
+# create ucscToEnsembl name mapping (DONE - 2021-03-12 - Hiram)
+    # this allows the "ensembl" blue bar button to appear
+    mkdir /hive/data/genomes/mm39/bed/ucscToEnsembl
+    cd /hive/data/genomes/mm39/bed/ucscToEnsembl
+
+    join -t$'\t' ../idKeys/mm39.idKeys.txt  \
+/hive/data/outside/ensembl/genomes/release-103/idKeys/Mus_musculus/Mus_musculus.GRCm39.idKeys.txt \
+  | cut -f2-3 | sort > ucscToEnsembl.tab
+
+    # determine size of PRIMARY KEY index: longest name in column 1
+    awk '{print length($1)}' *.tab | sort -n | tail
+    #  22
+
+    printf '# UCSC to Ensembl chr name translation
+CREATE TABLE ucscToEnsembl (
+    ucsc varchar(255) not null,        # UCSC chromosome name
+    ensembl varchar(255) not null,     # Ensembl chromosome name
+              #Indices
+    PRIMARY KEY(ucsc(22))
+);
+' > ucscToEnsembl.sql
+
+    hgLoadSqlTab mm39 ucscToEnsembl ucscToEnsembl.sql ucscToEnsembl.tab
+
+    # verify the blue bar "ensembl" link is now available under the 'View'
+    # tab
+#############################################################################