9ea6efeaf7d92fa6d49865aa8f2bcfa3e72272a9
hiram
  Mon Feb 13 10:57:42 2023 -0800
document the chromAlias construction for both chromAlias table and download files no redmine

diff --git src/hg/makeDb/doc/vicPac2.txt src/hg/makeDb/doc/vicPac2.txt
index b9178e1..460fbac 100644
--- src/hg/makeDb/doc/vicPac2.txt
+++ src/hg/makeDb/doc/vicPac2.txt
@@ -477,19 +477,128 @@
     # 450836
 
     hgsql -N -e "select frag from gold;" vicPac2 | egrep -e '[AN][BC][R_][R0]0[0-9]+(\.1)?' | wc -l
     # 450836
 
     hgsql -N -e "select frag from gold;" vicPac2 | egrep -v -e '[AN][BC][R_][R0]0[0-9]+(\.1)?' | wc -l
     # 0
 
     # hence, add to trackDb/alpaca/vicPac2/trackDb.ra
 searchTable gold
 shortCircuit 1
 termRegex [AN][BC][R_][R0]0[0-9]+(\.1)?
 query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
 searchPriority 8
 
-#########################################################################
 ##############################################################################
 # TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
 ##############################################################################
+
+##############################################################################
+### create chromAlias tables and text files (DONE - 2023-02-13 - Hiram)
+    mkdir /hive/data/genomes/vicPac2/bed/chromAlias
+    cd /hive/data/genomes/vicPac2/bed/chromAlias
+
+export chrM=`grep chrM ../../chrom.sizes | cut -f2`
+
+printf "chrM: %s\n" "${chrM}"
+# 16652
+
+# the sort -u eliminates the duplicate sequences
+
+join -t$'\t' ../idKeys/vicPac2.idKeys.txt \
+    <(sort -k1,1 -u  /hive/data/genomes/asmHubs/genbankBuild/GCA/000/164/845/GCA_000164845.2_Vicugna_pacos-2.0.1/idKeys/GCA_000164845.2_Vicugna_pacos-2.0.1.idKeys.txt) | cut -f2- \
+          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
+            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
+               | sort -k1,1 -k2,2n > ucscToINSDC.bed
+
+printf "chrM\t0\t%d\t%s\n" "${chrM}" "Y19184.1" >> ucscToINSDC.bed
+
+join -t$'\t' ../idKeys/vicPac2.idKeys.txt \
+    <(sort -k1,1 -u /hive/data/genomes/asmHubs/refseqBuild/GCF/000/164/845/GCF_000164845.1_Vicugna_pacos-2.0.1/idKeys/GCF_000164845.1_Vicugna_pacos-2.0.1.idKeys.txt) | cut -f2- \
+          | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \
+            | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
+               | sort -k1,1 -k2,2n > ucscToRefSeq.bed
+
+    grep -v "^#"  /hive/data/outside/ncbi/genomes/GCA/000/164/845/GCA_000164845.2_Vicugna_pacos-2.0.1/GCA_000164845.2_Vicugna_pacos-2.0.1_assembly_report.txt \
+  | cut -f1,7 | sort -k2,2 \
+    | join -t$'\t' -1 2 -2 4 - <(sort -k4,4 ucscToRefSeq.bed) \
+      | awk -F$'\t' '{printf "%s\t%d\t%d\t%s\n", $3, $4, $5, $2}' \
+         | sort -k1,1 -k2,2n > ucscToAssembly.bed
+
+export chrSize=`cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1`
+printf "# chrSize: %d\n" "${chrSize}"
+sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
+         | hgLoadSqlTab vicPac2 ucscToINSDC stdin ucscToINSDC.bed
+export chrSize=`cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1`
+printf "# chrSize: %d\n" "${chrSize}"
+sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
+       | sed -e 's/INSDC/RefSeq/g;' \
+         | hgLoadSqlTab vicPac2 ucscToRefSeq stdin ucscToRefSeq.bed
+
+# should be quiet for all OK
+checkTableCoords vicPac2 ucscToINSDC
+checkTableCoords vicPac2 ucscToRefSeq
+
+# should cover %100 entirely:
+featureBits -countGaps vicPac2 ucscToINSDC
+#  2172177994 bases of 2172177994 (100.000%) in intersection
+featureBits -countGaps vicPac2 ucscToRefSeq
+#  2172177994 bases of 2172177994 (100.000%) in intersection
+
+cut -f1,4  ucscToAssembly.bed | sort > ucsc.assembly.tab
+cut -f1,4  ucscToINSDC.bed | sort > ucsc.genbank.tab
+cut -f1,4  ucscToRefSeq.bed | sort > ucsc.refseq.tab
+
+~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \
+  > vicPac2.chromAlias.tab
+
+for t in refseq genbank assembly
+do
+  c0=`cat ucsc.$t.tab | wc -l`
+  c1=`grep $t vicPac2.chromAlias.tab | wc -l`
+  ok="OK"
+  if [ "$c0" -ne "$c1" ]; then
+     ok="ERROR"
+  fi
+  printf "# checking $t: $c0 =? $c1 $ok\n"
+done
+# checking refseq: 276611 =? 276611 OK
+# checking genbank: 276611 =? 276611 OK
+# checking assembly: 276611 =? 276611 OK
+
+grep chrM vicPac2.chromAlias.tab
+# MT      chrM    assembly
+# NC_002504.1     chrM    refseq
+# Y19184.1        chrM    genbank
+
+hgLoadSqlTab vicPac2 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
+        vicPac2.chromAlias.tab
+
+export asmId="vicPac2"
+
+$HOME/kent/src/hg/utils/automation/aliasTabToText.pl $asmId.chromAlias.tab \
+  > $asmId.chromAlias.txt
+
+$HOME/kent/src/hg/utils/automation/aliasTextToBed.pl \
+  -chromSizes=../../chrom.sizes \
+    -aliasText=${asmId}.chromAlias.txt \
+      -aliasBed=${asmId}.chromAlias.bed \
+        -aliasAs=${asmId}.chromAlias.as \
+        -aliasBigBed=${asmId}.chromAlias.bb
+
+bigBedToBed -header ${asmId}.chromAlias.bb test.chromAlias.bed
+$HOME/kent/src/hg/utils/automation/aliasBedToCt.pl \
+  test.chromAlias.bed ${asmId}.chromAlias.bb .
+
+# create symLinks to get the files into bigZips
+
+[hiram@hgwdev /hive/data/genomes/vicPac2/goldenPath/bigZips] ls -ogrt *Alias*
+total 2467461
+lrwxrwxrwx 1        43 Feb 13 10:41 vicPac2.chromAlias.txt -> ../../bed/chromAlias/vicPac2.chromAlias.txt
+lrwxrwxrwx 1        42 Feb 13 10:41 vicPac2.chromAlias.bb -> ../../bed/chromAlias/vicPac2.chromAlias.bb
+
+# copy those to hgdownload:
+  rsync -a -L ./vicPac2.chromAlias.* \
+      qateam@hgdownload:/mirrordata/goldenPath/vicPac2/bigZips/
+
+##############################################################################