c164fcc4428454a539e9226d343b2f7f6373ecce
jcasper
  Mon Apr 11 13:26:42 2022 -0700
Initial makedoc for single cell merged track on hg38, plus update to trackDb entry, refs #28062

diff --git src/hg/makeDb/doc/hg38/singleCell.txt src/hg/makeDb/doc/hg38/singleCell.txt
new file mode 100644
index 0000000..1de7bc2
--- /dev/null
+++ src/hg/makeDb/doc/hg38/singleCell.txt
@@ -0,0 +1,83 @@
+# for emacs: -*- mode: sh; -*-
+
+# This file describes how we made the browser database on
+# NCBI build 38 (December 2013 freeze) aka:
+#	GRCh38 - Genome Reference Consortium Human Reference 38
+#	Assembly Accession: GCA_000001405.15
+
+#############################################################################
+## Single Cell Merged - DONE - 2022-04-11 (Jim, Brittney, Jonathan)
+# Make working directory for a new browser track that merges together many individual
+# single cell datasets
+
+# Existing tracks being incorporated into this merged set: bloodHao, pancreasBaron,
+# placentaVentoTormo10x, fetalGeneAtlas, kidneyStewart, muscleDeMicheli, ileumWang,
+# cortexVelmeshev, colonWang, heartCellAtlas, liverMacParland, rectumWang,
+# lungTravaglini2020droplet, skinSoleBoldo
+
+mkdir -p /hive/data/genomes/hg38/bed/singleCell/merged  # -p keeps this step rerunnable
+cd /hive/data/genomes/hg38/bed/singleCell/merged
+
+# Each dataset can have an independent gene set.  So, we need to make up a common gene
+# set they all can map to.
+
+# Reuse some of the code for creating gene sets from collections of mRNA to create a graph
+# of genes, where slight differences in which gene was chosen look a lot like splice variants.
+# NOTE for review: each mapping.bed is listed twice, presumably as the bed plus its type label - confirm.
+txBedToGraph \
+    /hive/data/genomes/hg38/bed/singleCell/bloodHao/mapping.bed /hive/data/genomes/hg38/bed/singleCell/bloodHao/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/mapping.bed /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/mapping.bed /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/fetalGeneAtlas/mapping.bed /hive/data/genomes/hg38/bed/singleCell/fetalGeneAtlas/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/mapping.bed /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/mapping.bed /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/ileumWang/mapping.bed /hive/data/genomes/hg38/bed/singleCell/ileumWang/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/mapping.bed /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/colonWang/mapping.bed /hive/data/genomes/hg38/bed/singleCell/colonWang/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/mapping.bed /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/liverMacParland/mapping.bed /hive/data/genomes/hg38/bed/singleCell/liverMacParland/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/rectumWang/mapping.bed /hive/data/genomes/hg38/bed/singleCell/rectumWang/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/mapping.bed /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/mapping.bed \
+    /hive/data/genomes/hg38/bed/singleCell/skinSoleBoldo/mapping.bed /hive/data/genomes/hg38/bed/singleCell/skinSoleBoldo/mapping.bed \
+    all_mapping.txg
+
+# Hand wrangle copies of the existing cell_type.stats files for each of those data sets, creating
+# "cell_type.stats.detailed" files, which include data about organ, stage, cell class, subclass,
+# cell type, average total, and color (copied from the trackDb entries for the relevant tracks, so that
+# the merged track colors match those in the individual tracks).
+
+# Create a unify.tsv that references those files, one row per dataset, to then run hcaUnifyMatrix.
+# The weight values will be ignored when we renormalize.
+# NOTE for review: the Fetal and Skin rows take matrix and refStats from hg19 paths - confirm intended.
+printf "Blood   6       33.5    /hive/data/genomes/hg38/bed/singleCell/bloodHao/mapping.bed     /hive/data/genomes/hg38/bed/singleCell/bloodHao/clust/cell_type.matrix  /hive/data/genomes/hg38/bed/singleCell/bloodHao/clust/cell_type.refStats        /hive/data/genomes/hg38/bed/singleCell/bloodHao/bbi/cell_type.stats.detailed
+Brain   9       20      /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/mapping.bed      /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/clust/cell_type.matrix   /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/clust/cell_type.refStats /hive/data/genomes/hg38/bed/singleCell/cortexVelmeshev/bbi/cell_type.stats.detailed
+Colon   5       40      /hive/data/genomes/hg38/bed/singleCell/colonWang/mapping.bed    /hive/data/genomes/hg38/bed/singleCell/colonWang/clust/cell_type.matrix /hive/data/genomes/hg38/bed/singleCell/colonWang/clust/cell_type.refStats       /hive/data/genomes/hg38/bed/singleCell/colonWang/bbi/cell_type.stats.detailed
+Fetal   12      22.5    /hive/data/genomes/hg38/bed/singleCell/fetalGeneAtlas/mapping.bed       /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas/clust/cell_type.matrix    /hive/data/genomes/hg19/bed/singleCell/fetalGeneAtlas/clust/cell_type.refStats  /hive/data/genomes/hg38/bed/singleCell/fetalGeneAtlas/bbi/cell_type.stats.detailed
+Heart   1       40      /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/mapping.bed       /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/clust/cell_type.matrix    /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/clust/cell_type.refStats  /hive/data/genomes/hg38/bed/singleCell/heartCellAtlas/bbi/cell_type.stats.detailed
+Ileum   5       40      /hive/data/genomes/hg38/bed/singleCell/ileumWang/mapping.bed    /hive/data/genomes/hg38/bed/singleCell/ileumWang/clust/cell_type.matrix /hive/data/genomes/hg38/bed/singleCell/ileumWang/clust/cell_type.refStats       /hive/data/genomes/hg38/bed/singleCell/ileumWang/bbi/cell_type.stats.detailed
+Kidney  7       50      /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/mapping.bed        /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/clust/cell_type.matrix     /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/clust/cell_type.refStats   /hive/data/genomes/hg38/bed/singleCell/kidneyStewart/bbi/cell_type.stats.detailed
+Liver   3       66.5    /hive/data/genomes/hg38/bed/singleCell/liverMacParland/mapping.bed      /hive/data/genomes/hg38/bed/singleCell/liverMacParland/clust/cell_type.matrix   /hive/data/genomes/hg38/bed/singleCell/liverMacParland/clust/cell_type.refStats /hive/data/genomes/hg38/bed/singleCell/liverMacParland/bbi/cell_type.stats.detailed
+Lung    10      30      /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/mapping.bed   /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/clust/cell_type.matrix        /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/clust/cell_type.refStats      /hive/data/genomes/hg38/bed/singleCell/lungTravaglini2020/droplet/bbi/cell_type.stats.detailed
+Muscle  8       50      /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/mapping.bed      /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/clust/cell_type.matrix   /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/clust/cell_type.refStats /hive/data/genomes/hg38/bed/singleCell/muscleDeMicheli/bbi/cell_type.stats.detailed
+Pancreas        2       40      /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/mapping.bed        /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/clust/cell_type.matrix     /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/clust/cell_type.refStats   /hive/data/genomes/hg38/bed/singleCell/pancreasBaron/bbi/cell_type.stats.detailed
+Placenta        4       50      /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/mapping.bed       /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/clust/cell_type.matrix    /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/clust/cell_type.refStats  /hive/data/genomes/hg38/bed/singleCell/placentaVentoTormo/10x/bbi/cell_type.stats.detailed
+Rectum  5       40      /hive/data/genomes/hg38/bed/singleCell/rectumWang/mapping.bed   /hive/data/genomes/hg38/bed/singleCell/rectumWang/clust/cell_type.matrix        /hive/data/genomes/hg38/bed/singleCell/rectumWang/clust/cell_type.refStats      /hive/data/genomes/hg38/bed/singleCell/rectumWang/bbi/cell_type.stats.detailed
+Skin    11      80      /hive/data/genomes/hg38/bed/singleCell/skinSoleBoldo/mapping.bed        /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo/clust/cell_type.matrix     /hive/data/genomes/hg19/bed/singleCell/skinSoleBoldo/clust/cell_type.refStats   /hive/data/genomes/hg38/bed/singleCell/skinSoleBoldo/bbi/cell_type.stats.detailed" > unify.tsv
+
+# Create a merged matrix out of the individual matrices specified in unify.tsv. The -bed, -trackDb and -stats outputs are used in the steps below.
+hcaUnifyMatrix all_mapping.txg unify.tsv outMatrix.tsv -bed=out.bed -empty=0 -trackDb=out.ra -stats=singleCellMerged.stats
+
+# Convert merged gene matrix and gene sets into a bigBarChart. barChart.bed is sorted in place before bedToBigBed reads it.
+matrixToBarChartBed outMatrix.tsv out.bed barChart.bed
+bedSort barChart.bed barChart.bed
+bedToBigBed barChart.bed /hive/data/genomes/hg38/chrom.sizes singleCellMerged.bb -type=bed6+3 -as=/cluster/home/kent/src/hg/lib/simpleBarChartBed.as
+
+# Link bigBed and also stats file used by faceted details page to runtime accessible directory
+mkdir -p /gbdb/hg38/bbi/singleCellMerged  # ln fails when the target directory is missing
+ln -s /hive/data/genomes/hg38/bed/singleCell/merged/singleCellMerged.bb /hive/data/genomes/hg38/bed/singleCell/merged/singleCellMerged.stats /gbdb/hg38/bbi/singleCellMerged/
+
+# Add the contents of out.ra to the hg38 trackDb.ra settings along with references to the above files and
+# some useful barChart track settings.
+
+#############################################################################
+