b4fd34e71f94b2847253ef2b20cd3d8aa2acaa24 jcasper Mon May 19 10:31:30 2025 -0700 Fix a few issues with MaveDB json parse (score bounds, item count display for cells with multiple values). Also committing trackDb and doc for a native version, refs #31812 diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt index fcbc04a4d06..d57aacaaa4b 100644 --- src/hg/makeDb/doc/hg38/hg38.txt +++ src/hg/makeDb/doc/hg38/hg38.txt @@ -7395,16 +7395,54 @@ ## $ ll /hive/data/outside/cosmic/ucsc_export*.bed.gz ## rw-rw-r- 1 max protein 198M Jan 14 16:15 ## /hive/data/outside/cosmic/ucsc_export.v101.hg19.bed.gz ## rw-rw-r- 1 max protein 198M Jan 14 13:35 ## /hive/data/outside/cosmic/ucsc_export.v101.hg38.bed.gz mkdir -p /hive/data/outside/cosmic/{hg19,hg38}/v101 cd /hive/data/outside/cosmic/hg38/v101 zcat /hive/data/outside/cosmic/ucsc_export.v101.hg38.bed.gz | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, $7, 0, $6, $4, $5, $8 }' | sort -k1,1 -k2,2n > cosmic.bed bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as cosmic.bed /hive/data/genomes/hg38/chrom.sizes cosmic.bb -tab cd /gbdb/hg38/cosmic/ ln -s /hive/data/outside/cosmic/hg38/v98/cosmic.bb cosmicv98.bb ln -sf /hive/data/outside/cosmic/hg38/v101/cosmic.bb #Updated human/hg38/trackDb.ra and human/hg38/cosmicMuts.html + +######################################################################### +# MaveDB (04-14-2025) Jonathan + +# MaveDB data for this track was taken from mappings they created and linked to +# from https://github.com/VariantEffect/dcd_mapping2. More specifically, from +# https://mavedb-mapping.s3.us-east-2.amazonaws.com/mappings.tar.gz. 
+ +mkdir -p /hive/data/outside/mavedb/dcd_mappings +cd /hive/data/outside/mavedb/dcd_mappings +wget https://mavedb-mapping.s3.us-east-2.amazonaws.com/mappings.tar.gz +tar xzvf mappings.tar.gz + +mkdir -p /hive/data/genomes/hg38/bed/mavedb/2025_04_12 +cd /hive/data/genomes/hg38/bed/mavedb/2025_04_12 + +# We skip 00000053-a-1.json because it's a very large dataset that also only +# contains haplotype data, which we don't have a display for. +time for fil in `grep -l hgvs.p /hive/data/outside/mavedb/dcd_mappings/mappings/*.json`; + do if [[ "$fil" == *"00000053-a-1.json" ]]; then continue; fi + echo $fil + perl ~/kent/src/oneShot/parseMave/parseMave.pl < $fil >> combined.bed; + done + +bedSort combined.bed combined.bed +cp $HOME/kent/src/hg/lib/heatmap.as . +# edit heatmap.as to add these fields to the end +# lstring maveLink; "Link to MaveDB" +# lstring abstractText; "Abstract" +# lstring methodText; "Methods" + +bedToBigBed -type=bed12+ -tab -as=heatmap.as combined.bed /hive/data/genomes/hg38/chrom.sizes all_mave.bb + +mkdir -p /gbdb/hg38/maveDB +cd /gbdb/hg38/maveDB +ln -s /hive/data/genomes/hg38/bed/mavedb/2025_04_12/all_mave.bb + + #########################################################################