fac69064e72ae464ea9f34d137cd0a7bc38a0ee2
hiram
Tue Jan 7 13:15:53 2025 -0800
adding phyloP for 241-way data refs #35032

diff --git src/hg/makeDb/doc/felCat8/cactus241way.txt src/hg/makeDb/doc/felCat8/cactus241way.txt
new file mode 100644
index 0000000..9a895756
--- /dev/null
+++ src/hg/makeDb/doc/felCat8/cactus241way.txt
@@ -0,0 +1,183 @@
+############################################################################
+### phyloP scores from the Zoonomia 241-way alignment
+### DONE 2025-01-07 - hiram
+############################################################################
+
+# files were obtained by dropbox passage from:
+# Kerstin Lindblad-Toh
+# Michaël Dong
+# 20935274303 Dec 19 14:26 cat_241M_PhyloP.onlychr.allBED.split10Mb.tar.gz
+
+mkdir -p /hive/data/genomes/felCat8/bed/cactus241way
+cd /hive/data/genomes/felCat8/bed/cactus241way
+
+# files were unpacked into BED_scores/
+# oddly in a bed format instead of wiggle fixed
+# converted them to wigFix format with the perl script in
+# this working directory: bedToWig.pl via this shell script:
+
+mkdir -p wigFix
+
+for GZ in BED_scores/*.gz
+do
+  B=`basename ${GZ} | sed -e 's/_scoresPhyloP_250.wig_scores.bed/.wigFix/;'`
+  printf "./bedToWig.pl \"${GZ}\" | gzip -c > \"wigFix/${B}\"\n"
+  ./bedToWig.pl "${GZ}" | gzip -c > "wigFix/${B}"
+done
+
+############################################################################
+### bedToWig.pl - the perl script used in the loop above:
+
+#!/usr/bin/env perl
+
+# usage: ./bedToWig.pl file.bed.gz [more files ...] > out.wigFix
+#
+# Converts one-base-per-line BED scores (chrom, chromStart, chromEnd in
+# columns 1-3, score in column 5) into wiggle fixedStep step=1 text on
+# stdout.  The names in column 1 are translated to UCSC names with the
+# ../chromAlias/ucsc.genbank.tab table.  A new fixedStep declaration is
+# written whenever the chromosome changes or positions are not consecutive.
+#
+# BED chromStart is 0-based while fixedStep start= is 1-based, so one is
+# added to chromStart in every declaration.
+# NOTE(review): a wigFix written with start=chromStart (no +1) sits one
+# base too low; confirm against the source wig and regenerate if so.
+
+use strict;
+use warnings;
+
+my $aliasFile = "../chromAlias/ucsc.genbank.tab";
+my %chrNames;  # key is genbank name, value is UCSC name
+
+open (my $fh, "<", $aliasFile) or die "can not read $aliasFile: $!";
+while (my $line = <$fh>) {
+  chomp $line;
+  my @a = split('\s+', $line);
+  next if (scalar(@a) < 2);  # blank or incomplete line
+  $chrNames{$a[1]} = $a[0];
+}
+close ($fh);
+
+# the alias table has the UCSC name first and the genbank name second:
+# chrA1 CM001378.2
+# chrA1_AANG03039881v1_random AANG03039881.1
+# chrA1_AANG03039919v1_random AANG03039919.1
+# chrA1_AANG03039923v1_random AANG03039923.1
+# chrA1_AANG03039971v1_random AANG03039971.1
+# chrA1_AANG03040085v1_random AANG03040085.1
+
+my $chr = "";       # name in column 1 of the most recent BED line
+my $ucscName = "";  # UCSC name for $chr
+my $start = -1;     # chromStart of the most recent BED line, -1 before any
+
+# convert the BED lines from one open file handle to fixedStep on stdout
+sub processBed {
+  my ($in, $file) = @_;
+  while (my $line = <$in>) {
+    chomp $line;
+    next if ($line =~ m/^\s*$/);
+    my @a = split('\s+', $line);
+    if (scalar(@a) < 5) {
+      printf STDERR "ERROR: less than 5 columns in %s\n%s\n", $file, $line;
+      exit 255;
+    }
+    if ($a[0] ne $chr) {
+      $chr = $a[0];
+      if (!defined($chrNames{$chr})) {
+        printf STDERR "ERROR: no UCSC name for '%s'\n", $chr;
+        exit 255;
+      }
+      $ucscName = $chrNames{$chr};
+      $start = -1;  # a new chromosome always needs a new declaration
+    }
+    my $size = $a[2] - $a[1];
+    if ($size != 1) {
+      printf STDERR "ERROR: size not 1 ? %d\n%s\n", $size, $line;
+      exit 255;
+    }
+    if ($start < 0 || $a[1] != ($start + 1)) {
+      # BED chromStart is 0-based, fixedStep start= is 1-based
+      printf "fixedStep chrom=%s start=%d step=1\n", $ucscName, $a[1] + 1;
+    }
+    $start = $a[1];
+    printf "%s\n", $a[4];
+  }
+}
+
+while (my $file = shift) {
+  printf STDERR "### process: %s\n", $file;
+  if ($file =~ m/\.gz$/) {
+    # list form of open avoids passing the file name through a shell
+    open (my $in, "-|", "zcat", $file) or die "can not zcat $file: $!";
+    processBed($in, $file);
+    close ($in) or die "zcat failed on $file";
+  } else {
+    open (my $in, "<", $file) or die "can not read $file: $!";
+    processBed($in, $file);
+    close ($in);
+  }
+}
+############################################################################
+### construct a single wigFix file from the multiple wigFix files:
+
+# this will order the files correctly by start position so everything
+# is in sequence
+
+ls wigFix/*.gz | tr '.' ' ' | sort -k1,1 -k3,3n | tr ' ' '.' \
+  | xargs zcat | gzip -c > felCat8.241way.phyloP.wigFix.gz
+
+### wigEncode that:
+
+  time wigEncode felCat8.241way.phyloP.wigFix.gz \
+    felCat8.241way.phyloP.wig felCat8.241way.phyloP.wib
+# Converted felCat8.241way.phyloP.wigFix.gz, upper limit 8.90, lower limit -20.00
+
+# real 7m26.821s
+# user 9m42.530s
+# sys 0m19.790s
+
+### and bigWig that:
+
+time wigToBigWig felCat8.241way.phyloP.wigFix.gz ../../chrom.sizes \
+  felCat8.241way.phyloP.bw
+
+# real 19m52.299s
+# user 18m51.434s
+# sys 0m46.904s
+
+  bigWigInfo felCat8.241way.phyloP.bw | sed -e 's/^/# /;'
+# version: 4
+# isCompressed: yes
+# isSwapped: 0
+# primaryDataSize: 6,509,062,068
+# primaryIndexSize: 74,686,640
+# zoomLevels: 10
+# chromCount: 19
+# basesCovered: 2,374,219,390
+# mean: 0.164832
+# min: -20.000000
+# max: 8.903000
+# std: 1.301299
+
+### link the bigWig file into gbdb
+
+mkdir -p /gbdb/felCat8/bbi/cactus241way
+ln -s `pwd`/felCat8.241way.phyloP.bw /gbdb/felCat8/bbi/cactus241way/
+
+### trackDb entry
+
+track phyloP241wayBW
+shortLabel 241-way phyloP
+longLabel Basewise Conservation by PhyloP from the Zoonomia 241-way alignment
+configurable on
+noInherit on
+group compGeno
+type bigWig -20 8.903
+bigDataUrl /gbdb/felCat8/bbi/cactus241way/felCat8.241way.phyloP.bw
+maxHeightPixels 100:50:11
+viewLimits -4.5:7.5
+autoScale off
+spanList 1
+windowingFunction mean+whiskers
+color 60,60,140
+altColor 140,60,60