61309f7c6fb5fb1d582fde243fbfa83559c1da21 chmalee Tue Oct 5 16:26:13 2021 -0700 Fix float parsing bug in gtex transcript expression track on hg19 and hg38, refs #28280 diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt index 03a1488..8f065f8 100644 --- src/hg/makeDb/doc/hg38/hg38.txt +++ src/hg/makeDb/doc/hg38/hg38.txt @@ -6428,44 +6428,50 @@ # Make a clean matrix cut -f 1 ../johnVivianRecompute/gtex.tpm.tab | cut -f 1 -d "." > gtexTranscripts.txt cut -f 2- ../johnVivianRecompute/gtex.tpm.tab > gtexTpmValues.tsv paste gtexTranscripts.txt gtexTpmValues.tsv > gtexMatrix.tsv rowsToCols gtexMatrix.tsv tspsdGtexMatrix.tsv sort tspsdGtexMatrix.tsv > sortedTspsdGtexMatrix.tsv grep -v '(CML)' gtexSamples.tsv | cut -f 1 | sed 's/Run_s/#transcript/g' | sort > sortedCleanGtexSamples.tsv join sortedCleanGtexSamples.tsv sortedTspsdGtexMatrix.tsv > cleanTspsdGtexMatrix.tsv rowsToCols cleanTspsdMatrix.tsv cleanGtexMatrix.tsv # Build a coordinate map hgsql hg38 -e "select * from ensGene" | cut -f 2- | sort > ensGene hgsql hg38 -e "select * from ensemblToGeneName" | sort > ensemblToGeneName join ensGene ensemblToGeneName | awk '{print $2"\t"$4"\t"$5"\t"$1"\t0\t"$3"\t"$16}' > coord.bed +# NOTE: CHRISL10-05-2021 - the above ensGene steps weren't actually done or the files were removed, +# there was a coord.tsv which I used instead so the below re-run could work +tawk '{print $1,$2,$3,$4,0,$5,$6}' coord.tsv > coord.bed +# END CHRISL10-05-2021 NOTE) # Get the gtex ordering hgsql hgFixed -e "select * from gtexTissue" | cut -f 3 | sed 's/ - /-/g' | sed 's/ /_/g' | sed '1D' > gtexGroupOrder.txt # Use the meta data, matrix, and coordinate map to generate a barchart bed +# NOTE: CHRISL10-05-2021 - re-ran this step to fix float parsing bug: time expMatrixToBarchartBed cleanGtexSamples.tsv cleanGtexMatrix.tsv coord.bed gtexTransExp.bed --groupOrderFile gtexGroupOrder.txt # NOTE: Use the header line of the bed file to populate the barChartBars field in the trackDb. # The order of the labels in the barChartBars field should match the order of the labels in the # expScores column in the bed file header. # Sort and convert into a bigBed file. sort -k1,1 -k2,2n gtexTransExp.bed > sortedGtexTransExp.bed -bedToBigBed -as=$HOME/kent/src/hg/lib/barChartTranscExp.as -type=bed6+5 sortedGtexTransMed.bed /hive/data/genomes/hg38/chrom.sizes gtexTranscExpr.bb +# NOTE: CHRISL10-05-2021 - re-ran bedToBigBed step with correct file names +bedToBigBed -as=$HOME/kent/src/hg/lib/barChartBed.as -type=bed6+5 sortedGtexTransExp.bed /hive/data/genomes/hg38/chrom.sizes gtexTranscExpr.bb # Link the files into gbdb cd /gbdb/hgFixed/human/expMatrix ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexSamples.tsv cleanGtexSamples.tab ln -s /hive/data/outside/gtex/barChartTrack/cleanGtexMatrix.tsv cleanGtexMatris.tab # <2007-08-30 kate) cd /gbdb/hg38/gtex ln -s /hive/data/outside/gtex/barChartTrack/gtexTranscExpr.bb . ######################################################################### # LASTZ human/hg38 vs. Zebrafish /danRer11 # (DONE - 2017-06-12 - Chris) mkdir /hive/data/genomes/hg38/bed/lastzDanRer11.2017-06-12