src/hg/makeDb/doc/hg19.txt 18af2bb33abcd036c8cd8539580a8943ce8cb8ad

18af2bb33abcd036c8cd8539580a8943ce8cb8ad
chmalee
  Fri Jan 17 08:44:45 2020 -0800
Add full paths to gnomAD-related track-building scripts, per Max's email

diff --git src/hg/makeDb/doc/hg19.txt src/hg/makeDb/doc/hg19.txt
index 4e460b2..c6fd5ad 100644
--- src/hg/makeDb/doc/hg19.txt
+++ src/hg/makeDb/doc/hg19.txt
@@ -33762,37 +33762,37 @@
 # 289674 total
 
 # check that v19 has all the transcripts:
 comm -12 hg19.gencodeV19.transcripts pliByGene.transcripts | wc -l
 19704
 comm -12 hg19.gencodeV19.transcripts pliByTranscripts.transcripts | wc -l
 80950
 rm hg19.gencodeV19.transcripts
 
 # ok safe to use v19 exon boundaries, just need to drop the version numbers:
 hgsql -Ne "select * from wgEncodeGencodeCompV19" hg19 | cut -f2- | genePredToBed | sed -Ee 's/\.[0-9]+//' | sort -k4 > hg19.gencodeCompV19.bed12
 
 gzip -cd $geneFile | tail -n +2 \
     | tawk '{print $75,$76,$77,$64,$65,$1,$2,$3,$4,$5,$33,$12,$13,$14,$32,$17,$20,$21,$24,$25,$26,$27,$28,$29,$30}' \
     | sort -k7 | join -t $'\t' -1 4 -2 7 hg19.gencodeCompV19.bed12 - \
-    | ./combine.awk -v doTranscripts=false 2>genes.chromMismatches \
+    | ~/kent/src/hg/makeDb/gnomad/combine.awk -v doTranscripts=false 2>genes.chromMismatches \
     | sort -k1,1 -k2,2n > pliByGene.bed
 
 gzip -cd $transcriptFile | tail -n +2 \
     | tawk '{print $76,$77,$78,$65,$66,$1,$2,$4,$5,$6,$34,$13,$14,$15,$33,$18,$21,$22,$25,$26,$27,$28,$29,$30,$31}' \
     | sort -k7 | join -t $'\t' -1 4 -2 7 hg19.gencodeCompV19.bed12 - \
-    | ./combine.awk -v doTranscripts=true 2>transcripts.chromMismatches \
+    | ~/kent/src/hg/makeDb/gnomad/combine.awk -v doTranscripts=true 2>transcripts.chromMismatches \
     | sort -k1,1 -k2,2n > pliByTranscript.bed
 
 # make .as file:
 #  table pliMetrics
 #  "bed12+5 for displaying gnomAD haploinsufficiency prediction scores"
 #      (
 #      string chrom;      "Reference sequence chromosome or scaffold"
 #      uint   chromStart; "Start position in chromosome"
 #      uint   chromEnd;   "End position in chromosome"
 #      string name;       "ENST or ENSG Name"
 #      uint   score;      "pLI score between 0-1000"
 #      char[1] strand;    "strand of transcript"
 #      uint thickStart;   "Start of where display is thick"
 #      uint thickEnd;     "End of where display should be thick"
 #      uint itemRgb;    "Color of item"
@@ -33906,41 +33906,41 @@
     # Info.csv  Table_S4.csv
 
     # Table_S4.csv is where it's at:
     # head -2 148353-3/Table_S4.csv
     transcript  gene    chr amino_acids genomic_start   genomic_end obs_mis exp_mis obs_exp chisq_diff_null region_name
     ENST00000337907.3   RERE    1   1-507   8716356 8424825 97  197.9807    0.489947    51.505535   RERE_1
     ENST00000337907.3	RERE	1	508-1567	8424824	8415147	355	438.045275	0.810419	15.743847	RERE_2
 
     # now I need to get this into exons somehow
     hgsql -Ne "select * from wgEncodeGencodeCompV19" | cut -f2- | genePredToBed > hg19.gencodeV19.txt
     bedToPsl /hive/data/genomes/hg19/chrom.sizes hg19.gencodeV19.txt v19.psl
     # pslMap would work here, but since I don't know how to make a psl for RERE:1-507, I can't supply
     # the input psl that pslMap needs. Thus I'll need a new util.
 
     # first trim the utrs from v19:
-    ./trimUtrs.py hg19.gencodeV19.txt trimmedUtrs.txt
+    ~/kent/src/hg/makeDb/gnomad/trimUtrs.py hg19.gencodeV19.txt trimmedUtrs.txt
     # 99448 transcript added to transcript dict
     # sanity check: do the trimmed transcripts match what bedToExons -cdsOnly produces?
     bedToExons trimmedUtrs.txt my.gencode.exonsOnly
     bedToExons -cdsOnly hg19.gencodeV19.txt gencode.exonsOnly
     # the awk removes the non-coding transcripts
     diff <(cut -f1-4 gencode.exonsOnly | tawk '{if ($3 != $2) print}' | sort -k4) <(cut -f1-4 trimmedUtrs.txt | sort -k4)
     # no diffs so we're good
 
     # now chop up exons according to the amino acids:
-    ./aaToGenomic.py trimmedUtrs.txt 148353-3/Table_S4.csv > aaToBed.out
+    ~/kent/src/hg/makeDb/gnomad/aaToGenomic.py trimmedUtrs.txt 148353-3/Table_S4.csv > aaToBed.out
     # make autoSql file: regular bed12 plus one field for the gene name and one for the chi-square value
     # table missenseConstraint
     # "Parts of transcripts shaded according to how well that region of the transcript tolerates missense variation."
     #     (
     #     string chrom;      "Chromosome (or contig, scaffold, etc.)"
     #     uint   chromStart; "Start position in chromosome"
     #     uint   chromEnd;   "End position in chromosome"
     #     string name;       "Name of item"
     #     uint   score;      "Score from 0-1000"
     #     char[1] strand;    "+ or -"
     #     uint thickStart;   "Start of where display should be thick (start codon)"
     #     uint thickEnd;     "End of where display should be thick (stop codon)"
     #     uint reserved;     "RGB color of item"
     #     int blockCount;    "Number of blocks"
     #     int[blockCount] blockSizes; "Comma separated list of block sizes"