c12ecb21298a3c6d19b6381ede7bd1477cabefab
kuhn
  Mon Feb 28 17:20:04 2022 -0800
changes per BriLee code review, incl conforming to makedoc conventions, adding filters on dogSnp track.  added evaSnp.as file, which was forgotten. refs #29025

diff --git src/hg/makeDb/doc/canFam3.txt src/hg/makeDb/doc/canFam3.txt
index 019e646..7e8ca71 100644
--- src/hg/makeDb/doc/canFam3.txt
+++ src/hg/makeDb/doc/canFam3.txt
@@ -1168,158 +1168,154 @@
 
 gunzip GCA_000002285.2_current_ids.vcf.gz
 
 # fix chromNames chr01 > chr1
 cat GCA_000002285.2_current_ids.vcf | sed "s/chr0/chr/" > dogSNPs.vcf
 bgzip dogSNPs.vcf
 
 # vcfToBed requires a tabix
 tabix -p vcf dogSNPs.vcf.gz
 
 # make a bed file with 3 useful flags
 vcfToBed dogSNPs.vcf.gz dogSNPs3Flags -fields=VC,SID,RS_VALIDATED
 # VC=Variant Class, SID=Submitter ID,  RS_VALIDATED=validated from dbSNP 
 
 wc -l *bed
- 5655126 dogSNPs3Flags.bed
-[kuhn@hgwdev release3]$ wc -l *vcf
-5655174 GCA_000002285.2_current_ids.vcf
+# 5655126 dogSNPs3Flags.bed
+wc -l *vcf
+# 5655174 GCA_000002285.2_current_ids.vcf
 
 # sort file
 bedSort dogSNPs3Flags.bed dogSNPs3Flags.bed
 
 [kuhn@hgwdev release3]$ head -2 dogSNPs3Flags.bed
 #chrom  chromStart      chromEnd        name    score   strand  thickStart      thickEnd        itemRgb ref     alt     FILTER  VC      SID     RS_VALIDATED
 #chr1    111     112     rs850979046     0       .       111     112     0,0,0   A       G       .       SO:0001483      BROAD_VGB_CANINE_PON_SNP_DISCOVERY
 
 # drop unnecessary fields:  thickStart      thickEnd        itemRgb 
 cat dogSNPs3Flags.bed \
   | awk -F"\t" '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$10"\t"$11"\t"$13"\t"$14"\t"$15}' \
   > evaSnps1.bed
 
-[kuhn@hgwdev release3]$ head evaSnps1.bed
+head evaSnps1.bed
 #chrom  chromStart      chromEnd        name    score   strand  ref     alt     VC      SID     RS_VALIDATED
-chr1    111     112     rs850979046     0       .       A       G       SO:0001483      BROAD_VGB_CANINE_PON_SNP_DISCOVERY
+# chr1    111     112     rs850979046     0       .       A       G       SO:0001483      BROAD_VGB_CANINE_PON_SNP_DISCOVERY
 
 # add "No" to last column where needed (RS_VALIDATED)
 cat evaSnps1.bed \
   | awk '{if ($11 != "Yes") { print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10"\tNo"; } \
               else          { print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10"\t"$11; }}' \
    > evaSnps2.bed 
    
-[kuhn@hgwdev release3]$ head evaSnps2.bed
+head evaSnps2.bed
 #chrom  chromStart      chromEnd        name    score   strand  ref     alt     VC      SID     No
-chr1    111     112     rs850979046     0       .       A       G       SO:0001483      BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
+# chr1    111     112     rs850979046     0       .       A       G       SO:0001483      BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
 
-[kuhn@hgwdev release3]$ cat evaSnps2.bed | awk -F"\t" '{print $11}' | sort | uniq -c
-4644128 No
-1010998 Yes
+cat evaSnps2.bed | awk -F"\t" '{print $11}' | sort | uniq -c
+# 4644128 No
+# 1010998 Yes
 
 # replace Sequence Ontology  "SO:" IDs with actual snp types
-
-[kuhn@hgwdev data]$ cat evaSnps2.bed \
+cat evaSnps2.bed \
   | sed "s/SO:0001483/substitution/" \
   | sed "s/SO:0000159/deletion/" \
   | sed "s/SO:0000667/insertion/" \
   | sed "s/SO:0000705/tandemRepeat/" \
   | sed "s/SO:0002007/multipleNucleotideSubstitution/" \
   | sed "s/SO:1000032/delins/" > evaSnps3.bed
 
-[kuhn@hgwdev release3]$ cat evaSnps3.bed | awk '{print $9}' | sort | uniq -c | sort -nr
-4713243 substitution
- 474379 deletion
- 467490 insertion
-      7 tandemRepeat
-      5 delins
-      1 multipleNucleotideSubstitution
-      1 VC
+cat evaSnps3.bed | awk '{print $9}' | sort | uniq -c | sort -nr
+# 4713243 substitution
+#  474379 deletion
+#  467490 insertion
+#       7 tandemRepeat
+#       5 delins
+#       1 multipleNucleotideSubstitution
+#       1 VC
 
 # still has header row (VC)
 
-[kuhn@hgwdev release3]$ head evaSnps3.bed
+head evaSnps3.bed
 #chrom  chromStart      chromEnd        name    score   strand  ref     alt     VC      SID     No
-chr1    111     112     rs850979046     0       .       A       G       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
+# chr1    111     112     rs850979046     0       .       A       G       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
 
 # make bigBed
-[kuhn@hgwdev data]$ bedToBigBed -tab -as=../evaSnp.as -type=bed6+5 -extraIndex=name evaSnps3.bed ../chromInfo.txt evaSnp.bb
+bedToBigBed -tab -as=../evaSnp.as -type=bed6+5 -extraIndex=name evaSnps3.bed ../chromInfo.txt evaSnp.bb
 
 # error msg:
-evaSnpsr.bed is not sorted at line 115.  Please use "sort -k1,1 -k2,2n" or bedSort and try again.
+# evaSnps3.bed is not sorted at line 115.  Please use "sort -k1,1 -k2,2n" or bedSort and try again.
 # ?? but it =looks= sorted there:
 
-114 chr1    6189    6189    rs851043138     0       .       T       TAG     insertion       BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
-115 chr1    6188    6189    rs852166058     0       .       T       G       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
-116 chr1    6193    6194    rs851187136     0       .       G       A       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
+# 114 chr1    6189    6189    rs851043138     0       .       T       TAG     insertion       BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
+# 115 chr1    6188    6189    rs852166058     0       .       T       G       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
+# 116 chr1    6193    6194    rs851187136     0       .       G       A       substitution    BROAD_VGB_CANINE_PON_SNP_DISCOVERY      No
 
 bedSort evaSnps3.bed evaSnps4.bed
 
-# moved chromInfo.txt and evaSnp.as to local directory
-cp ../chromInfo.txt .
-cp ../evaSnp.as .
-
 # trackDb/ra entry:
 cd kent/src/hg/makeDb/trackDb/dog/canFam3/trackDb.ra
 
-track evaSnp
-shortLabel EVA SNP Release 3
-longLabel Short Genetic Variants from European Variant Archive Release 3
-type bigBed 6 +
-group varRep
-visibility pack
-bigDataUrl /gbdb/canFam3/bbi/evaSnp.bb
-url https://www.ebi.ac.uk/eva/?variant&accessionID=$$
+# track evaSnp
+# shortLabel EVA SNP Release 3
+# longLabel Short Genetic Variants from European Variant Archive Release 3
+# type bigBed 6 +
+# group varRep
+# visibility pack
+# bigDataUrl /gbdb/canFam3/bbi/evaSnp.bb
+# url https://www.ebi.ac.uk/eva/?variant&accessionID=$$
 
 # adding search in trackDb.ra (thanks jonathan):
 # added the following to dog/canFam3/trackDb.ra
 
-search works.
-searchTable evaSnp
-searchType bigBed
-searchMethod exact
-padding 50
-
-[kuhn@hgwdev release3]$ bedToBigBed -tab -as=evaSnp.as -type=bed6+5 -extraIndex=name evaSnps4.bed chromInfo.txt evaSnp.bb
-pass1 - making usageList (39 chroms): 1815 millis
-pass2 - checking and writing primary data (5655125 records, 11 fields): 19207 millis
-Sorting and writing extra index 0: 4299 millis
-[kuhn@hgwdev release3]$ 
-[kuhn@hgwdev release3]$ bigBedInfo evaSnp.bb
-version: 4
-fieldCount: 11
-hasHeaderExtension: yes
-isCompressed: yes
-isSwapped: 0
-extraIndexCount: 1
-itemCount: 5,655,125
-primaryDataSize: 78,986,590
-primaryIndexSize: 365,236
-zoomLevels: 10
-chromCount: 39
-basesCovered: 6,322,774
-meanDepth (of bases covered): 1.025209
-minDepth: 1.000000
-maxDepth: 14.000000
-std of depth: 0.217172
-
-# copy to active directory for track
+# searchTable evaSnp
+# searchType bigBed
+# searchMethod exact
+# padding 50
+
+# search works:
+
+bedToBigBed -tab -as=evaSnp.as -type=bed6+5 -extraIndex=name evaSnps4.bed chromInfo.txt evaSnp.bb
+# pass1 - making usageList (39 chroms): 1815 millis
+# pass2 - checking and writing primary data (5655125 records, 11 fields): 19207 millis
+# Sorting and writing extra index 0: 4299 millis
+
+bigBedInfo evaSnp.bb
+# version: 4
+# fieldCount: 11
+# hasHeaderExtension: yes
+# isCompressed: yes
+# isSwapped: 0
+# extraIndexCount: 1
+# itemCount: 5,655,125
+# primaryDataSize: 78,986,590
+# primaryIndexSize: 365,236
+# zoomLevels: 10
+# chromCount: 39
+# basesCovered: 6,322,774
+# meanDepth (of bases covered): 1.025209
+# minDepth: 1.000000
+# maxDepth: 14.000000
+# std of depth: 0.217172
+
+# copy to the /cluster/data directory that the /gbdb symlink for the track points to
 cp evaSnp.bb /cluster/data/canFam3/bed/evaSnp/evaSnp.bb
 
 # make gbdb symlink
 cd /gbdb/canFam3/bbi
 ln -s /cluster/data/canFam3/bed/evaSnp/evaSnp.bb evaSnp.bb
 
 # submitters
-[kuhn@hgwdev release3]$ cat evaSnps4.bed | awk '{print $10}' | sed -e "s/,/\n/g" | sort | uniq -c | sort -r
+cat evaSnps4.bed | awk '{print $10}' | sed -e "s/,/\n/g" | sort | uniq -c | sort -r
 3492428 BROAD_VGB_CANINE_PON_SNP_DISCOVERY
 2458565 BROAD_DBSNP.2005.2.4.16.57
  697053 TIGR_1.0
  234166 BROAD_VGB_LYMPHOMA_SNP_DISCOVERY
    4202 DOG_GEN_LAB_1032011
      94 AMOUCD_MAL_20+TERV_1
 ...
 
-[kuhn@hgwdev release3]$ cat evaSnps4.bed | awk '{print $10}' | sed -e "s/,/\n/g" | sort | uniq -c | sort -r
-52
+cat evaSnps4.bed | awk '{print $10}' | sed -e "s/,/\n/g" | sort | uniq -c | sort -r | wc -l
+# 52
 
 # 52 different submitters.  most from a few places, mostly the Broad
 
 ##############################################################################