c441c8fab1f86d5ffdf876df99ada6adfddd3df9
chmalee
  Thu Aug 27 14:36:52 2020 -0700
Fixing up uniq command after code review, refs #26100

diff --git src/hg/utils/otto/decipher/buildDecipher src/hg/utils/otto/decipher/buildDecipher
index 0e92ca2..b0ff472 100755
--- src/hg/utils/otto/decipher/buildDecipher
+++ src/hg/utils/otto/decipher/buildDecipher
@@ -1,46 +1,46 @@
 #!/bin/sh -e
 # get raw data file from DECIPHER
 set -eEu -o pipefail
 
 # get canonical gene symbols:
 hgsql -Ne "select chrom,chromStart,chromEnd,geneSymbol from knownCanonical kc join kgXref kg on kc.transcript=kg.kgID" hg19 | sort -k1,1 -k2,2n > hg19.knownCanonical.genes
 
 sort $1 | grep -v '#' | tawk '$2 == "MT" {$2 = "M";}; {
     $2 = "chr"$2;
     $3 = $3 - 1;
     printf "%s\t%s\t%s\t%s\t0\t.\t%s\t%s", $2,$3,$4,$1,$3,$4;
     # placeholder itemRgb
     printf "\t0,0,0";
     # size field for filter
     printf "\t%d", $4 - $3;
     # force a float for mean_ratio:
     printf "\t%0.2f", $5
     # rest of the fields:
     for (i = 6; i <= NF; i++) {
         printf "\t%s", $i;
     }
     printf "\n";
     }' | sort -k1,1 -k2,2n > decipherCnv.bed17
 
 # append a list of genes for each cnv:
 ../processDecipher.py decipherCnv.bed17 hg19.knownCanonical.genes | sort -k1,1 -k2,2n > decipherCnv.bed
 oldLc=`bigBedToBed ../release/hg19/decipherCnv.bb stdout | wc -l`
 newLc=`grep -v "^#" decipherCnv.bed | wc -l | cut -d' ' -f1`
 echo decipherCnv rowcount: old $oldLc new: $newLc
 echo $oldLc $newLc | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on DECIPHER CNV failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}'
 bedToBigBed -extraIndex=name -tab -as=../decipherCnv.as -type=bed9+10 decipherCnv.bed /hive/data/genomes/hg19/chrom.sizes decipherCnv.bb
 cp decipherCnv.bb ../release/hg19/
 
 
 # SNVs pipeline
 
-sort $2| grep -v '#' | tawk '$2 == "MT" {$2 = "M";}; {print;}' | uniq >  decipherSnvsRawNew.txt 
+grep -v '#' "$2" | tawk '$2 == "MT" {$2 = "M";}; {print;}' | sort -u > decipherSnvsRawNew.txt  # sort -u orders and de-duplicates in one pass, so no leading sort is needed
 
 hgsql hg19 -e 'drop table if exists decipherSnvsRawNew'
 hgLoadSqlTab hg19 decipherSnvsRawNew ../decipherSnvsRaw.sql decipherSnvsRawNew.txt
 
 hgsql hg19 -N -e 'select "chr", chr, start-1, end, id from decipherSnvsRawNew ' |\
 sed -e 's/chr\t/chr/' |sort > decipherSnvsNew.bed
 
 # Load decipher snvs table
 hgLoadBed hg19 decipherSnvsNew decipherSnvsNew.bed