#!/bin/bash
# buildDecipher - rebuild the hg19 DECIPHER CNV bigBed and the DECIPHER SNV tables.
#
# Provenance: src/hg/utils/otto/decipher/buildDecipher as of commit
#   c441c8fab1f86d5ffdf876df99ada6adfddd3df9
#   chmalee, Thu Aug 27 14:36:52 2020 -0700
#   "Fixing up uniq command after code review, refs #26100"
#   That commit replaced the trailing "uniq" of the SNV pipeline with "sort -u".
#
# Usage: buildDecipher <cnvFile> <snvFile>
#   $1  input file for the CNV pipeline
#   $2  input file for the SNV pipeline
#
# Files written in the current directory:
#   hg19.knownCanonical.genes, decipherCnv.bed17, decipherCnv.bed, decipherCnv.bb,
#   decipherSnvsRawNew.txt, decipherSnvsNew.bed
# Also copies decipherCnv.bb into ../release/hg19/ and (re)loads the hg19 tables
# decipherSnvsRawNew and decipherSnvsNew.
#
# The interpreter is bash rather than /bin/sh because -E and pipefail below are
# bash options; the options live in "set" so they still apply when the script
# is started as "bash buildDecipher ...".
set -eEu -o pipefail

# Fail before touching any output if an input is missing; otherwise "set -u"
# would only trip on $2 after the CNV release file had already been replaced.
if (( $# < 2 )); then
    echo "usage: ${0##*/} <cnvFile> <snvFile>" >&2
    exit 1
fi

# get canonical gene symbols:
hgsql -Ne "select chrom,chromStart,chromEnd,geneSymbol from knownCanonical kc join kgXref kg on kc.transcript=kg.kgID" hg19 \
    | sort -k1,1 -k2,2n > hg19.knownCanonical.genes

# CNV pipeline: drop every line containing '#', rename chromosome MT to M,
# prefix "chr", make the start coordinate 0-based and lay the fields out as
# bed columns followed by the remaining input fields.
# No pre-sort of the input is needed: the final sort decides the output order.
grep -v '#' "$1" \
    | tawk '$2 == "MT" {$2 = "M";};
    {
    $2 = "chr"$2;
    $3 = $3 - 1;
    printf "%s\t%s\t%s\t%s\t0\t.\t%s\t%s", $2,$3,$4,$1,$3,$4;
    # placeholder itemRgb
    printf "\t0,0,0";
    # size field for filter
    printf "\t%d", $4 - $3;
    # force a float for mean_ratio:
    printf "\t%0.2f", $5
    # rest of the fields:
    for (i = 6; i <= NF; i++)
        {
        printf "\t%s", $i;
        }
    printf "\n";
    }' \
    | sort -k1,1 -k2,2n > decipherCnv.bed17

# append a list of genes for each cnv:
../processDecipher.py decipherCnv.bed17 hg19.knownCanonical.genes \
    | sort -k1,1 -k2,2n > decipherCnv.bed

# Compare the row count of the released bigBed with the freshly built bed.
oldLc=$(bigBedToBed ../release/hg19/decipherCnv.bb stdout | wc -l)
newLc=$(grep -vc "^#" decipherCnv.bed)
echo "decipherCnv rowcount: old $oldLc new: $newLc"
# NOTE(review): (new-old)/old > 0.1 only trips when the row count grows by more
# than 10%; a shrinking count passes, and old == 0 divides by zero. Confirm
# that this one-sided check is intended.
echo "$oldLc" "$newLc" | awk '{if (($2-$1)/$1 > 0.1) {printf "validate on DECIPHER CNV failed: old count: %d, new count: %d\n", $1,$2; exit 1;}}'

bedToBigBed -extraIndex=name -tab -as=../decipherCnv.as -type=bed9+10 decipherCnv.bed /hive/data/genomes/hg19/chrom.sizes decipherCnv.bb
cp decipherCnv.bb ../release/hg19/

# SNVs pipeline
# "sort -u" both orders and de-duplicates the rows after the MT -> M rename,
# so the input does not need to be sorted first.
grep -v '#' "$2" \
    | tawk '$2 == "MT" {$2 = "M";}; {print;}' \
    | sort -u > decipherSnvsRawNew.txt

hgsql hg19 -e 'drop table if exists decipherSnvsRawNew'
hgLoadSqlTab hg19 decipherSnvsRawNew ../decipherSnvsRaw.sql decipherSnvsRawNew.txt

# The query emits a literal "chr" column followed by the chromosome name;
# sed removes the tab between them so the two become a single "chrN" field.
hgsql hg19 -N -e 'select "chr", chr, start-1, end, id from decipherSnvsRawNew ' \
    | sed -e 's/chr\t/chr/' \
    | sort > decipherSnvsNew.bed

# Load decipher snvs table
hgLoadBed hg19 decipherSnvsNew decipherSnvsNew.bed