a24f5436f9ba6dde0c17e14939f40cc88b1292ec
lrnassar
  Fri Sep 1 13:14:53 2023 -0700
Fixing the cosmic track which had items with same start+end, also updated the .as file. Refs #29625

diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt
index f45ee29..e0ef920 100644
--- src/hg/makeDb/doc/hg38/hg38.txt
+++ src/hg/makeDb/doc/hg38/hg38.txt
@@ -7026,34 +7026,44 @@
 
 #Reorder to columns to conform to bed 6+3
 zcat ucsc_export.bed.gz | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, $7, 0, $6, $4, $5, $8 }' | sort -k1,1 -k2,2n > cosmic.bed
 
 #Tiny bit of python to identify the broken lines in the file where chromStart > chromEnd
 
 #for line in myFile:
 #    newLine = line.split("\t")
 #    if int(newLine[1]) > int(newLine[2]):
 #        print(line)
 #        n+=1
 #print(n)
 
 #remove those broken records from the file
 cat cosmic.bed | grep -vf badRecords.bed > cosmic.fixed.bed
-bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixed.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab
+
+#subtract 1 from the start position to conform to bed format for all the items that have the same start and end position
+
+cat cosmic.fixed.bed | awk 'BEGIN {OFS="\t"} {
+if ($2 == $3)
+        print $1,$2-1,$3,$4,$5,$6,$7,$8,$9;
+else
+        print $0;
+}' > cosmic.fixedPos.bed
+
+bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixedPos.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab
 
 #make symlink
-ln -s /gbdb/hg38/cosmic/cosmic.bb /hive/data/outside/cosmic/hg38/v98/cosmic.bb
+ln -s /hive/data/outside/cosmic/hg38/v98/cosmic.bb /gbdb/hg38/cosmic/cosmic.bb
 
 ##############################################################################
 # LIFTOVER TO GCA_018873775.2_hg01243.v3.0 (DONE - 2023-08-13 - Hiram)
     ssh hgwdev
     # going to need an ooc for hg38.p14.2bit
     cd /hive/data/genomes/hg38
     time blat hg38.p14.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=hg38.p14.ooc -repMatch=1024
     # Wrote 36808 overused 11-mers to hg38.p14.ooc
     # real    0m50.753s
 
     # and ooc for this GenArk hub
     cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0
   time blat GCA_018873775.2_hg01243.v3.0.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=GCA_018873775.2_hg01243.v3.0.ooc -repMatch=1024