a24f5436f9ba6dde0c17e14939f40cc88b1292ec lrnassar Fri Sep 1 13:14:53 2023 -0700 Fixing the cosmic track which had items with same start+end, also updated the .as file. Refs #29625 diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt index f45ee29..e0ef920 100644 --- src/hg/makeDb/doc/hg38/hg38.txt +++ src/hg/makeDb/doc/hg38/hg38.txt @@ -7026,34 +7026,44 @@ #Reorder the columns to conform to bed 6+3 zcat ucsc_export.bed.gz | awk -F'\t' -v OFS="\t" '{ print $1, $2, $3, $7, 0, $6, $4, $5, $8 }' | sort -k1,1 -k2,2n > cosmic.bed #Tiny bit of python to identify the broken lines in the file where chromStart > chromEnd #for line in myFile: # newLine = line.split("\t") # if int(newLine[1]) > int(newLine[2]): # print(line) # n+=1 #print(n) #remove those broken records from the file cat cosmic.bed | grep -vf badRecords.bed > cosmic.fixed.bed -bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixed.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab + +#subtract 1 from chromStart to conform to bed format for all the items that have the same start and end position + +cat cosmic.fixed.bed | awk 'BEGIN {OFS="\t"} { +if ($2 == $3) + print $1,$2-1,$3,$4,$5,$6,$7,$8,$9; +else + print $0; +}' > cosmic.fixedPos.bed + +bedToBigBed -type=bed6+3 -as=/hive/data/outside/cosmic/hg38/v98/cosmic.as /hive/data/outside/cosmic/hg38/v98/cosmic.fixedPos.bed /hive/data/genomes/hg38/chrom.sizes /hive/data/outside/cosmic/hg38/v98/cosmic.bb -tab #make symlink -ln -s /gbdb/hg38/cosmic/cosmic.bb /hive/data/outside/cosmic/hg38/v98/cosmic.bb +ln -s /hive/data/outside/cosmic/hg38/v98/cosmic.bb /gbdb/hg38/cosmic/cosmic.bb ############################################################################## # LIFTOVER TO GCA_018873775.2_hg01243.v3.0 (DONE - 2023-08-13 - Hiram) ssh hgwdev # going to need an ooc for hg38.p14.2bit cd /hive/data/genomes/hg38 time blat hg38.p14.2bit /dev/null /dev/null -tileSize=11 \ 
-makeOoc=hg38.p14.ooc -repMatch=1024 # Wrote 36808 overused 11-mers to hg38.p14.ooc # real 0m50.753s # and ooc for this GenArk hub cd /hive/data/genomes/asmHubs/genbankBuild/GCA/018/873/775/GCA_018873775.2_hg01243.v3.0 time blat GCA_018873775.2_hg01243.v3.0.2bit /dev/null /dev/null -tileSize=11 \ -makeOoc=GCA_018873775.2_hg01243.v3.0.ooc -repMatch=1024