249b307f8e7edcaa916870d673678044c478e953 hiram Mon Sep 9 09:59:05 2019 -0700 Adding Ensembl alias with v95 release refs #22425 diff --git src/hg/makeDb/doc/bosTau9/initialBuild.txt src/hg/makeDb/doc/bosTau9/initialBuild.txt index 4a05a9f..643ccdf 100644 --- src/hg/makeDb/doc/bosTau9/initialBuild.txt +++ src/hg/makeDb/doc/bosTau9/initialBuild.txt @@ -460,45 +460,59 @@ featureBits -countGaps bosTau9 ucscToRefSeq # 2715853792 bases of 2715853792 (100.000%) in intersection ######################################################################### # add chromAlias table (DONE - 2018-11-08 - Hiram) # after ucscToRefSeq and ucscToINSDC tables have been created mkdir /hive/data/genomes/bosTau9/bed/chromAlias cd /hive/data/genomes/bosTau9/bed/chromAlias hgsql -N -e 'select chrom,name from ucscToRefSeq;' bosTau9 \ | sort -k1,1 > ucsc.refseq.tab hgsql -N -e 'select chrom,name from ucscToINSDC;' bosTau9 \ | sort -k1,1 > ucsc.genbank.tab + ### Adding Ensembl alias with v95 release, after idKeys made: 2019-01-16 + join -t$'\t' ../idKeys/bosTau9.idKeys.txt \ + ../../ens95/ensBosTau9.idKeys.txt | cut -f2- \ + | sort -k1,1 | join -t$'\t' <(sort -k1,1 ../../chrom.sizes) - \ + | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \ + | sort -k1,1 -k2,2n > ucscToEns.bed + # Ensembl is missing a chrM sequence: + wc -l *.bed + 2210 ucscToEns.bed + 2211 ucscToINSDC.bed + 2211 ucscToRefSeq.bed + cut -f1,4 ucscToEns.bed | sort > ucsc.ensembl.tab + ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \ > bosTau9.chromAlias.tab -for t in refseq genbank +for t in refseq genbank ensembl do c0=`cat ucsc.$t.tab | wc -l` c1=`grep $t bosTau9.chromAlias.tab | wc -l` ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done # checking refseq: 2211 =? 2211 OK # checking genbank: 2211 =? 2211 OK +# checking ensembl: 2210 =? 2210 OK hgLoadSqlTab bosTau9 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ bosTau9.chromAlias.tab ######################################################################### # fixup search rule for assembly track/gold table (DONE - 2018-11-06 - Hiram) cd ~/kent/src/hg/makeDb/trackDb/cow/bosTau9 # preview prefixes and suffixes: hgsql -N -e "select frag from gold;" bosTau9 \ | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c | sed -e 's/^/#\t/;' # 1 NC_.1 # 2210 NKLS.1 # implies a rule: 'N[CK][LS0-9_]+(\.[0-9]+)?'