27d9d88ca49e60586c2d0b211971c8a360d862ac hiram Thu Sep 8 23:00:48 2022 -0700 sequenceName alias should properly be called assembly no redmine diff --git src/hg/makeDb/doc/panPan3/initialBuild.txt src/hg/makeDb/doc/panPan3/initialBuild.txt index 52840d4..70f90bd 100644 --- src/hg/makeDb/doc/panPan3/initialBuild.txt +++ src/hg/makeDb/doc/panPan3/initialBuild.txt @@ -439,63 +439,63 @@ ######################################################################### # add chromAlias table (DONE - 2020-06-12 - Hiram) mkdir /hive/data/genomes/panPan3/bed/chromAlias cd /hive/data/genomes/panPan3/bed/chromAlias hgsql -N -e 'select chrom,name from ucscToRefSeq;' panPan3 \ | sort -k1,1 > ucsc.refseq.tab hgsql -N -e 'select chrom,name from ucscToINSDC;' panPan3 \ | sort -k1,1 > ucsc.genbank.tab grep -v "^#" \ ../../refseq/G*0_assembly_report.txt \ | awk -F$'\t' '{printf "%s\t%s\n", $7,$1}' \ - | sort > refseq.sequenceName.txt + | sort > refseq.assembly.txt # do *not* need the ones that have identical names to UCSC names - join -2 2 -t$'\t' refseq.sequenceName.txt <(sort -k2,2 ucsc.refseq.tab) \ + join -2 2 -t$'\t' refseq.assembly.txt <(sort -k2,2 ucsc.refseq.tab) \ | awk -F$'\t' '{printf "%s\t%s\n", $3, $2}' \ - | awk -F$'\t' '$1 != $2' | sort > ucsc.sequenceName.tab + | awk -F$'\t' '$1 != $2' | sort > ucsc.assembly.tab wc -l *.tab | sed -e 's/^/# /;' # 4293 ucsc.genbank.tab # 4293 ucsc.refseq.tab -# 4271 ucsc.sequenceName.tab +# 4271 ucsc.assembly.tab ~/kent/src/hg/utils/automation/chromAlias.pl ucsc.*.tab \ > panPan3.chromAlias.tab -for t in refseq genbank sequenceName +for t in refseq genbank assembly do c0=`cat ucsc.$t.tab | wc -l` c1=`grep $t panPan3.chromAlias.tab | wc -l` ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done # checking refseq: 4293 =? 4293 OK # checking genbank: 4293 =? 4293 OK -# # checking sequenceName: 4271 =? 4271 OK +# # checking assembly: 4271 =? 4271 OK # verify chrM is here properly: grep chrM panPan3.chromAlias.tab | sed -e 's/^/# /;' # D38116.1 chrM genbank -# MT chrM sequenceName +# MT chrM assembly # NC_001644.1 chrM refseq hgLoadSqlTab panPan3 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ panPan3.chromAlias.tab ######################################################################### # fixup search rule for assembly track/gold table (DONE - 2020-06-12 - Hiram) cd ~/kent/src/hg/makeDb/trackDb/bonobo/panPan3 # preview prefixes and suffixes: hgsql -N -e "select frag from gold;" panPan3 \ | sed -e 's/[0-9][0-9]*//;' | sort | uniq -c | sed -e 's/^/# /;' # 1 NC_.1 # 4975 SSBP.1 # implies a rule: '[NS][CS][B0-9_][P0-9][0-9]+(\.[0-9]+)?'