705126cb1d2946056b9da0397acaf2bb6703c6e4 galt Tue Oct 12 22:43:02 2021 -0700 finishing off GRC incident step now that QA has pushed hg38.2bit to RR. refs #25091 diff --git src/hg/makeDb/doc/hg38/patchUpdate.13.txt src/hg/makeDb/doc/hg38/patchUpdate.13.txt index c238378..b3379c0 100644 --- src/hg/makeDb/doc/hg38/patchUpdate.13.txt +++ src/hg/makeDb/doc/hg38/patchUpdate.13.txt @@ -492,35 +492,36 @@ ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done # checking refseq: 639 =? 639 OK # checking genbank: 640 =? 640 OK # checking assembly: 640 =? 640 OK # Note how there's one fewer refseq, consistent with featureBits above. hgLoadSqlTab hg38 chromAlias $HOME/kent/src/hg/lib/chromAlias.sql ${db}.chromAlias.tab ############################################################################## -# UCSC to Ensembl (TODO 2021-09-18 galt) +# UCSC to Ensembl (DONE 2021-09-18 galt) # Ask Hiram to update ensembleToUcsc and ensemblLift tables. # FYI ensemblLift offset shows how many Ns were inserted by Ensembl to give the right coordinate to alts and fixes. # - +I asked Hiram to update them and he did. +However, some questions remain about how best to handle the 57 reversed sequences found on Ensembl chroms. ############################################################################ # altLocations and patchLocations (DONE 2021-08-27 galt) # indicate corresponding locations between haplotypes and reference mkdir /hive/data/genomes/hg38/bed/altLocations.p13 cd /hive/data/genomes/hg38/bed/altLocations.p13 ~/kent/src/hg/utils/automation/altScaffoldPlacementToBed.pl \ /hive/data/genomes/grcH38P13/genbank/GCA_000001405.28_GRCh38.p13_assembly_structure/{ALT_*,PATCHES}/alt_scaffolds/alt_scaffold_placement.txt \ | sort -k1,1 -k2n,2n \ > altAndFixLocations.bed wc -l altAndFixLocations.bed #892 altAndFixLocations.bed grep _alt altAndFixLocations.bed > altLocations.bed grep _fix altAndFixLocations.bed > fixLocations.bed hgLoadBed hg38 altLocations{,.bed} @@ -870,24 +871,24 @@ ############################################################################## # OMIM tracks (TODO - 2020-? angie) # the otto process builds the omim* tables; edit otto/omim/buildOmimTracks.sh to make sure # the most recent dbSNP version is listed for the db. After the snpNNN table is updated to # include patch sequences, the next otto update will include patches. # omimGene2 is still using refGene, but I think it would be better if it used ncbiRefSeqCurated # if it exists. # TODO: OMIM Genes needs liftOver to new alts and fixes (or redo from ncbiRefSeq). # OMIM Phenotypes needs liftOvers to all alts and fixes. Sometimes it spans a region larger # than an alt/fix, so maybe lower the percentage that has to map? ############################################################################## -# GRC Incident Database (TODO 2021-? galt) +# GRC Incident Database (DONE 2021-10-12 galt) # Wait until the updated hg38 files have been pushed to RR because GRC Incident update is # automated. Then update the file used to map GRC's RefSeq accessions to our names: hgsql hg38 -NBe 'select alias,chrom from chromAlias where source = "refseq" order by alias;' \ > /hive/data/outside/grc/incidentDb/GRCh38/refSeq.chromNames.tab #############################################################################