8cf5d04b02f2bacf4fb496891d9cfc3cd99e8df8 hiram Tue Feb 20 10:33:36 2024 -0800 recover from otto update refs #32902
diff --git src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
index e022ea6..eca9fea 100644
--- src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
+++ src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
@@ -1,108 +1,111 @@
 # add the NCBI RefSeq genes for chrM to the NCBI RefSeq track
 # DONE - Hiram - 2024-02-12
 mkdir /hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/chrMcatchUp
 cd /hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/chrMcatchUp
 # what is the NCBI name for chrM:
 grep chrM ../../chromAlias/mm39.chromAlias.txt
 # chrM AY172335.1 MT NC_005089.1
 # extract the gff descriptions from the primary gff file:
 zgrep NC_005089.1 ../download/GCF_000001635.27_GRCm39_genomic.gff.gz \
   | grep -v "^#" > genome.gff.NC_005089.1.tsv
 # extract the RefSeqLink data from that gff file:
 ~/kent/src/hg/makeDb/doc/mm39/gffToLink.pl > to.add.ncbiRefSeqLink.tsv
 # obtain the genePred file from the genomic.gff.gz file:
 export asmId=GCF_000001635.27_GRCm39
 export downloadDir=/hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/download
 export ncbiGffGz=$downloadDir/${asmId}_genomic.gff.gz
 zcat $ncbiGffGz \
   | sed -re 's/([;\t])SO_type=/\1so_type=/;' \
   | gff3ToGenePred -refseqHacks -attrsOut=$asmId.attrs.txt \
     -unprocessedRootsOut=$asmId.unprocessedRoots.txt stdin raw.$asmId.gp
 grep NC_005089.1 raw.$asmId.gp > NC_005089.1.gp
 sed -e 's/NC_005089.1/chrM/g;' NC_005089.1.gp > chrM.gp
+hgLoadGenePred -genePredExt mm39 chrMRefSeq chrM.gp
+hgsql -N -e 'select * from chrMRefSeq;' mm39 > to.add.chrM.sql.tsv
+hgsql -e 'drop table chrMRefSeq;' mm39
 # save the existing files in case they are broken during this process:
 hgsql -N -e 'select * from ncbiRefSeq;' mm39 > before.mm39.ncbiRefSeq.tsv
 hgsql -N -e 'select * from ncbiRefSeqCurated;' mm39 > before.mm39.ncbiRefSeqCurated.tsv
 hgsql -N -e 'select * from ncbiRefSeqLink;' mm39 > before.mm39.ncbiRefSeqLink.tsv
 # loading the genePred data into two tables:
 hgsql -N -e 'select count(*) from ncbiRefSeq;' mm39
 # 134700
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.chrM.sql.tsv" INTO TABLE ncbiRefSeq;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeq;' mm39
 # 134737
 hgsql -N -e 'select count(*) from ncbiRefSeqCurated;' mm39
 # 55621
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.chrM.sql.tsv" INTO TABLE ncbiRefSeqCurated;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeqCurated;' mm39
 # 55658
 ## and the RefSeqLink data:
 hgsql -N -e 'select count(*) from ncbiRefSeqLink;' mm39
 # 134699
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.ncbiRefSeqLink.tsv" INTO TABLE ncbiRefSeqLink;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeqLink;' mm39
 # 134736
 # check the relationship between the tables:
 hgsql -e 'SELECT e.name,e.chrom,j.id,j.name FROM ncbiRefSeq e, ncbiRefSeqLink j WHERE e.name = j.id AND e.chrom = "chrM";' mm39
 +---------+-------+---------+---------+
 | TrnF    | chrM  | TrnF    | TrnF    |
 | mt-Rnr1 | chrM  | mt-Rnr1 | mt-Rnr1 |
 | TrnV    | chrM  | TrnV    | TrnV    |
 | mt-Rnr2 | chrM  | mt-Rnr2 | mt-Rnr2 |
 | TrnL1   | chrM  | TrnL1   | TrnL1   |
 | ND1     | chrM  | ND1     | ND1     |
 | TrnI    | chrM  | TrnI    | TrnI    |
 | TrnQ    | chrM  | TrnQ    | TrnQ    |
 | TrnM    | chrM  | TrnM    | TrnM    |
 | ND2     | chrM  | ND2     | ND2     |
 | TrnW    | chrM  | TrnW    | TrnW    |
 | TrnA    | chrM  | TrnA    | TrnA    |
 | TrnN    | chrM  | TrnN    | TrnN    |
 | TrnC    | chrM  | TrnC    | TrnC    |
 | TrnY    | chrM  | TrnY    | TrnY    |
 | COX1    | chrM  | COX1    | COX1    |
 | TrnS1   | chrM  | TrnS1   | TrnS1   |
 | TrnD    | chrM  | TrnD    | TrnD    |
 | COX2    | chrM  | COX2    | COX2    |
 | TrnK    | chrM  | TrnK    | TrnK    |
 | ATP8    | chrM  | ATP8    | ATP8    |
 | ATP6    | chrM  | ATP6    | ATP6    |
 | COX3    | chrM  | COX3    | COX3    |
 | TrnG    | chrM  | TrnG    | TrnG    |
 | ND3     | chrM  | ND3     | ND3     |
 | TrnR    | chrM  | TrnR    | TrnR    |
 | ND4L    | chrM  | ND4L    | ND4L    |
 | ND4     | chrM  | ND4     | ND4     |
 | TrnH    | chrM  | TrnH    | TrnH    |
 | TrnS2   | chrM  | TrnS2   | TrnS2   |
 | TrnL2   | chrM  | TrnL2   | TrnL2   |
 | ND5     | chrM  | ND5     | ND5     |
 | ND6     | chrM  | ND6     | ND6     |
 | TrnE    | chrM  | TrnE    | TrnE    |
 | CYTB    | chrM  | CYTB    | CYTB    |
 | TrnT    | chrM  | TrnT    | TrnT    |
 | TrnP    | chrM  | TrnP    | TrnP    |
 +---------+-------+---------+---------+