8cf5d04b02f2bacf4fb496891d9cfc3cd99e8df8
hiram
  Tue Feb 20 10:33:36 2024 -0800
recover from otto update refs #32902

diff --git src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
index e022ea6..eca9fea 100644
--- src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
+++ src/hg/makeDb/doc/mm39/chrM.ncbiRefSeq.txt
@@ -1,108 +1,111 @@
 # add the NCBI RefSeq genes for chrM to the NCBI RefSeq track
 # DONE - Hiram - 2024-02-12
 
 mkdir  /hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/chrMcatchUp
 cd  /hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/chrMcatchUp
 
 # what is the NCBI name for chrM:
 
 grep chrM ../../chromAlias/mm39.chromAlias.txt
 
 # chrM       AY172335.1      MT      NC_005089.1
 
 # extract the gff descriptions from the primary gff file:
 
 zgrep NC_005089.1 ../download/GCF_000001635.27_GRCm39_genomic.gff.gz \
   | grep -v "^#" > genome.gff.NC_005089.1.tsv
 
 # extract the RefSeqLink data from that gff file:
 
 ~/kent/src/hg/makeDb/doc/mm39/gffToLink.pl > to.add.ncbiRefSeqLink.tsv
 
 # obtain the genePred file from the genomic.gff.gz file:
 
 export asmId=GCF_000001635.27_GRCm39
 export downloadDir=/hive/data/genomes/mm39/bed/ncbiRefSeq.2023-04-19/download
 export ncbiGffGz=$downloadDir/${asmId}_genomic.gff.gz
 
 zcat $ncbiGffGz \
     | sed -re 's/([;\t])SO_type=/\1so_type=/;' \
       | gff3ToGenePred  -refseqHacks -attrsOut=$asmId.attrs.txt \
         -unprocessedRootsOut=$asmId.unprocessedRoots.txt stdin raw.$asmId.gp
 
 grep NC_005089.1 raw.$asmId.gp > NC_005089.1.gp
 sed -e 's/NC_005089.1/chrM/g;' NC_005089.1.gp > chrM.gp
+hgLoadGenePred -genePredExt mm39 chrMRefSeq chrM.gp
+hgsql -N -e 'select * from chrMRefSeq;' mm39 > to.add.chrM.sql.tsv
+hgsql -e 'drop table chrMRefSeq;' mm39
 
 # save the contents of the existing tables in case they are broken during this process:
 hgsql -N -e 'select * from ncbiRefSeq;' mm39 > before.mm39.ncbiRefSeq.tsv
 hgsql -N -e 'select * from ncbiRefSeqCurated;' mm39 > before.mm39.ncbiRefSeqCurated.tsv
 hgsql -N -e 'select * from ncbiRefSeqLink;' mm39 > before.mm39.ncbiRefSeqLink.tsv
 
 # load the chrM genePred rows into two tables, ncbiRefSeq and ncbiRefSeqCurated:
 
 hgsql -N -e 'select count(*) from ncbiRefSeq;' mm39
 # 134700
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.chrM.sql.tsv" INTO TABLE ncbiRefSeq;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeq;' mm39
 # 134737
 
 hgsql -N -e 'select count(*) from ncbiRefSeqCurated;' mm39
 # 55621
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.chrM.sql.tsv" INTO TABLE ncbiRefSeqCurated;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeqCurated;' mm39
 # 55658
 
 ## and the RefSeqLink data:
 
 hgsql -N -e 'select count(*) from ncbiRefSeqLink;' mm39
 # 134699
 hgsql -e 'LOAD DATA LOCAL INFILE "to.add.ncbiRefSeqLink.tsv" INTO TABLE ncbiRefSeqLink;' mm39
 hgsql -N -e 'select count(*) from ncbiRefSeqLink;' mm39
 # 134736
 
 # check the relationship between the tables:
 
 hgsql -e 'SELECT e.name,e.chrom,j.id,j.name FROM
   ncbiRefSeq e,
   ncbiRefSeqLink j
 WHERE e.name = j.id AND e.chrom = "chrM";' mm39
 
 +---------+-------+---------+---------+
 | TrnF    | chrM  | TrnF    | TrnF    |
 | mt-Rnr1 | chrM  | mt-Rnr1 | mt-Rnr1 |
 | TrnV    | chrM  | TrnV    | TrnV    |
 | mt-Rnr2 | chrM  | mt-Rnr2 | mt-Rnr2 |
 | TrnL1   | chrM  | TrnL1   | TrnL1   |
 | ND1     | chrM  | ND1     | ND1     |
 | TrnI    | chrM  | TrnI    | TrnI    |
 | TrnQ    | chrM  | TrnQ    | TrnQ    |
 | TrnM    | chrM  | TrnM    | TrnM    |
 | ND2     | chrM  | ND2     | ND2     |
 | TrnW    | chrM  | TrnW    | TrnW    |
 | TrnA    | chrM  | TrnA    | TrnA    |
 | TrnN    | chrM  | TrnN    | TrnN    |
 | TrnC    | chrM  | TrnC    | TrnC    |
 | TrnY    | chrM  | TrnY    | TrnY    |
 | COX1    | chrM  | COX1    | COX1    |
 | TrnS1   | chrM  | TrnS1   | TrnS1   |
 | TrnD    | chrM  | TrnD    | TrnD    |
 | COX2    | chrM  | COX2    | COX2    |
 | TrnK    | chrM  | TrnK    | TrnK    |
 | ATP8    | chrM  | ATP8    | ATP8    |
 | ATP6    | chrM  | ATP6    | ATP6    |
 | COX3    | chrM  | COX3    | COX3    |
 | TrnG    | chrM  | TrnG    | TrnG    |
 | ND3     | chrM  | ND3     | ND3     |
 | TrnR    | chrM  | TrnR    | TrnR    |
 | ND4L    | chrM  | ND4L    | ND4L    |
 | ND4     | chrM  | ND4     | ND4     |
 | TrnH    | chrM  | TrnH    | TrnH    |
 | TrnS2   | chrM  | TrnS2   | TrnS2   |
 | TrnL2   | chrM  | TrnL2   | TrnL2   |
 | ND5     | chrM  | ND5     | ND5     |
 | ND6     | chrM  | ND6     | ND6     |
 | TrnE    | chrM  | TrnE    | TrnE    |
 | CYTB    | chrM  | CYTB    | CYTB    |
 | TrnT    | chrM  | TrnT    | TrnT    |
 | TrnP    | chrM  | TrnP    | TrnP    |
 +---------+-------+---------+---------+