fbd7a94213923088270a2bc3da63d0b8139451f0
angie
  Thu Oct 10 14:33:52 2019 -0700
The LRG XML now distinguishes between fix_patch and novel_patch, so we can include mappings to fix and alt patch sequences.  The LRG tracks were overdue for an update anyway.  refs #24285

diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt
index 98eefd9..2890bc5 100644
--- src/hg/makeDb/doc/hg38/hg38.txt
+++ src/hg/makeDb/doc/hg38/hg38.txt
@@ -3633,75 +3633,72 @@
     time (doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27/DEF \
         -swap -chainMinScore=5000 -chainLinearGap=loose \
         -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \
         -syntenicNet) > swap.log 2>&1
     # real    102m41.443s
 
     cat fb.monDom5.chainHg38Link.txt
     # 420069915 bases of 3501660299 (11.996%) in intersection
     time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \
         -dbHost=hgwdev -workhorse=hgwdev monDom5 hg38) > rbest.log 2>&1
     #  real    90m56.189s
 
 _EOF_
 #############################################################################
-# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 5/30/18 angie)
-# Redmine #13359 -- otto-mate To Do #17877
-# previously done 7/7/14, 9/9/16
+# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 10/9/19 angie)
+# Redmine #13359, #24285 -- otto-mate To Do #17877
+# previously done 7/7/14, 9/9/16, 5/30/18
     set today = `date +%Y_%m_%d`
     mkdir -p /hive/data/genomes/hg38/bed/lrg/$today
     cd /hive/data/genomes/hg38/bed/lrg/$today
     wget ftp://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_public_xml_files.zip
     unzip LRG_public_xml_files.zip
 
     # Run script to convert LRG*.xml files to BED+ for regions and genePredExt+fa for transcripts:
     ~/kent/src/hg/utils/automation/parseLrgXml.pl GRCh38
     genePredCheck lrgTranscriptsUnmapped.gp
 #Error: lrgTranscriptsUnmapped.gp:765: LRG_7t1 no exonFrame on CDS exon 46
-#checked: 917 failed: 1
+#checked: 1029 failed: 1
     # If there are complaints e.g. about exonFrame, look for inconsistencies in the
     # affected transcript's coding_region/coordinates vs. exon/intron info in xml.
     # Contact Variation team leader Fiona Cunningham @EBI to resolve in the background
     # (missing exonFrame info doesn't affect our track representation because we end up using
     # psl).  We agreed to disagree about exon 46 of LRG_7t1 because that last coding exon
     # portion is only the stop codon.
 
-    # Filter out alts and patches not (yet) included in hg38:
-    mv lrg.bed lrg.allSeqs.bed
-    cut -f 1 ../../../chrom.sizes | grep -Fwf - lrg.allSeqs.bed > lrg.bed
+    # No longer necessary to filter out alt and fix patches since they have been added to hg38.
 
     # Load LRG regions:
     bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \
       -tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name
-    rm -f /gbdb/hg38/bbi/lrg.bb
-    ln -s `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb
+    ln -sf `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb
     hgBbiDbLink hg38 lrg /gbdb/hg38/bbi/lrg.bb
 
     # Map LRG fixed_annotation transcripts from LRG coords to hg38 coords (HT MarkD):
     lrgToPsl lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.psl
     pslCheck lrg.psl
-#checked: 802 failed: 0 errors: 0
+#checked: 919 failed: 0 errors: 0
     awk '{print $10 "\t" $11;}' lrg.psl > lrg.sizes
     genePredToFakePsl -chromSize=lrg.sizes placeholder \
       lrgTranscriptsUnmapped.gp lrgTranscriptsFakePsl.psl lrgTranscripts.cds
     pslMap lrgTranscriptsFakePsl.psl lrg.psl lrgTranscriptsHg38.psl
     mrnaToGene -genePredExt -cdsFile=lrgTranscripts.cds -keepInvalid \
       lrgTranscriptsHg38.psl lrgTranscriptsHg38NoName2.gp
 #Warning: no CDS for LRG_163t1
 #Warning: no CDS for LRG_347t1
-    # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx.
+    # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx (RefSeq accession NR_*).
     grep -l NR_ LRG_163.xml LRG_347.xml
 #LRG_163.xml
 #LRG_347.xml
 
     # Load PSL, CDS and sequences.
     hgLoadPsl hg38 -table=lrgTranscriptAli lrgTranscriptsHg38.psl
     hgLoadSqlTab hg38 lrgCds ~/kent/src/hg/lib/cdsSpec.sql lrgTranscripts.cds
     hgPepPred hg38 tab lrgCdna lrgCdna.tab
     hgPepPred hg38 tab lrgPep lrgPep.tab
 
 
 #############################################################################
 ## 7-Way Multiz (DONE - 2014-06-02 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/hg38/bed/multiz7way