fbd7a94213923088270a2bc3da63d0b8139451f0 angie Thu Oct 10 14:33:52 2019 -0700 LRG now distinguishes between fix_patch and novel_patch in their XML, so we can include mappings to fix & alt patch sequences. Overdue for an update anyway. refs #24285 diff --git src/hg/makeDb/doc/hg38/hg38.txt src/hg/makeDb/doc/hg38/hg38.txt index 98eefd9..2890bc5 100644 --- src/hg/makeDb/doc/hg38/hg38.txt +++ src/hg/makeDb/doc/hg38/hg38.txt @@ -3633,75 +3633,72 @@ time (doBlastzChainNet.pl -verbose=2 \ /hive/data/genomes/hg38/bed/lastzMonDom5.2014-05-27/DEF \ -swap -chainMinScore=5000 -chainLinearGap=loose \ -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku \ -syntenicNet) > swap.log 2>&1 # real 102m41.443s cat fb.monDom5.chainHg38Link.txt # 420069915 bases of 3501660299 (11.996%) in intersection time (/cluster/bin/scripts/doRecipBest.pl -buildDir=`pwd` \ -dbHost=hgwdev -workhorse=hgwdev monDom5 hg38) > rbest.log 2>&1 # real 90m56.189s _EOF_ ############################################################################# -# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 5/30/18 angie) -# Redmine #13359 -- otto-mate To Do #17877 -# previously done 7/7/14, 9/9/16 +# LOCUS REFERENCE GENOMIC (LRG) REGIONS AND TRANSCRIPTS (DONE 10/9/19 angie) +# Redmine #13359, #24285 -- otto-mate To Do #17877 +# previously done 7/7/14, 9/9/16, 5/30/18 set today = `date +%Y_%m_%d` mkdir -p /hive/data/genomes/hg38/bed/lrg/$today cd /hive/data/genomes/hg38/bed/lrg/$today wget ftp://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_public_xml_files.zip unzip LRG_public_xml_files.zip # Run script to convert LRG*.xml files to BED+ for regions and genePredExt+fa for transcripts: ~/kent/src/hg/utils/automation/parseLrgXml.pl GRCh38 genePredCheck lrgTranscriptsUnmapped.gp #Error: lrgTranscriptsUnmapped.gp:765: LRG_7t1 no exonFrame on CDS exon 46 -#checked: 917 failed: 1 +#checked: 1029 failed: 1 # If there are complaints e.g. about exonFrame, look for inconsistencies in the # affected transcript's coding_region/coordinates vs. exon/intron info in xml. # Contact Variation team leader Fiona Cunningham @EBI to resolve in the background # (missing exonFrame info doesn't affect our track representation because we end up using # psl). We agreed to disagree about exon 46 of LRG_7t1 because that last coding exon # portion is only the stop codon. - # Filter out alts and patches not (yet) included in hg38: - mv lrg.bed lrg.allSeqs.bed - cut -f 1 ../../../chrom.sizes | grep -Fwf - lrg.allSeqs.bed > lrg.bed + # No longer necessary to filter out alt and fix patches since they have been added to hg38. # Load LRG regions: bedToBigBed lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.bb \ -tab -type=bed12+ -as=$HOME/kent/src/hg/lib/lrg.as -extraIndex=name - rm -f /gbdb/hg38/bbi/lrg.bb - ln -s `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb + ln -sf `pwd`/lrg.bb /gbdb/hg38/bbi/lrg.bb hgBbiDbLink hg38 lrg /gbdb/hg38/bbi/lrg.bb # Map LRG fixed_annotation transcripts from LRG coords to hg38 coords (HT MarkD): lrgToPsl lrg.bed /hive/data/genomes/hg38/chrom.sizes lrg.psl pslCheck lrg.psl -#checked: 802 failed: 0 errors: 0 +#checked: 919 failed: 0 errors: 0 awk '{print $10 "\t" $11;}' lrg.psl > lrg.sizes genePredToFakePsl -chromSize=lrg.sizes placeholder \ lrgTranscriptsUnmapped.gp lrgTranscriptsFakePsl.psl lrgTranscripts.cds pslMap lrgTranscriptsFakePsl.psl lrg.psl lrgTranscriptsHg38.psl mrnaToGene -genePredExt -cdsFile=lrgTranscripts.cds -keepInvalid \ lrgTranscriptsHg38.psl lrgTranscriptsHg38NoName2.gp #Warning: no CDS for LRG_163t1 #Warning: no CDS for LRG_347t1 - # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx. + # It's OK if mrnaToGene complains about "no CDS" for a non-coding tx (RefSeq accession NR_*). grep -l NR_ LRG_163.xml LRG_347.xml #LRG_163.xml #LRG_347.xml # Load PSL, CDS and sequences. hgLoadPsl hg38 -table=lrgTranscriptAli lrgTranscriptsHg38.psl hgLoadSqlTab hg38 lrgCds ~/kent/src/hg/lib/cdsSpec.sql lrgTranscripts.cds hgPepPred hg38 tab lrgCdna lrgCdna.tab hgPepPred hg38 tab lrgPep lrgPep.tab ############################################################################# ## 7-Way Multiz (DONE - 2014-06-02 - Hiram) ssh hgwdev mkdir /hive/data/genomes/hg38/bed/multiz7way