7dea7c645f8ce8892d6d4d162a3cf8295309062f chmalee Fri Dec 11 11:22:24 2020 -0800 Bringing repo copies of omim scripts up to date with the working versions, refs #18151 diff --git src/hg/utils/otto/isca/buildISCA.csh src/hg/utils/otto/isca/buildISCA.csh index 7f0b419..c74781a 100755 --- src/hg/utils/otto/isca/buildISCA.csh +++ src/hg/utils/otto/isca/buildISCA.csh @@ -1,91 +1,99 @@ #!/bin/tcsh -e # assumes running in build directory set db=$1 if "$db" == "hg19" then set grc=GRCh37 - set liftUp=cat + set liftUp="liftUp -type=.bed stdout ../../hg19.lift warn stdin" else if "$db" == "hg38" then set grc=GRCh38 - set liftUp="liftUp -type=.bed stdout /cluster/data/hg38/jkStuff/dbVar.lift warn stdin" + set liftUp="liftUp -type=.bed stdout ../../hg38.lift warn stdin" else if "$db" == "hg18" then + # left unchanged, as we no longer build this for hg18. If that changes, we'd need to build a liftUp file + # to translate the chromosome names. set grc=NCBI36 set liftUp=cat +else + echo "Unfamiliar assembly: $db - please edit the script to provide the correct GRC name and liftUp command" + exit 1; endif cd $1 # set today = `date +%Y_%m_%d` # mkdir /hive/data/genomes/hg19/bed/isca/$today # cd /hive/data/genomes/hg19/bed/isca/$today # Get variants submitted on this assembly, and variants remapped from other assemblies. setenv LANG C -# ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd37_ClinGen_Laboratory-Submitted/gvf/ -# ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd101_ClinGen_Kaminsky_et_al_2011/gvf/ -# ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd45_ClinGen_Curated_Dosage_Sensitivity_Map/gvf/ - -wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd37_ClinGen_Laboratory-Submitted/gvf/nstd37_ClinGen_Laboratory-Submitted.${grc}.*.all.germline.ucsc.gvf.gz" -wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd45_ClinGen_Curated_Dosage_Sensitivity_Map/gvf/nstd45_ClinGen_Curated_Dosage_Sensitivity_Map.${grc}.*.all.germline.ucsc.gvf.gz" -wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/nstd101_ClinGen_Kaminsky_et_al_2011/gvf/nstd101_ClinGen_Kaminsky_et_al_2011.${grc}.*.all.germline.ucsc.gvf.gz" - -# Remove patch contig mappings; we don't display them anyway. -rm -f *.p*.gvf.gz - +wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/gvf/nstd37.${grc}.variant*.gvf.gz" +wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/gvf/nstd101.${grc}.variant*.gvf.gz" +wget -N -q "ftp://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/gvf/nstd45.${grc}.variant*.gvf.gz" -zcat nstd101*.gvf.gz nstd37_ClinGen_Laboratory-Submitted*.gvf.gz | ../../gvfToBed8Attrs.pl | $liftUp | sort > foo +zcat nstd101*.gvf.gz nstd37*.gvf.gz | ../../gvfToBed8Attrs.pl | $liftUp | sort > foo +# Not sure why this step exists. It appears to just be a chromosome filter, but that's effectively already done ... join ../../chromInfo.$db foo -t ' ' > isca.bed -zcat nstd45_*.gvf.gz | ../../gvfToBed8Attrs.pl | $liftUp | sort > foo +zcat nstd45*.gvf.gz | ../../gvfToBed8Attrs.pl | $liftUp | sort > foo +# Not sure why this step exists (as above). It appears to just be a chromosome filter, but that's effectively already done ... join ../../chromInfo.$db foo -t ' ' > iscaCurated.bed +rm -f foo foreach subtrack (Benign Pathogenic) grep -w $subtrack isca.bed > isca$subtrack.bed + echo -n "Loading isca${subtrack}New and iscaCurated${subtrack}New ... " hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \ - -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed + -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed >&/dev/null grep -w $subtrack iscaCurated.bed > iscaCurated$subtrack.bed hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \ - -allowStartEqualEnd $db iscaCurated"$subtrack"New iscaCurated$subtrack.bed + -allowStartEqualEnd $db iscaCurated"$subtrack"New iscaCurated$subtrack.bed >&/dev/null + echo "Success" end # The subcategories of Uncertain need a bit more sophisticated treatment: set subtrack = Uncertain grep -w $subtrack isca.bed \ | grep -vi 'Uncertain Significance: likely' \ > isca$subtrack.bed +echo -n "Loading isca${subtrack}New ... " hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \ - -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed + -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed >&/dev/null +echo "Success" foreach unc (benign pathogenic) set subtrack = Likely`perl -we 'print ucfirst("'$unc'");'` grep -wi "Likely $unc" isca.bed \ > isca$subtrack.bed + echo -n "Loading isca${subtrack}New ... " hgLoadBed -tab -renameSqlTable -sqlTable=$HOME/kent/src/hg/lib/bed8Attrs.sql \ - -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed + -allowStartEqualEnd $db isca"$subtrack"New isca$subtrack.bed >&/dev/null + echo "Success" end # make bedGraphs hgsql -N -e "SELECT chrom, chromStart, chromEnd FROM iscaPathogenicNew \ WHERE attrVals LIKE '%number_gain%'" $db | sort \ | bedItemOverlapCount $db stdin > iscaPathGain.bedGraph hgsql -N -e "SELECT chrom, chromStart, chromEnd FROM iscaPathogenicNew \ WHERE attrVals LIKE '%number_loss%'" $db | sort \ | bedItemOverlapCount $db stdin > iscaPathLoss.bedGraph hgsql -N -e "SELECT chrom, chromStart, chromEnd FROM iscaBenignNew \ WHERE attrVals LIKE '%number_gain%'" $db | sort \ | bedItemOverlapCount $db stdin > iscaBenignGain.bedGraph hgsql -N -e "SELECT chrom, chromStart, chromEnd FROM iscaBenignNew \ WHERE attrVals LIKE '%number_loss%'" $db | sort \ | bedItemOverlapCount $db stdin > iscaBenignLoss.bedGraph # load tables -hgLoadBed -bedGraph=4 $db iscaPathGainCumNew iscaPathGain.bedGraph -hgLoadBed -bedGraph=4 $db iscaPathLossCumNew iscaPathLoss.bedGraph -hgLoadBed -bedGraph=4 $db iscaBenignGainCumNew iscaBenignGain.bedGraph -hgLoadBed -bedGraph=4 $db iscaBenignLossCumNew iscaBenignLoss.bedGraph +echo -n "Loading bedGraphs ... " +hgLoadBed -bedGraph=4 $db iscaPathGainCumNew iscaPathGain.bedGraph >&/dev/null +hgLoadBed -bedGraph=4 $db iscaPathLossCumNew iscaPathLoss.bedGraph >&/dev/null +hgLoadBed -bedGraph=4 $db iscaBenignGainCumNew iscaBenignGain.bedGraph >&/dev/null +hgLoadBed -bedGraph=4 $db iscaBenignLossCumNew iscaBenignLoss.bedGraph >&/dev/null +echo "Success"