a705056fed4ed3269b4dcbca18194b4f166a36c0
markd
  Fri Oct 15 17:11:33 2021 -0700
import of gencode V39 pre-release

diff --git src/hg/makeDb/outside/gencode/gencodeLoad.mk src/hg/makeDb/outside/gencode/gencodeLoad.mk
index a4580e1..0b2ba79 100644
--- src/hg/makeDb/outside/gencode/gencodeLoad.mk
+++ src/hg/makeDb/outside/gencode/gencodeLoad.mk
@@ -20,114 +20,114 @@
 host=$(shell hostname)
 ppid=$(shell echo $$PPID)
 tmpExt = ${host}.${ppid}.tmp
 SHELL = bash -e
 export SHELLOPTS=pipefail
 
 ##
 # programs, etc
 ##
 mach = $(shell uname -m)
 
 ##
 # Release info and files from Sanger.
 # BEGIN EDIT THESE EACH RELEASE
 ##
-preRelease = no
-#preRelease = yes
+#preRelease = no
+preRelease = yes
 db = hg38
 #db = hg19
 #db = mm39
 #db = mm10
 ifeq (${db},mm10)
     grcRefAssembly = GRCm38
     verBase = M25
     prevVer = M24
     backmapTargetVer = M25
     ver = ${verBase}lift37
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${verBase}/GRCm38_mapping
     annGffTypeName = chr_patch_hapl_scaff.annotation
     isBackmap = yes
     asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.26_GRCm38.p6/GCF_000001635.26_GRCm38.p6_assembly_report.txt
 else ifeq (${db},mm39)
     grcRefAssembly = GRCm39
     ver = M27
     prevVer = M26
     gencodeOrg = Gencode_mouse
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
     asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_assembly_report.txt
 else ifeq (${db},hg38)
     grcRefAssembly = GRCh38
-    ver = 38
-    prevVer = 37
+    ver = 39
+    prevVer = 38
     gencodeOrg = Gencode_human
     ftpReleaseSubdir = release_${ver}
     annGffTypeName = chr_patch_hapl_scaff.annotation
     asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt
 else ifeq (${db},hg19)
     grcRefAssembly = GRCh37
     verBase = 38
     ver = ${verBase}lift37
     prevVer = 37lift37
     backmapTargetVer = 19
     ftpReleaseSubdir = release_${verBase}/GRCh37_mapping
     gencodeOrg = Gencode_human
     annGffTypeName = annotation
     isBackmap = yes
     asmReptUrl = https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt
 else
     $(error unimplement genome database: ${db})
 endif
 # END EDIT THESE EACH RELEASE
 
 
 ifeq (${preRelease},yes)
     # pre-release
-    baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre
+    baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/havana/gencode_pre
 else
     # official release
-    baseUrl = ftp://ftp.ebi.ac.uk/pub/databases/gencode
+    baseUrl = rsync://ftp.ebi.ac.uk/pub/databases/gencode
 endif
 
 rel = V${ver}
 releaseUrl = ${baseUrl}/${gencodeOrg}/${ftpReleaseSubdir}
 dataDir = data
 relDir = ${dataDir}/release_${ver}
 annotationGff = ${relDir}/gencode.v${ver}.${annGffTypeName}.gff3.gz
 pseudo2WayGff = ${relDir}/gencode.v${ver}.2wayconspseudos.gff3.gz
 polyAGff = ${relDir}/gencode.v${ver}.polyAs.gff3.gz
 
 ccdsBinDir = ~markd/compbio/ccds/ccds2/output/bin/$(mach)/opt
 gencodeMakeTracks = ${ccdsBinDir}/gencodeMakeTracks
 gencodeMakeAttrs = ${ccdsBinDir}/gencodeMakeAttrs
 gencodeExonSupportToTable = ${ccdsBinDir}/gencodeExonSupportToTable
 gencodeGxfToGenePred = ${ccdsBinDir}/gencodeGxfToGenePred
 gencodePolyaGxfToGenePred = ${ccdsBinDir}/gencodePolyaGxfToGenePred
 gencodeGxfToAttrs = ${ccdsBinDir}/gencodeGxfToAttrs
 buildGencodeToUcscLift = ${HOME}/kent/src/hg/makeDb/outside/gencode/bin/buildGencodeToUcscLift
 gencodeBackMapMetadataIds = ${ccdsBinDir}/gencodeBackMapMetadataIds
 encodeAutoSqlDir = ${HOME}/kent/src/hg/lib/encode
 
 ##
 # intermediate data not loaded into tracks
 ##
 gencodeGp = ${dataDir}/gencode.gp
 gencodeTsv = ${dataDir}/gencode.tsv
 gencodeToUcscChain = ${dataDir}/gencodeToUcsc.chain
-asmRept = ${dataDir}/$(notdir asmReptUrl)
+asmRept = ${dataDir}/$(notdir ${asmReptUrl})
 
 # flag indicating fetch was done
 fetchDone = ${relDir}/done
 
 ##
 # track and table data
 ##
 tableDir = tables
 tablePre = wgEncodeGencode
 
 # subset track and pattern for generate genePred and track names for each subset
 # obtained from gencode.v*.annotation.level_1_2.gtf, gencode.v*.annotation.level_3.gtf
 tableBasic = ${tablePre}Basic${rel}
 tableBasicGp = ${tableDir}/${tableBasic}.gp
 
@@ -227,33 +227,31 @@
 
 # directory for flags indicating tables were loaded
 loadedDir = loaded
 
 # directory for output and flags for sanity checks
 checkDir = check
 
 all: fetch mkTables loadTables checkSanity cmpRelease listTables
 
 
 ##
 # fetch release, this doesn't get subdirectories so as not to copy the lift releases
 ##
 fetch: ${fetchDone}
 ${fetchDone}:
-	@mkdir -p $(dir $@) ${dataDir}
-	wget -nv --cut-dirs=4 --directory-prefix=${relDir} -np "${releaseUrl}/*"
-	chmod a-w ${relDir}/*
+	mkdir -p $(dir $@) && rsync -a --include='gencode.*' --exclude='*' '${releaseUrl}/' ${relDir}
 	touch $@
 
 ##
 # dependencies for files from release
 ##
 ${annotationGff}: ${fetchDone}
 ${pseudo2WayGff}: ${fetchDone}
 ${polyAGff}: ${fetchDone}
 ${tableGeneSourceMeta}: ${fetchDone}
 ${tableTranscriptSourceMeta}: ${fetchDone}
 ${tableTranscriptSupportMeta}: ${fetchDone}
 ${tableExonSupportMeta}: ${fetchDone}
 ${tableGeneSymbolMeta}: ${fetchDone}
 ${tablePdbMeta}: ${fetchDone}
 ${tablePubMedMeta}: ${fetchDone}
@@ -292,31 +290,31 @@
 	${gencodePolyaGxfToGenePred} $< ${gencodeToUcscChain} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${tableUniProtTab}: ${tableSwissProtMeta} ${tableTrEMBLMeta} ${gencodeTsv}
 	@mkdir -p $(dir $@)
 	((${metaFilterCmdGz} ${tableSwissProtMeta} | tawk '{print $$0,"SwissProt"}') && (${metaFilterCmdGz}  ${tableTrEMBLMeta} | tawk '{print $$0,"TrEMBL"}')) | sort -k 1,1 > $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${gencodeToUcscChain}: ${asmRept}
 	@mkdir -p $(dir $@)
 	${buildGencodeToUcscLift} ${db} ${asmRept} $@.${tmpExt}
 	mv -f $@.${tmpExt} $@
 
 ${asmRept}:
 	@mkdir -p $(dir $@)
-	wget -nv -O $@.${tmpExt} ${asmReptUrl}
+	wget -nv -o /dev/stderr -O $@.${tmpExt} ${asmReptUrl}
 	mv -f $@.${tmpExt} $@
 
 # other tab files, just copy to name following convention to make load rules
 # work
 ifeq (${isBackmap}, yes)
    metaFilterCmd = ${gencodeBackMapMetadataIds} ${gencodeTsv} ${targetGencodeTsv}
    metaFilterCmdGz = ${metaFilterCmd}
    metaFilterDepend = ${gencodeTsv} ${targetGencodeTsv}
 else
    metaFilterCmd = cat
    metaFilterCmdGz = zcat
    metaFilterDepend = ${gencodeTsv}
 endif
 define copyMetadataTabGz
 mkdir -p $(dir $@)