b1b2e5b7727c19291acb605ad561dfbd225b0496 hiram Tue Jan 21 10:30:24 2020 -0800 cleaning up some errors for Ensembl v99 build no redmine diff --git src/hg/utils/automation/findEnsFtpNames.sh src/hg/utils/automation/findEnsFtpNames.sh index dac3122..467d53b 100755 --- src/hg/utils/automation/findEnsFtpNames.sh +++ src/hg/utils/automation/findEnsFtpNames.sh @@ -15,61 +15,71 @@ echo "release.<ens version>.fasta.names" echo "use those lists to edit EnsGeneAutomate.pm" exit 255 fi echo "Scanning for GTF file names" echo "user anonymous hiram@soe cd pub/release-${VERSION}/gtf ls -lR bye" > ftp.rsp ftp -n -v -i ftp.ensembl.org < ftp.rsp > release.${VERSION}.gtf.ls-lR # the mus_musculus_ extra sequences are stuck at version 86 -egrep -v "CHECKSUMS|README" release.${VERSION}.gtf.ls-lR | awk ' +egrep -v "CHECKSUMS|README|Cyprinus_carpio_hebao_red|Cyprinus_carpio_german_mirror" release.${VERSION}.gtf.ls-lR | awk ' { if (match($1,"^./[a-z0-9_]*:$")) {gsub(":$","",$1); printf "%s/", $1 } if (NF == 9) { if ((match($1,"^-rw")) && (match($NF,"'${VERSION}'.gtf.gz"))) {printf "%s\n", $NF} } if (NF == 9) { if ((match($1,"^-rw")) && (match($NF,"86.gtf.gz"))) {printf "%s\n", $NF} } } ' | sed -e "s#^./#'x' => '#; s#\$#',#" > release.${VERSION}.gtf.names echo "Scanning for MySQL table files" echo "user anonymous hiram@soe cd pub/release-${VERSION}/mysql ls -lR bye" > ftp.rsp ftp -i -n -v ftp.ensembl.org < ftp.rsp > release.${VERSION}.MySQL.ls-lR egrep "_core_${VERSION}.*:$" release.${VERSION}.MySQL.ls-lR \ | sed -e 's/://;' | sed -e "s#^./#'x' => '#; s#\$#',#" \ > release.${VERSION}.MySQL.names echo "Scanning for protein fasta files:" echo "user anonymous hiram@ucsc cd pub/release-${VERSION}/fasta ls -lR bye" > ftp.rsp ftp -i -n -v ftp.ensembl.org < ftp.rsp > release.${VERSION}.fasta.ls-lR + awk ' -BEGIN{ D="notYet" } +BEGIN{ D="notYet"; d="notyet" } { if (!match($1,"^drwx")) { - if (match($1,"^./[a-z_]*/pep:$")) { + if (match($1,"^./[0-9a-z_]*/pep:$")) { gsub(":$","",$1); D = $1; + d = tolower(D); + sub("./","", d); + sub("/pep","", d); } - if ((9 == NF) && match($1,"^-rw") && match($NF,"pep.all.fa")) { + if ((9 == NF) && match($1,"^-rw") && match($NF,"pep.all.fa.gz")) { + tl = tolower($NF) + if (index(tl, d) > 0) { printf "%s/%s\n", D, $NF } } } +} ' release.${VERSION}.fasta.ls-lR \ | sed -e "s#^./#'x' => '#; s#\$#',#" > release.${VERSION}.fasta.names +# printf "%s/%s\t%s\t%s\n", D, $NF, d, tl +# printf "%s/%s\t%s\t%s\n", D, $NF, d, tl + rm -f ftp.rsp