src/hg/utils/automation/findEnsFtpNames.sh 1.1

1.1 2009/07/14 18:40:25 hiram
Initial source used for the past couple of Ensembl updates
Index: src/hg/utils/automation/findEnsFtpNames.sh
===================================================================
RCS file: src/hg/utils/automation/findEnsFtpNames.sh
diff -N src/hg/utils/automation/findEnsFtpNames.sh
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/utils/automation/findEnsFtpNames.sh	14 Jul 2009 18:40:25 -0000	1.1
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+# $Id$
+
+VERSION="$1"    # Ensembl release number, e.g. 55
+if [ -z "${VERSION}" ]; then
+    printf '%s\n' "usage: findEnsFtpNames.sh <ens version>" \
+        "where <ens version> is something like: 55" \
+        "this script will scan the ftp.ensembl.org site and extract" \
+        "the names from the files there that we need to create" \
+        "correspondence to UCSC database names" \
+        "when complete, look for result files:" \
+        "release.<ens version>.gtf.names" \
+        "release.<ens version>.MySQL.names" \
+        "release.<ens version>.fasta.names" \
+        "use those lists to edit EnsGeneAutomate.pm"
+    exit 255
+fi
+
+
+# GTF step: list pub/release-N/gtf recursively, then reduce to dir/file names.
+echo "Scanning for GTF file names"
+# ftp.rsp holds the commands fed to ftp on stdin; each step overwrites it.
+echo "user anonymous hiram@soe
+cd pub/release-${VERSION}/gtf
+ls -lR
+bye" > ftp.rsp
+# -n: no auto-login (the "user" line logs in); -i: no prompts; -v: verbose
+ftp -n -v -i ftp.ensembl.org < ftp.rsp > "release.${VERSION}.gtf.ls-lR"
+# A "./dir:" header sets D; each 9-field "-rw" line is a regular file and is
+# printed as D/name, so every file (not just the first) carries its directory.
+awk '
+$1 ~ /^\.\// { D = $1; sub(/^\.\//, "", D); sub(/:$/, "", D); next }
+NF == 9 && $1 ~ /^-rw/ { if (D == "") print $NF; else printf "%s/%s\n", D, $NF }
+' "release.${VERSION}.gtf.ls-lR" > "release.${VERSION}.gtf.names"
+
+# MySQL step: list pub/release-N/mysql recursively and keep only the
+# directory names containing "_core_", one "x       x =>  name ," line each
+# (the x columns look like placeholders to fill in by hand - confirm).
+echo "Scanning for MySQL table files"
+
+# ftp.rsp holds the commands fed to ftp on stdin; each step overwrites it.
+echo "user anonymous hiram@soe
+cd pub/release-${VERSION}/mysql
+ls -lR
+bye" > ftp.rsp
+ftp -i -n -v ftp.ensembl.org < ftp.rsp > "release.${VERSION}.MySQL.ls-lR"
+
+# Only "./dir:" header lines matter here; the literal "./" is escaped so that
+# "." cannot match an arbitrary character.
+awk '
+$1 ~ /^\.\// {
+    D = $1; sub(/^\.\//, "", D); sub(/:$/, "", D)
+    if (D ~ /_core_/) { printf "x       x =>  %s ,\n", D }
+}
+' "release.${VERSION}.MySQL.ls-lR" > "release.${VERSION}.MySQL.names"
+
+# Fasta step: list pub/release-N/fasta recursively and emit one
+# "=> dir/file ," line for every regular file whose name contains
+# "pep.all.fa" (the protein fasta files).
+echo "Scanning for protein fasta files:"
+
+# ftp.rsp holds the commands fed to ftp on stdin; each step overwrites it.
+echo "user anonymous hiram@ucsc
+cd pub/release-${VERSION}/fasta
+ls -lR
+bye" > ftp.rsp
+
+ftp -i -n -v ftp.ensembl.org < ftp.rsp > "release.${VERSION}.fasta.ls-lR"
+
+# D is the directory from the most recent "./dir:" header ("notYet" until
+# one is seen).  Dots in the patterns are escaped so they match literally.
+awk '
+BEGIN { D = "notYet" }
+$1 ~ /^\.\// { D = $1; sub(/^\.\//, "", D); sub(/:$/, "", D); next }
+NF == 9 && $1 ~ /^-rw/ && $NF ~ /pep\.all\.fa/ {
+    printf "=> %s/%s ,\n", D, $NF
+}
+' "release.${VERSION}.fasta.ls-lR" > "release.${VERSION}.fasta.names"