b1b2e5b7727c19291acb605ad561dfbd225b0496
hiram
  Tue Jan 21 10:30:24 2020 -0800
cleaning up some errors for the Ensembl v99 build; no redmine ticket

diff --git src/hg/utils/automation/findEnsFtpNames.sh src/hg/utils/automation/findEnsFtpNames.sh
index dac3122..467d53b 100755
--- src/hg/utils/automation/findEnsFtpNames.sh
+++ src/hg/utils/automation/findEnsFtpNames.sh
@@ -15,61 +15,71 @@
     echo "release.<ens version>.fasta.names"
     echo "use those lists to edit EnsGeneAutomate.pm"
     exit 255
 fi
 
 echo "Scanning for GTF file names"
 
 echo "user anonymous hiram@soe
 cd pub/release-${VERSION}/gtf
 ls -lR
 bye" > ftp.rsp
 
 ftp -n -v -i ftp.ensembl.org < ftp.rsp > release.${VERSION}.gtf.ls-lR
 
 # the mus_musculus_ extra sequences are stuck at version 86
-egrep -v "CHECKSUMS|README" release.${VERSION}.gtf.ls-lR | awk '
+egrep -v "CHECKSUMS|README|Cyprinus_carpio_hebao_red|Cyprinus_carpio_german_mirror" release.${VERSION}.gtf.ls-lR | awk '
 {
 if (match($1,"^./[a-z0-9_]*:$")) {gsub(":$","",$1); printf "%s/", $1 }
 if (NF == 9) { if ((match($1,"^-rw")) && (match($NF,"'${VERSION}'.gtf.gz"))) {printf "%s\n", $NF} }
 if (NF == 9) { if ((match($1,"^-rw")) && (match($NF,"86.gtf.gz"))) {printf "%s\n", $NF} }
 }
 ' | sed -e "s#^./#'x' => '#; s#\$#',#" > release.${VERSION}.gtf.names
 
 echo "Scanning for MySQL table files"
 
 echo "user anonymous hiram@soe
 cd pub/release-${VERSION}/mysql
 ls -lR
 bye" > ftp.rsp
 
 ftp -i -n -v ftp.ensembl.org < ftp.rsp > release.${VERSION}.MySQL.ls-lR
 
 egrep "_core_${VERSION}.*:$" release.${VERSION}.MySQL.ls-lR \
   | sed -e 's/://;' | sed -e "s#^./#'x' => '#; s#\$#',#" \
      > release.${VERSION}.MySQL.names
 
 echo "Scanning for protein fasta files:"
 
 echo "user anonymous hiram@ucsc
 cd pub/release-${VERSION}/fasta
 ls -lR
 bye" > ftp.rsp
 
 ftp -i -n -v ftp.ensembl.org < ftp.rsp > release.${VERSION}.fasta.ls-lR
 
+
 awk '
-BEGIN{ D="notYet" }
+BEGIN{ D="notYet"; d="notyet" }
 {
   if (!match($1,"^drwx")) {
-    if (match($1,"^./[a-z_]*/pep:$")) {
+    if (match($1,"^./[0-9a-z_]*/pep:$")) {
         gsub(":$","",$1); D = $1;
+        d = tolower(D);
+        sub("./","", d);
+        sub("/pep","", d);
     }
-    if ((9 == NF) && match($1,"^-rw") && match($NF,"pep.all.fa")) {
+    if ((9 == NF) && match($1,"^-rw") && match($NF,"pep.all.fa.gz")) {
+        tl = tolower($NF)
+        if (index(tl, d) > 0) {
           printf "%s/%s\n", D, $NF
         }
     }
   }
+}
 ' release.${VERSION}.fasta.ls-lR \
 	| sed -e "s#^./#'x' => '#; s#\$#',#" > release.${VERSION}.fasta.names
 
+#         printf "%s/%s\t%s\t%s\n", D, $NF, d, tl
+#         printf "%s/%s\t%s\t%s\n", D, $NF, d, tl
+
 rm -f ftp.rsp