c43865d1c59800b4e4a72c9a334e51b5e103e385
hiram
  Thu Jan 23 15:11:51 2020 -0800
correctly ignore failure to find MANE or select genes refs #24748

diff --git src/hg/utils/automation/doNcbiRefSeq.pl src/hg/utils/automation/doNcbiRefSeq.pl
index 67962c7..c23ed70 100755
--- src/hg/utils/automation/doNcbiRefSeq.pl
+++ src/hg/utils/automation/doNcbiRefSeq.pl
@@ -326,57 +326,61 @@
 export annotationRelease=`zcat \$ncbiGffGz | head -100 | grep ^#.annotation-source | sed -e 's/.*annotation-source //'`
 if [ "\$annotationRelease" == "" ]; then
   export annotationRelease=\$asmId
 fi
 export versionDate=`ls -L --full-time \$ncbiGffGz | awk '{print \$6;}'`
 echo "\$annotationRelease (\$versionDate)" > ncbiRefSeqVersion.txt
 
 # this produces the genePred in NCBI coordinates
 # 8/23/17: gff3ToGenePred quits over illegal attribute SO_type... make it legal (so_type):
 zcat \$ncbiGffGz \\
   | sed -re 's/([;\\t])SO_type=/\\1so_type=/;' \\
   | gff3ToGenePred $warnOnly -refseqHacks -attrsOut=\$asmId.attrs.txt \\
       -unprocessedRootsOut=\$asmId.unprocessedRoots.txt stdin \$asmId.gp
 genePredCheck \$asmId.gp
 
-zcat \$ncbiGffGz \\
-  | egrep 'tag=(RefSeq|MANE) Select' | cut -f9- | tr ';' '\\n' \\
+zcat \$ncbiGffGz | (egrep 'tag=(RefSeq|MANE) Select' || true) > before.cut9.txt
+# grouping keeps the redirect on egrep, not 'true'; empty file = no Select tags
+if [ -s before.cut9.txt ]; then
+  cut -f9- before.cut9.txt | tr ';' '\\n' \\
     | grep 'Name=' | grep -v NP_ | cut -d= -f2 | sort -u \\
        > \$asmId.refseqSelectTranscripts.txt
+fi
+rm -f before.cut9.txt
 
 # extract labels from semi-structured text in gbff COMMENT/description sections:
 zcat \$downloadDir/\${asmId}_rna.gbff.gz \\
   | (grep ' :: ' || true) \\
     | perl -wpe 's/\\s+::.*//; s/^\\s+//;' \\
       | sort -u \\
         > pragmaLabels.txt
 
 # extract cross reference text for refLink
 \$gff3ToRefLink \$downloadDir/\$asmId.raFile.txt \$ncbiGffGz pragmaLabels.txt 2> \$db.refLink.stderr.txt \\
   | sort > \$asmId.refLink.tab
 
 # converting the NCBI coordinates to UCSC coordinates
 liftUp -extGenePred -type=.gp stdout $localLiftFile drop \$asmId.gp \\
   | gzip -c > \$asmId.\$db.gp.gz
 $genePredCheckDb \$asmId.\$db.gp.gz
 
 # curated subset of all genes
 (zegrep "^[NY][MRP]_" \$asmId.\$db.gp.gz || true) > \$db.curated.gp
 # may not be any curated genes
 if [ ! -s \$db.curated.gp ]; then
   rm -f \$db.curated.gp
-else
+elif [ -s \$asmId.refseqSelectTranscripts.txt ]; then
   cat \$db.curated.gp | fgrep -f \$asmId.refseqSelectTranscripts.txt - \\
     > \$db.refseqSelect.curated.gp
   # may not be any refseqSelect.curated genes
   if [ ! -s \$db.refseqSelect.curated.gp ]; then
     rm -f \$db.refseqSelect.curated.gp
   fi
 fi
 
 # predicted subset of all genes
 (zegrep "^X[MR]_" \$asmId.\$db.gp.gz || true) > \$db.predicted.gp
 
 # not curated or predicted subset of all genes, the left overs
 (zegrep -v "^[NXY][MRP]_" \$asmId.\$db.gp.gz || true) > \$db.other.gp
 
 # curated and predicted without leftovers: