b4f6c6c609d1e6ff8f09c2f42795b1df05f31452
markd
  Wed Jan 24 20:42:01 2024 -0800
Try harder to find a more useful name for GFF3 records that don't follow the most common patterns.  These are things like RefSeq IG segments and tRNAs that NHGRI had problems with.

diff --git src/hg/utils/gff3ToGenePred/gff3ToGenePred.c src/hg/utils/gff3ToGenePred/gff3ToGenePred.c
index 1c2bac0..3e994cd 100644
--- src/hg/utils/gff3ToGenePred/gff3ToGenePred.c
+++ src/hg/utils/gff3ToGenePred/gff3ToGenePred.c
@@ -9,31 +9,31 @@
 #include "options.h"
 #include "gff3.h"
 #include "genePred.h"
 
 #define LEAK_CHECK 0  // set to 1 to free all memory
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "gff3ToGenePred - convert a GFF3 file to a genePred file\n"
   "usage:\n"
   "   gff3ToGenePred inGff3 outGp\n"
   "options:\n"
   "  -warnAndContinue - on bad genePreds being created, put out warning but continue\n"
-  "  -useName - rather than using 'id' as name, use the 'name' tag\n"
+  "  -useName - use the 'name' tag as the name, if present\n"
   "  -rnaNameAttr=attr - If this attribute exists on an RNA record, use it as the genePred\n"
   "   name column\n"
   "  -geneNameAttr=attr - If this attribute exists on a gene record, use it as the genePred\n"
   "   name2 column\n"
   "  -attrsOut=file - output attributes of mRNA record to file.  These are per-genePred row,\n"
   "   not per-GFF3 record. Thery are derived from GFF3 attributes, not the attributes themselves.\n"
   "  -processAllGeneChildren - output genePred for all children of a gene regardless of feature\n"
   "  -unprocessedRootsOut=file - output GFF3 root records that were not used.  This will not be a\n"
   "   valid GFF3 file.  It's expected that many non-root records will not be used and they are not\n"
   "   reported.\n"
   "  -bad=file   - output genepreds that fail checks to file\n"
   "  -maxParseErrors=50 - Maximum number of parsing errors before aborting. A negative\n"
   "   value will allow an unlimited number of errors.  Default is 50.\n"
   "  -maxConvertErrors=50 - Maximum number of conversion errors before aborting. A negative\n"
   "   value will allow an unlimited number of errors.  Default is 50.\n"
@@ -284,46 +284,65 @@
 // a refseq accession
 if (isGeneWithCdsChildCase(mrna))
     {
     // is name something like YP_203370.1 (don't try too hard)
     struct gff3Ann *cds = mrna->children->ann;
     if (!sameString(cds->type, gff3FeatCDS))
         cds = mrna->children->ann->children->ann; // post-2018-format
     // Also checking now for 'Y' as a prefix, as otherwise this would apply to all normal transcripts
     if ((cds->name != NULL) && (strlen(cds->name) > 4) && isupper(cds->name[0]) && isupper(cds->name[1])
         && (cds->name[2] == '_') && isdigit(cds->name[3]) && cds->name[0] == 'Y')
         return cds->name;
     }
 return NULL;
 }
 
+static char* getAttrVal(struct gff3Ann* ann, char *name)
+/* return the first value of attribute name on ann, or NULL if ann has no such attribute */
+{
+struct gff3Attr *attr = gff3AnnFindAttr(ann, name);
+if (attr != NULL)
+    return attr->vals->name;  // NOTE(review): assumes attr->vals is never NULL - confirm with the gff3 parser
+else
+    return NULL;
+}
+
 static char* getRnaName(struct gff3Ann* mrna)
 /* return the value to use for the genePred name field */
 {
 char *name = NULL;
 if (rnaNameAttr != NULL)
-    {
-    struct gff3Attr *attr = gff3AnnFindAttr(mrna, rnaNameAttr);
-    if (attr != NULL)
-        name = attr->vals->name;
-    }
+    name = getAttrVal(mrna, rnaNameAttr);
 if (isEmpty(name) && refseqHacks)
     name = refSeqHacksFindName(mrna);
+if (isEmpty(name) && useName)
+    name = mrna->name;
+// otherwise try other attributes, in priority order, until one is non-empty
+if (isEmpty(name))
+    name = getAttrVal(mrna, "transcript_id");
+if (isEmpty(name))
+    name = getAttrVal(mrna, "transcript_name");
+if (isEmpty(name))
+    name = getAttrVal(mrna, "Name");  // NOTE(review): consulted even when useName is not set - confirm intended
+if (isEmpty(name))
+    name = getAttrVal(mrna, "standard_name");   // RefSeq uses this
+if (isEmpty(name))
+    name = getAttrVal(mrna, "gene");   // also used by RefSeq when there is no transcript name
 if (isEmpty(name))
-    name = (useName ? mrna->name : mrna->id);
+    name = getAttrVal(mrna, "gene_name");
 if (isEmpty(name))
-    name = mrna->id;
+    name = mrna->id; // last resort: fall back to the record id
 return name;
 }
 
 static char* getGeneName(struct gff3Ann* gene)
 /* return the value to use for the genePred name2 field,
  * or NULL if can't be defined. */
 {
 char *name2 = NULL;
 if (geneNameAttr != NULL)
     {
     struct gff3Attr *attr = gff3AnnFindAttr(gene, geneNameAttr);
     if (attr != NULL)
         name2 = attr->vals->name;
     }
 if (isEmpty(name2) && useName)