98c19d05d28cdf3c344a1e0f26a12f7c4397ae03 angie Wed Mar 25 10:49:24 2015 -0700 In a genome-wide query, bedToGffLines is called once per assembly sequence; for scaffold-based assemblies, it can be called hundreds of thousands of times. When called on a very small table with rows for relatively few sequences, it may be called tens of thousands of times without producing any output. Combined with the inefficiency of allocating a sizeable hash regardless of whether there were any items to work on, these empty calls produced long delays that were causing web timeouts. Now, if there are no incoming items, it returns immediately. There are still some delays from the hundreds of thousands of queries that return no data, but they are manageable (~45s for ~200k scaffolds). refs #15043 diff --git src/hg/hgTables/gffOut.c src/hg/hgTables/gffOut.c index aa49a36..9a9fd74 100644 --- src/hg/hgTables/gffOut.c +++ src/hg/hgTables/gffOut.c @@ -224,30 +224,32 @@ addGffLineFromBed(bed, source, "CDS", cdsPortionStart, cdsPortionEnd, frames[exonIndx], txName); } /* start_codon (goes last for - strand) overlaps with CDS */ if ((exonIndx == cdsStartIndx) && isRc) addStartStopCodon(bed, exonIndx, exonCdsEnd, -3, "start_codon", source, txName); } static int bedToGffLines(struct bed *bedList, struct slName *exonFramesList, struct hTableInfo *hti, int fieldCount, char *source, boolean gtf2StopCodons) /* Translate a (list of) bed into gff and print out. * Note that field count (perhaps reduced by bitwise intersection) * can in effect override hti. */ { +if (! bedList) + return 0; struct hash *nameHash = newHash(20); struct bed *bed; struct slName *exonFrames = exonFramesList; int i, exonStart, exonEnd; char txName[256]; int itemCount = 0; static int namelessIx = 0; for (bed = bedList; bed != NULL; bed = bed->next) { /* Enforce unique transcript_ids. */ if (bed->name != NULL) { struct hashEl *hel = hashLookup(nameHash, bed->name); int dupCount = (hel != NULL ? ptToInt(hel->val) : 0);