86f72a48aa5e300161f9b14bcb15fa8499122838 chmalee Wed Mar 25 16:11:04 2026 -0700 Fix array length calculation for a chopByChar call that caused a segfault. Stop at 5 failing sequence names per genome to speed up checking, refs #37174 diff --git src/hg/utils/hubCheck/hubCheck.c src/hg/utils/hubCheck/hubCheck.c index 76e133330a4..47365d4d9dd 100644 --- src/hg/utils/hubCheck/hubCheck.c +++ src/hg/utils/hubCheck/hubCheck.c @@ -557,31 +557,31 @@ /* Construct the right javascript for the jstree for a top-level genomes.txt error or * error opening a trackDb.txt file */ { if (!doHtml) dyStringPrintf(errors, "%s", message); else { static int count = 0; // forces unique ID's which the jstree object needs char id[512]; char *errorMessages[16]; char *strippedMessage = NULL; char *genomeName = trackHubSkipHubName(genome->name); if (message) strippedMessage = cloneString(message); // multiple errors may be in a single message, chop by newline and make a node in the tree for each message - int numMessages = chopByChar(strippedMessage, '\n', errorMessages, sizeof(errorMessages)); + int numMessages = chopByChar(strippedMessage, '\n', errorMessages, ArraySize(errorMessages)); int i = 0; dyStringPrintf(errors, "trackData['%s'] = [", genomeName); for (; i < numMessages && isNotEmpty(errorMessages[i]); i++) { safef(id, sizeof(id), "%s%d", genomeName, count); dyStringPrintf(errors, "%s,", makeChildObjectString(id, "Genome Error", genomeName, genomeName, "#550073", genomeName, errorMessages[i], genomeName)); count++; } } } void trackDbErr(struct dyString *errors, char *message, struct trackHubGenome *genome, struct trackDb *tdb, boolean doHtml) /* Adds the right object for a jstree object of trackDb configuration errors. 
*/ { if (!doHtml) @@ -1081,58 +1081,72 @@ "id:'%sError', text:'No trackDb configuration errors', parent:'%s'}", idName, idName); } dyStringPrintf(errors, "];\n"); } return retVal; } static boolean isValidSeqNameChar(char c) /* Return TRUE if c is a valid character for a sequence name: [A-Za-z0-9._-] */ { return isalnum(c) || c == '.' || c == '_' || c == '-'; } +#define MAX_NUM_SEQ_CHECKS 5 static void checkSequenceNames(char *twoBitPath, char *genomeName) /* Check that sequence names in the 2bit file contain only valid characters: * ASCII letters, digits, period, underscore, hyphen. * First character must be a letter or digit. Max length 254. */ { struct slName *seqList = twoBitSeqNames(twoBitPath); struct slName *seq; +int numWarns = 0; for (seq = seqList; seq != NULL; seq = seq->next) { char *name = seq->name; int len = strlen(name); if (len > 254) + { warn("warning: sequence name '%s' in genome '%s' exceeds 254 characters (length %d)", name, genomeName, len); + numWarns++; + } if (len > 0 && !isalnum(name[0])) + { warn("warning: sequence name '%s' in genome '%s' starts with '%c' -must start with a letter or digit", name, genomeName, name[0]); + numWarns++; + } char *p; for (p = name; *p != '\0'; p++) { if (!isValidSeqNameChar(*p)) { warn("warning: sequence name '%s' in genome '%s' contains invalid character '%c' -" "only [A-Za-z0-9._-] are allowed. Consider using chromAlias for alternative names.", name, genomeName, *p); + numWarns++; break; } } + if (numWarns > MAX_NUM_SEQ_CHECKS) + { + warn("Stopping at %d sequence name warnings. Please check the rest of your sequences for legality.", MAX_NUM_SEQ_CHECKS); + break; + } } slFreeList(&seqList); } static void checkTrackNamesForDots(struct trackDb *tdbList) /* Warn about track names containing periods before they get polished away. 
*/ { struct trackDb *tdb; for (tdb = tdbList; tdb != NULL; tdb = tdb->next) { if (strchr(tdb->track, '.')) warn("warning: track name \"%s\" contains a period which will be changed to an underscore. Periods in track names can cause problems with table browser filters. Consider using underscores instead.", tdb->track); if (tdb->subtracks != NULL) checkTrackNamesForDots(tdb->subtracks); }