9dde2f308a759b500f69bea45cc2ec4f80184d6b lrnassar Fri May 29 07:51:44 2026 -0700 Fix 422 crash in mlqAutomate from subjects that normalize to empty. refs #37659 Subjects consisting only of reply/list-tag tokens (e.g. "Re:", "[genome]") normalize to an empty string, which sent Redmine a bare 'subject=~' filter and returned HTTP 422, aborting the whole run. Give such subjects a stable NO_SUBJECT placeholder in both find_existing_ticket (search side) and create_ticket (stored side) so they search and match consistently. Also exclude the placeholder from the staff-only subject-match shortcut so unrelated no-subject threads still require an email match and aren't merged. diff --git src/utils/qa/mlqAutomate.py src/utils/qa/mlqAutomate.py index ba8649df17b..cfb6f0f5913 100755 --- src/utils/qa/mlqAutomate.py +++ src/utils/qa/mlqAutomate.py @@ -82,30 +82,35 @@ "Other", "Alignments", "BLAT", "Bug Report", "CAPTCHA", "Command-line Utilities", "Conservation", "Custom Track", "Data - Availability (when)", "Data - Interpretation (what)", "Data - Location (where)", "Data Contribution", "Data Integrator", "Data Requests", "dbSNP", "Downloads", "ENCODE", "External Tools", "Feature Request", "GBiB", "GBiC", "Gene Interactions (hgGeneGraph)", "Gene Tracks", "Help Docs (Info)", "Hubs", "IP blocked", "JSON hubApi", "Licenses", "LiftOver", "Login", "Mirror - Asia", "Mirror - Europe", "Mirror Site & Utilities", "Multi-region", "MySQL", "PCR", "Publications & Citing", "Sessions", "Slow Performance", "Table Browser", "Track Collection Builder", "User Accounts", "Variant Annotation Integrator", "Widget" ] SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) PST = pytz.timezone('America/Los_Angeles') DRY_RUN = False +# Placeholder for emails whose subject is empty or consists entirely of +# reply/list-tag tokens that normalize away. Gives them a stable, matchable +# subject instead of a bare '~' Redmine filter (HTTP 422). 
+NO_SUBJECT = '' + # Setup logging LOG_FILE = os.environ.get('MLQ_LOG_FILE', os.path.join(SCRIPT_DIR, 'mlq_automate.log')) logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s', handlers=[ logging.FileHandler(LOG_FILE), logging.StreamHandler(sys.stdout) ] ) logger = logging.getLogger(__name__) # Suppress httpx INFO logs (Anthropic client HTTP request logging) logging.getLogger('httpx').setLevel(logging.WARNING) @@ -1208,55 +1213,62 @@ """Find an existing Redmine ticket by subject and email match. Requires both: 1. Normalized subject match 2. At least one email from thread_emails matches the ticket's Email field Exception: if any thread participant is a @ucsc.edu address (staff), the email match is skipped and subject match alone is sufficient. Staff replies to mailing list threads typically go to the list, not the original sender, so the original sender's email won't appear in To/CC. thread_emails should include all participants (From, To, CC) to handle replies where the original sender appears in To/CC fields. """ normalized = normalize_subject(subject) + # Empty normalization (e.g. a subject of just "Re:" or "[genome]") would send + # Redmine a bare 'subject=~' filter and 422. Fall back to the placeholder so + # these still search and match consistently with how the ticket was created. 
+ if not normalized: + normalized = NO_SUBJECT url = f"{CONFIG['REDMINE_URL']}/issues.json" params = { 'project_id': CONFIG['REDMINE_PROJECT'], 'subject': f"~{normalized}", 'status_id': '*', 'limit': 100, } headers = {'X-Redmine-API-Key': CONFIG['REDMINE_API_KEY']} resp = requests.get(url, params=params, headers=headers, timeout=30) resp.raise_for_status() data = resp.json() email_list = [e.lower() for e in thread_emails] has_staff_participant = any(e.endswith('@ucsc.edu') for e in email_list) for issue in data.get('issues', []): if normalize_subject(issue['subject']).lower() != normalized.lower(): continue # Staff replies to mailing list threads don't need email match — # subject match is sufficient since staff wouldn't start a new - # unrelated thread with the same subject - if has_staff_participant: + # unrelated thread with the same subject. The placeholder subject is + # shared by all no-subject threads, so it's excluded from this shortcut: + # require an email match to avoid merging unrelated conversations. 
+ if has_staff_participant and normalized != NO_SUBJECT: return issue['id'] # For external senders, require email match to avoid false positives # on generic subjects email_field = next( (f for f in issue.get('custom_fields', []) if f['id'] == CONFIG['CUSTOM_FIELDS']['Email']), None ) if email_field and email_field.get('value'): ticket_emails = [e.strip().lower() for e in email_field['value'].split(',')] if any(te in ee or ee in te for te in email_list for ee in ticket_emails): return issue['id'] return None @@ -1368,30 +1380,36 @@ """Get journal (comment) history for a ticket.""" url = f"{CONFIG['REDMINE_URL']}/issues/{ticket_id}.json?include=journals" headers = {'X-Redmine-API-Key': CONFIG['REDMINE_API_KEY']} resp = requests.get(url, headers=headers, timeout=30) resp.raise_for_status() return resp.json().get('issue', {}) def create_ticket(subject, body, sender_emails, mlm_name, category='Other', attachments=None): """Create a new Redmine ticket with optional attachments. Includes retry logic for transient server errors (5xx) and network issues. Sends email notification to QA team if all retries fail. """ + # Subjects that normalize to nothing (empty, or only reply/list tags) get a + # stable placeholder so the stored ticket can be found by find_existing_ticket + # on later runs (its subject search would otherwise never match). + if not normalize_subject(subject): + subject = NO_SUBJECT + if DRY_RUN: att_info = f" with {len(attachments)} attachment(s)" if attachments else "" logger.info(f" [DRY RUN] Would create ticket: {subject[:50]}{att_info}") logger.info(f" Category: {category}, MLM: {mlm_name}") return None url = f"{CONFIG['REDMINE_URL']}/issues.json" headers = { 'X-Redmine-API-Key': CONFIG['REDMINE_API_KEY'], 'Content-Type': 'application/json', } # Strip emojis from subject (body should already be sanitized via sanitize_for_redmine) clean_subject = re.sub(r'[\U00010000-\U0010FFFF]', '', subject)