src/utils/qa/mlqAutomate.py 81fc9a3788b59421d8efabfc9fa70857dc6dacda

81fc9a3788b59421d8efabfc9fa70857dc6dacda
lrnassar
  Tue Mar 31 15:34:36 2026 -0700
Skip medical info spam rule for genome-www emails in mlqAutomate. The unmoderated
genome-www list is only accessible to the Browser team, so personal medical
information is not a privacy concern there. This prevents false-positive spam
classification of legitimate clinical genetics questions. refs #37320

diff --git src/utils/qa/mlqAutomate.py src/utils/qa/mlqAutomate.py
index 1141f648b45..c6763538349 100755
--- src/utils/qa/mlqAutomate.py
+++ src/utils/qa/mlqAutomate.py
@@ -246,58 +246,66 @@
         singleEvents=True
     ).execute().get('items', [])
 
     for event in events:
         title = event.get('summary', '')
         match = re.search(r'MLM(?:\s+Rotating)?:\s*(\w+)', title, re.IGNORECASE)
         if match:
             cal_name = match.group(1)
             return CONFIG['NAME_MAPPING'].get(cal_name, cal_name)
 
     logger.warning(f"No MLM found for {target_date.date()}")
     return None
 
 
 @retry(max_attempts=3, delay=2, exceptions=(anthropic.APIError,))
-def analyze_email_with_claude(subject, body, sender):
+def analyze_email_with_claude(subject, body, sender, group_email=None):
     """
     Use Claude to analyze an email in a single call.
     Returns dict with: is_spam, category, draft_response
     """
     client = anthropic.Anthropic(api_key=CONFIG['CLAUDE_API_KEY'])
     categories_list = ", ".join(MLQ_CATEGORIES)
 
+    # The medical info spam rule only applies to public/moderated lists.
+    # Unmoderated lists (e.g., genome-www) are only accessible to the Browser team,
+    # so personal medical information is not a privacy concern there.
+    is_unmoderated = group_email in CONFIG['UNMODERATED_LISTS']
+
+    medical_rule = ""
+    if not is_unmoderated:
+        medical_rule = "\n  - Contains sensitive personal medical information (specific names with genetic test results, medical conditions, family medical history, or personal health details) - these are privacy concerns"
+
     prompt = f"""Analyze this email for the UCSC Genome Browser support team.
 
 From: {sender}
 Subject: {subject}
 Body:
 {body[:3000]}
 
 Provide your analysis in this exact format:
 
 SPAM: [YES or NO]
 CATEGORY: [Pick one from: {categories_list}]
 DRAFT_RESPONSE: [If not spam, write a helpful, professional response under 200 words. If spam, write "N/A"]
 
 Important:
 - Mark as SPAM if it is:
   - Conference/journal solicitations asking for paper submissions
   - Promotions for workshops, courses, training programs, or webinars
   - Marketing or promotional emails advertising services or products
-  - Mass-sent announcements unrelated to genome browser support
-  - Contains sensitive personal medical information (specific names with genetic test results, medical conditions, family medical history, or personal health details) - these are privacy concerns
+  - Mass-sent announcements unrelated to genome browser support{medical_rule}
 - Mark as NOT SPAM if it is a genuine question about using the UCSC Genome Browser (general genetics questions without personal identifying info are OK)
 - For CATEGORY, pick the most specific match. Use "Other" if unsure.
 - For DRAFT_RESPONSE, be helpful and concise. Ask clarifying questions if needed. Point to relevant documentation when appropriate."""
 
     response = client.messages.create(
         model="claude-sonnet-4-20250514",
         max_tokens=800,
         messages=[{"role": "user", "content": prompt}]
     )
 
     result_text = response.content[0].text.strip()
 
     # Parse the response
     is_spam = False
     category = "Other"
@@ -1636,31 +1644,32 @@
 
             # Skip empty updates (e.g., email was entirely quoted content)
             if not processed_body.strip() and not uploaded_attachments:
                 logger.info(f"  Skipping empty update for ticket #{existing_ticket}")
                 continue
 
             ticket_status = ticket.get('status', {}).get('name', '').lower()
             is_closed = 'closed' in ticket_status or 'resolved' in ticket_status
 
             comment = f"--- New Email Update ---\n\nFrom: {sender}\n\n{processed_body}"
             update_ticket(existing_ticket, comment, reopen=is_closed,
                          new_mlm=mlm_name if is_closed else None,
                          attachments=uploaded_attachments)
         else:
             # Analyze with Claude for category and draft response
-            analysis = analyze_email_with_claude(subject, body, sender)
+            analysis = analyze_email_with_claude(subject, body, sender,
+                                                group_email=group_email)
 
             logger.info(f"  Category: {analysis['category']}")
 
             # Create new ticket with attachments
             ticket_id = create_ticket(
                 subject,
                 processed_body,
                 sender_emails,
                 mlm_name,
                 category=analysis['category'],
                 attachments=uploaded_attachments
             )
 
             if ticket_id and analysis['draft_response']:
                 draft_note = f"--- AI Suggested Response (Draft) ---\n\n{analysis['draft_response']}"
@@ -1804,31 +1813,32 @@
                     if not processed_body.strip() and not uploaded_attachments:
                         logger.info(f"  Skipping empty update for ticket #{existing_ticket}")
                         continue
 
                     comment = f"--- New Email Update ---\n\nFrom: {email['from']}\n\n{processed_body}"
                     reopen = is_closed and first_update
                     update_ticket(existing_ticket, comment, reopen=reopen,
                                  new_mlm=mlm_name if reopen else None,
                                  attachments=uploaded_attachments)
                     first_update = False
         else:
             # Analyze email with Claude (single call for spam, category, draft)
             analysis = analyze_email_with_claude(
                 first_email['subject'],
                 first_email['body'],
-                first_email['from']
+                first_email['from'],
+                group_email=thread['group']
             )
 
             if analysis['is_spam']:
                 logger.info(f"Skipping spam: {first_email['subject'][:50]}")
                 continue
 
             logger.info(f"  Category: {analysis['category']}")
 
             # Upload attachments from the first email
             first_attachments = first_email.get('attachments', [])
             uploaded_attachments = []
             if first_attachments:
                 logger.info(f"  Uploading {len(first_attachments)} attachment(s)")
                 uploaded_attachments = upload_attachments_to_redmine(first_attachments)