81fc9a3788b59421d8efabfc9fa70857dc6dacda lrnassar Tue Mar 31 15:34:36 2026 -0700 Skip medical info spam rule for genome-www emails in mlqAutomate. The unmoderated genome-www list is only accessible to the Browser team, so personal medical information is not a privacy concern there. This prevents false positive spam classification of legitimate clinical genetics questions. refs #37320 diff --git src/utils/qa/mlqAutomate.py src/utils/qa/mlqAutomate.py index 1141f648b45..c6763538349 100755 --- src/utils/qa/mlqAutomate.py +++ src/utils/qa/mlqAutomate.py @@ -246,58 +246,66 @@ singleEvents=True ).execute().get('items', []) for event in events: title = event.get('summary', '') match = re.search(r'MLM(?:\s+Rotating)?:\s*(\w+)', title, re.IGNORECASE) if match: cal_name = match.group(1) return CONFIG['NAME_MAPPING'].get(cal_name, cal_name) logger.warning(f"No MLM found for {target_date.date()}") return None @retry(max_attempts=3, delay=2, exceptions=(anthropic.APIError,)) -def analyze_email_with_claude(subject, body, sender): +def analyze_email_with_claude(subject, body, sender, group_email=None): """ Use Claude to analyze an email in a single call. Returns dict with: is_spam, category, draft_response """ client = anthropic.Anthropic(api_key=CONFIG['CLAUDE_API_KEY']) categories_list = ", ".join(MLQ_CATEGORIES) + # The medical info spam rule only applies to public/moderated lists. + # Unmoderated lists (e.g., genome-www) are only accessible to the Browser team, + # so personal medical information is not a privacy concern there. + is_unmoderated = group_email in CONFIG['UNMODERATED_LISTS'] + + medical_rule = "" + if not is_unmoderated: + medical_rule = "\n - Contains sensitive personal medical information (specific names with genetic test results, medical conditions, family medical history, or personal health details) - these are privacy concerns" + prompt = f"""Analyze this email for the UCSC Genome Browser support team. From: {sender} Subject: {subject} Body: {body[:3000]} Provide your analysis in this exact format: SPAM: [YES or NO] CATEGORY: [Pick one from: {categories_list}] DRAFT_RESPONSE: [If not spam, write a helpful, professional response under 200 words. If spam, write "N/A"] Important: - Mark as SPAM if it is: - Conference/journal solicitations asking for paper submissions - Promotions for workshops, courses, training programs, or webinars - Marketing or promotional emails advertising services or products - - Mass-sent announcements unrelated to genome browser support - - Contains sensitive personal medical information (specific names with genetic test results, medical conditions, family medical history, or personal health details) - these are privacy concerns + - Mass-sent announcements unrelated to genome browser support{medical_rule} - Mark as NOT SPAM if it is a genuine question about using the UCSC Genome Browser (general genetics questions without personal identifying info are OK) - For CATEGORY, pick the most specific match. Use "Other" if unsure. - For DRAFT_RESPONSE, be helpful and concise. Ask clarifying questions if needed. Point to relevant documentation when appropriate.""" response = client.messages.create( model="claude-sonnet-4-20250514", max_tokens=800, messages=[{"role": "user", "content": prompt}] ) result_text = response.content[0].text.strip() # Parse the response is_spam = False category = "Other" @@ -1636,31 +1644,32 @@ # Skip empty updates (e.g., email was entirely quoted content) if not processed_body.strip() and not uploaded_attachments: logger.info(f" Skipping empty update for ticket #{existing_ticket}") continue ticket_status = ticket.get('status', {}).get('name', '').lower() is_closed = 'closed' in ticket_status or 'resolved' in ticket_status comment = f"--- New Email Update ---\n\nFrom: {sender}\n\n{processed_body}" update_ticket(existing_ticket, comment, reopen=is_closed, new_mlm=mlm_name if is_closed else None, attachments=uploaded_attachments) else: # Analyze with Claude for category and draft response - analysis = analyze_email_with_claude(subject, body, sender) + analysis = analyze_email_with_claude(subject, body, sender, + group_email=group_email) logger.info(f" Category: {analysis['category']}") # Create new ticket with attachments ticket_id = create_ticket( subject, processed_body, sender_emails, mlm_name, category=analysis['category'], attachments=uploaded_attachments ) if ticket_id and analysis['draft_response']: draft_note = f"--- AI Suggested Response (Draft) ---\n\n{analysis['draft_response']}" @@ -1804,31 +1813,32 @@ if not processed_body.strip() and not uploaded_attachments: logger.info(f" Skipping empty update for ticket #{existing_ticket}") continue comment = f"--- New Email Update ---\n\nFrom: {email['from']}\n\n{processed_body}" reopen = is_closed and first_update update_ticket(existing_ticket, comment, reopen=reopen, new_mlm=mlm_name if reopen else None, attachments=uploaded_attachments) first_update = False else: # Analyze email with Claude (single call for spam, category, draft) analysis = analyze_email_with_claude( first_email['subject'], first_email['body'], - first_email['from'] + first_email['from'], + group_email=thread['group'] ) if analysis['is_spam']: logger.info(f"Skipping spam: {first_email['subject'][:50]}") continue logger.info(f" Category: {analysis['category']}") # Upload attachments from the first email first_attachments = first_email.get('attachments', []) uploaded_attachments = [] if first_attachments: logger.info(f" Uploading {len(first_attachments)} attachment(s)") uploaded_attachments = upload_attachments_to_redmine(first_attachments)