ace0c6866e5b9b793c4cf3bd6f79515e575884a9 lrnassar Tue Feb 18 15:19:03 2025 -0800 Fixing a bug in the ENIGMA MLA generation where scores present on different datasets were not getting combined properly. diff --git src/hg/makeDb/scripts/enigma/BRCAmla.py src/hg/makeDb/scripts/enigma/BRCAmla.py index 68da10e6451..8f391d5f93c 100644 --- src/hg/makeDb/scripts/enigma/BRCAmla.py +++ src/hg/makeDb/scripts/enigma/BRCAmla.py @@ -160,35 +160,35 @@ def parseDicsAndCreateFinalLLRdic(caputoVarsDic,parsonsVarsDic,liVarsDic,eastonVarsDic): combinedDic = {'caputoVarsDic' : caputoVarsDic, 'parsonsVarsDic' : parsonsVarsDic, 'liVarsDic' : liVarsDic, 'eastonVarsDic' : eastonVarsDic} finalVarsList = set() allLRsPossible = ["segregationLR","pathologyLR","coocurrenceLR","familyLR","caseControlLR"] finalCombinedLRdic = {} for dic in combinedDic.keys(): for key in combinedDic[dic].keys(): finalVarsList.add(key) for variant in finalVarsList: finalCombinedLRdic[variant] = {} for dic in combinedDic.keys(): if variant in combinedDic[dic].keys(): finalCombinedLRdic[variant][dic] = {} for LR in allLRsPossible: if LR in combinedDic[dic][variant]: - if LR in finalCombinedLRdic[variant][dic].keys(): #Look for familyLR in parsonsXXX + if LR+"combined" in finalCombinedLRdic[variant].keys(): #Look for familyLR in parsonsXXX #Assign the combined value finalCombinedLRdic[variant][LR+"combined"] = finalCombinedLRdic[variant][LR+"combined"] * float(combinedDic[dic][variant][LR]) #Assign individual value - finalCombinedLRdic[variant][dic][LR] = varsDicAllValues[variant][LR] + finalCombinedLRdic[variant][dic][LR] = float(combinedDic[dic][variant][LR]) else: finalCombinedLRdic[variant][dic][LR] = float(combinedDic[dic][variant][LR]) #First value for the combined finalCombinedLRdic[variant][LR+"combined"] = float(combinedDic[dic][variant][LR]) for var in finalCombinedLRdic.keys(): finalCombinedLRdic[var]["combinedLRscore"] = 1 for combinedLR in finalCombinedLRdic[var].keys(): if combinedLR.endswith("combined"): finalCombinedLRdic[var]["combinedLRscore"] = finalCombinedLRdic[var]["combinedLRscore"] * finalCombinedLRdic[var][combinedLR] print("Total number of final variables in combined dataset: "+str(len(finalVarsList))) #The result is a dictionary as such: #Level 1: Variants, e.g. NM_000059.4:c.3509C>T #Level 2: A dictionary for each dataset, a combined score for each LR (5 total), and a final combinedLR from multiplying all combined individual scores