src/hg/utils/otto/panelApp/cnv.py 11a7d1aac16cfc653302a713b756fe4c352f450d

11a7d1aac16cfc653302a713b756fe4c352f450d
max
  Mon May 19 02:34:40 2025 -0700
fixing panelApp bug, refs #35757

diff --git src/hg/utils/otto/panelApp/cnv.py src/hg/utils/otto/panelApp/cnv.py
index 75a3304eae4..d3390c9bfbf 100644
--- src/hg/utils/otto/panelApp/cnv.py
+++ src/hg/utils/otto/panelApp/cnv.py
@@ -1,200 +1,196 @@
 import requests
 import json 
 import pandas as pd 
 import sys 
 import argparse
 import re
 
-def get_url(url):
-    page_count = 1
-    jData = dict()
-    while True:
+def getAllPages(url, results=[]):
+    " recursively download all pages. Stack should be big enough "
     try:
-            new_url = "{}{}".format(url, page_count)
-            myResponse = requests.get(new_url)
+        myResponse = requests.get(url)
         if (myResponse.ok):
             jData = json.loads(myResponse.content.decode())
-
             # If jData is empty create, else append
-                if "error" in jData.keys():
-                    raise Exception("{} page count is missing.".format(page_count))
+            if "error" in jData.keys() or not "results" in jData.keys():
+                raise Exception("Error in keys when downloading %s" % url)
+
+            if "count" in jData and not "page" in url:
+                print("API says that there are %d results for url %s" % (jData["count"], url))
+            results.extend(jData["results"])
+
+            if "next" in jData and jData["next"] is not None: # need to get next URL
+                return getAllPages(jData["next"], results)
         else:
-                print('Json data retrieved')
-                break
+            raise Exception("Error in object when downloading %s" % url)
     except:
-            if page_count > 1:
-                print('Json data retrieved')
-            else:
-                print("Unable to request URL")
-                sys.exit()
-            break
-        print(page_count)
-        page_count += 1
-    return jData
+        raise Exception("HTTP Error when downloading %s" % url)
+    return results
 
 def downloadCnvs():
     Error = True
     continuous_count=0
-    url = "https://panelapp.genomicsengland.co.uk/api/v1/regions/?page="
-    jData = get_url(url)
+    url = "https://panelapp.genomicsengland.co.uk/api/v1/regions/?format=json"
+    res = getAllPages(url)
 
-    res = jData['results']
     num_gene_data = len(res)
+    print("Got %d CNVs" % num_gene_data)
     count = 0
     continuous_count = 0
     hg19_dict = dict()
     hg38_dict = dict()
 
     while count != num_gene_data:
         temp_attribute_dictionary = dict()
         string_dict_key = 'gene_{}'.format(continuous_count)
         
         chromo = res[count]['chromosome']
         chromosome = 'chr' + chromo
 
         start_coordinates = res[count]['grch38_coordinates'][0]
         end_coordinates = res[count]['grch38_coordinates'][1]
 
         score = '0'
         strand = '.'
         thickStart = start_coordinates
         thickEnd = end_coordinates
         blockCount = '1'
         blockSizes = int(end_coordinates) - int(start_coordinates)
         blockStarts = 0
         
         confidence_level = res[count]['confidence_level']
 
-        rgb_dict = {'3': '0,255,0', '2': '255,191,0', '1':'255,0,0'}
+        rgb_dict = {'0' : '100,100,100', '3': '0,255,0', '2': '255,191,0', '1':'255,0,0'}
         itemRgb = rgb_dict[confidence_level]
         
         entity_name = res[count]['entity_name']
         entity_type = res[count]['entity_type']
         evidence = ' '.join(res[count]['evidence'])
 
-        haploinsufficiency_score = res[count]['haploinsufficiency_score']
+        haploinsufficiency_score = res[count].get('haploinsufficiency_score')
         if not haploinsufficiency_score:
             haploinsufficiency_score = ''
 
-        moi = res[count]['mode_of_inheritance']
+        moi = res[count].get('mode_of_inheritance')
         if not moi:
             moi = ''
 
-        disease_group = res[count]['panel']['disease_group']
+        disease_group = res[count]['panel'].get('disease_group')
         if not disease_group:
             disease_group = ''
 
-        disease_sub_group = res[count]['panel']['disease_sub_group']
+        disease_sub_group = res[count]['panel'].get('disease_sub_group')
         if not disease_sub_group:
             disease_sub_group = ''
 
         # idd = Panel ID
-        idd = res[count]['panel']['id']
+        idd = res[count]['panel'].get('id')
         if not idd:
             idd = ''
 
-        panel_name = res[count]['panel']['name']
+        panel_name = res[count]['panel'].get('name')
         if not panel_name:
             panel_name = ''
         
-        relevant_disorders = ' '.join(res[count]['panel']['relevant_disorders'])
+        relevant_disorders = ' '.join(res[count]['panel'].get('relevant_disorders', []))
         if not relevant_disorders:
             relevant_disorders = ''
 
-        status = res[count]['panel']['status']
+        status = res[count]['panel'].get('status')
         if not status:
             status = ''
         
         '''
-        types = res[count]['panel']['types']
-        types = str(types).replace("{","").replace("}", "").replace("'", "")
+        types = res[count]['panel'].get['types')
         if not types:
             types = ''
+        else:
+            types = str(types).replace("{","").replace("}", "").replace("'", "")
             types = types[1:-1]
         '''
 
-        types = res[count]['panel']['types'][0]['name']
+        types = res[count]['panel']['types'][0].get('name')
 
         version = res[count]['panel']['version']
         if float(version) < 0.99:
             continue
         if not version:
             version = ''
 
-        penetrance = res[count]['penetrance']
+        penetrance = res[count].get('penetrance')
         if not penetrance:
             penetrance = ''
 
-        phenotypes = ' '.join(res[count]['phenotypes'])
+        phenotypes = ' '.join(res[count].get('phenotypes', []))
         if not phenotypes:
             phenotypes = ''
 
         publications = ' '.join(res[count]['publications'])
         if not publications:
             publications = ''
         
         #required_overlap_percentage = res[count]['required_overlap_percentage']
         tags = res[count]['tags']
         if not tags:
             tags = ''
     
-        triplosensitivity_score = res[count]['triplosensitivity_score']
+        triplosensitivity_score = res[count].get('triplosensitivity_score')
         if not triplosensitivity_score:
             triplosensitivity_score = ''
     
+        type_of_variants = None
+        if "type_of_variants" in res[count]:
             type_of_variants = res[count]['type_of_variants']
         if not type_of_variants:
             type_of_variants = ''
 
-        verbose_name = res[count]['verbose_name']            
+        verbose_name = res[count].get('verbose_name')
         if not verbose_name:
             verbose_name = ''    
 
         # Mouse Over Field
         mouseOverField = ""
         try:
             mof = 'Gene: ' +  entity_name + ';' + ' Panel: ' + name + ';' + ' MOI: ' + moi + ';' + ' Phenotypes: ' + phenotypes + ';' + ' Confidence: ' + confidence_level + ';'    
             mouseOverField = mof
         except:
             mouseOverField = ''        
 
         # name
         name = '{} ({})'.format(entity_name, panel_name)
         
         hg38_dict[string_dict_key] = [chromosome, start_coordinates, end_coordinates, name, score, strand, 
                             thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts, confidence_level, 
                             panel_name, idd, entity_name, entity_type, evidence, haploinsufficiency_score, moi, disease_group, 
                             disease_sub_group, relevant_disorders, status, types, version, penetrance, phenotypes, 
                             publications, triplosensitivity_score, type_of_variants, verbose_name, mouseOverField]
         
         #-------------------------------------------------------------------------------
     
         continuous_count = continuous_count + 1
         count = count + 1
 
     # Removes new lines
     for key, item in hg38_dict.items():
         strip_list = list()
         for i in item:
             try:
                 strip_list.append(i.replace('\t', ' ').strip().strip("\n").strip("\r"))
             except:
                 strip_list.append(i)
         hg38_dict[key] = strip_list
 
     pd_38_table = pd.DataFrame.from_dict(hg38_dict)
     pd_38_table = pd_38_table.T
     pd_38_table.columns = ['chrom', 'chromStart', 'End', 'name', 'Score', 'strand', 'thickStart', 'thickEnd', 
                             'itemRgb', 'blockCount', 'blockSizes', 'blockStarts', 'Confidence Level', 
                             'Panel Name', 'Panel ID', 'Entity Name', 'Entity Type', 'Evidence', 'ClinGen Haploinsufficiency Score', 
                             'Mode of Inheritance', 'Disease Group', 'Disease Sub Group', 'Relevant Disorders', 
                             'Status', 'Types', 'Version Created', 'Penetrance', 'Phenotypes', 'Publications', 
                             'CinGen Triplosensitivity Score', 'Type of Variants', 'Verbose Name', 'Mouse Over Field']
     #pd_38_table = pd_38_table.sort_values(by=['chromosome', 'Start'], ascending = (True, True))
     #pd_38_table.to_csv('hg38_region_noheader_sorted.tsv', sep='\t', index=False, header=None) 
     #pd_38_table.to_csv('hg38_region_header_sorted.tsv', sep='\t', index=False) 
     return pd_38_table
 
-
-#if __name__ == "__main__":
-    #main()
+#print(len(downloadCnvs()))