cb31fb5a2ebbbb167b7f07c19f3af9e3e724068c
cvaske
  Tue Jan 7 13:48:52 2025 -0800
civic: remove certifi package

hgwdev-new no longer needs the PyPI certificate bundle
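The practical effect: civicToBed.py now relies on the system trust store rather
than certifi's CA bundle when fetching the CIViC downloads. A minimal sketch of
the before/after behavior, plus a hypothetical one-off check (not part of the
script) that the host can validate civicdb.org with the default store:

    # Before: an explicit context built from certifi's CA bundle was passed to urlopen
    #   ssl_ctx = ssl.create_default_context(cafile=certifi.where())
    #   urllib.request.urlopen(url, context=ssl_ctx)
    # After: urlopen uses Python's default context, i.e. the system CA store
    #   urllib.request.urlopen(url)

    # Hypothetical sanity check on hgwdev-new (illustrative only):
    import urllib.request
    with urllib.request.urlopen("https://civicdb.org/downloads") as resp:
        print(resp.status)  # expect 200 if certificate verification succeeds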

diff --git src/hg/utils/otto/civic/civicToBed.py src/hg/utils/otto/civic/civicToBed.py
index b5cee02..40c1453 100644
--- src/hg/utils/otto/civic/civicToBed.py
+++ src/hg/utils/otto/civic/civicToBed.py
@@ -1,20 +1,19 @@
 # /// script
 # requires-python = ">=3.12"
 # dependencies = [
 #     "pandas",
-#     "certifi",
 # ]
 # [tool.uv]
 # exclude-newer = "2024-11-20T00:00:00Z"
 # ///
 
 """Download CIViC DB files and convert into bigBed12 tracks
 
 This script is meant to be run with `uv run civicToBed.py`, which
 will create a reproducible Python environment in which to run it.
 
 EXTERNAL DEPENDENCIES: this script depends on several kent binaries,
 as well as data files. See the sections below with comment titles
 "External Shell Command Dependencies" and "Local Data File
 Dependencies"
 
@@ -64,61 +63,56 @@
 
 """
 
 from collections import defaultdict
 from contextlib import closing
 from copy import deepcopy
 import dataclasses
 import datetime
 import logging
 import os
 import ssl
 import subprocess
 from typing import Callable, Final, Generator, Sequence
 import urllib.request
 
-import certifi
 import numpy as np
 import pandas as pd
 
 ##
 ## External Shell Command Dependencies
 ##
 BED_TO_BIG_BED_CMD: Final = "bedToBigBed"
 LIFT_OVER_CMD: Final = "liftOver"
 BED_SORT_CMD: Final = "bedSort"
 
 ##
 ## Local Data File Dependencies
 ##
 LIFTOVER_CHAINS: Final = [
     ["hg19", "hg38", "/hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz"],
     ["hg38", "hg19", "/hive/data/gbdb/hg38/liftOver/hg38ToHg19.over.chain.gz"],
 ]
 GENCODE_UCSC_FN: Final = {
     "hg38": "/hive/data/genomes/hg38/bed/gencodeV47/build/ucscGenes.bed",
     "hg19": "/hive/data/genomes/hg19/bed/gencodeV47lift37/build/ucscGenes.bed",
 }
 CHROM_SIZES: Final = {
     "hg38": "/hive/data/genomes/hg38/chrom.sizes",
     "hg19": "/hive/data/genomes/hg19/chrom.sizes",
 }
 
-## hgwdev has trouble with SSL without using certifi, so this ensures
-## that certificates from PyPI are used
-ssl_ctx = ssl.create_default_context(cafile=certifi.where())
-
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
 DOWNLOAD_BASE_URL: Final = "https://civicdb.org/downloads"
 
 DATA_TABLES: Final = [
     "MolecularProfileSummaries",
     "VariantSummaries",
     "ClinicalEvidenceSummaries",
     "AssertionSummaries",
 ]
 
 ## Maximum length of a string (e.g. insAGCATGACCAG...) before
 ## being truncated and appended with an ellipsis
 MAX_VARIANT_LENGTH: Final = 20
@@ -604,31 +598,31 @@
         doid == "<NA>", doid + "|" + df["disease"]
     )
     return df
 
 
 def load_dataframes(table_dict: dict[str, str]) -> dict[str, pd.DataFrame]:
     """Load several dataframes.
     Input is a dict from name to the source path.
     Output is a dict from name to a Pandas DataFrame"""
 
     return {name: pd.read_csv(path, sep="\t") for name, path in table_dict.items()}
 
 
 def urlretrieve(url, filename):
     with closing(open(filename, "wb")) as outfile:
-        with closing(urllib.request.urlopen(url, context=ssl_ctx)) as instream:
+        with closing(urllib.request.urlopen(url)) as instream:
             outfile.write(instream.read())
 
 
 def download_datadir(
     basedir: str,
     baseurl: str,
     dateslug: str,
     tablelist: list[str],
     overwrite: bool = True,
 ) -> dict[str, str]:
     dlpaths = {}
 
     # make directory
     dldir = os.path.join(basedir, dateslug)
     os.makedirs(dldir, exist_ok=True)