cb31fb5a2ebbbb167b7f07c19f3af9e3e724068c cvaske Tue Jan 7 13:48:52 2025 -0800 civic: remove certifi package hgwdev-new no longer needs the PyPI certificate bundle diff --git src/hg/utils/otto/civic/civicToBed.py src/hg/utils/otto/civic/civicToBed.py index b5cee02..40c1453 100644 --- src/hg/utils/otto/civic/civicToBed.py +++ src/hg/utils/otto/civic/civicToBed.py @@ -1,20 +1,19 @@ # /// script # requires-python = ">=3.12" # dependencies = [ # "pandas", -# "certifi", # ] # [tool.uv] # exclude-newer = "2024-11-20T00:00:00Z" # /// """Download CIViC DB files and convert into bigBed12 tracks This script is meant to be run with `uv run civicToBed.py` which will create a reproducible Python environment to run. EXTERNAL DEPENDENCIES: this script depends on several kent binaries, as well as data files. See the sections below with comment titles "External Shell Command Dependencies" and "Local Data File Dependencies" @@ -64,61 +63,56 @@ """ from collections import defaultdict from contextlib import closing from copy import deepcopy import dataclasses import datetime import logging import os import ssl import subprocess from typing import Callable, Final, Generator, Sequence import urllib.request -import certifi import numpy as np import pandas as pd ## ## External Shell Command Dependencies ## BED_TO_BIG_BED_CMD: Final = "bedToBigBed" LIFT_OVER_CMD: Final = "liftOver" BED_SORT_CMD: Final = "bedSort" ## ## Local Data File Dependencies ## LIFTOVER_CHAINS: Final = [ ["hg19", "hg38", "/hive/data/gbdb/hg19/liftOver/hg19ToHg38.over.chain.gz"], ["hg38", "hg19", "/hive/data/gbdb/hg38/liftOver/hg38ToHg19.over.chain.gz"], ] GENCODE_UCSC_FN: Final = { "hg38": "/hive/data/genomes/hg38/bed/gencodeV47/build/ucscGenes.bed", "hg19": "/hive/data/genomes/hg19/bed/gencodeV47lift37/build/ucscGenes.bed", } CHROM_SIZES: Final = { "hg38": "/hive/data/genomes/hg38/chrom.sizes", "hg19": "/hive/data/genomes/hg19/chrom.sizes", } -## hgwdev has trouble with SSL without using certifi, so this ensures -## that certificates from PyPI are used -ssl_ctx = ssl.create_default_context(cafile=certifi.where()) - logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) DOWNLOAD_BASE_URL: Final = "https://civicdb.org/downloads" DATA_TABLES: Final = [ "MolecularProfileSummaries", "VariantSummaries", "ClinicalEvidenceSummaries", "AssertionSummaries", ] ## Maximum lengeth of a string (e.g. insAGCATGACCAG...) before ## being truncated and appended with an ellipsis MAX_VARIANT_LENGTH: Final = 20 @@ -604,31 +598,31 @@ doid == "", doid + "|" + df["disease"] ) return df def load_dataframes(table_dict: dict[str, str]) -> dict[str, pd.DataFrame]: """Load several dataframes. Input is a dict from name to the source path. Output is a dict from name to a Pandas DataFrame""" return {name: pd.read_csv(path, sep="\t") for name, path in table_dict.items()} def urlretrieve(url, filename): with closing(open(filename, "wb")) as outfile: - with closing(urllib.request.urlopen(url, context=ssl_ctx)) as instream: + with closing(urllib.request.urlopen(url)) as instream: outfile.write(instream.read()) def download_datadir( basedir: str, baseurl: str, dateslug: str, tablelist: list[str], overwrite: bool = True, ) -> dict[str, str]: dlpaths = {} # make directory dldir = os.path.join(basedir, dateslug) os.makedirs(dldir, exist_ok=True)