8faeb3cba60c7cb842bc17c17a57c9b53ef1b478 max Tue Apr 21 02:51:32 2026 -0700
ncbiCloneEndsCH1073: add NCBI CH1073 BAC library clone end placements track on danRer11, refs #35059

210,777 unique-concordant clone-insert placements from NCBI's CH1073
(RZPD-1073 / DanioKey) library clone report. Separate from the existing
bacEndPairsLift (danRer4 -> danRer11 UCSC-BLAT lift), which is left in
place.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

diff --git src/hg/makeDb/doc/danRer11/ncbiCloneEndsCH1073.txt src/hg/makeDb/doc/danRer11/ncbiCloneEndsCH1073.txt
new file mode 100644
index 00000000000..a8631027908
--- /dev/null
+++ src/hg/makeDb/doc/danRer11/ncbiCloneEndsCH1073.txt
@@ -0,0 +1,34 @@
+# NCBI CH1073 clone end placements track, refs #35059
+# 2026-04-21 Claude max
+
+mkdir -p /hive/data/genomes/danRer11/bed/ncbiCloneEndsCH1073
+cd /hive/data/genomes/danRer11/bed/ncbiCloneEndsCH1073
+
+# NCBI assembly report (has the UCSC-style name in column 10, so we just
+# project col 7 (RefSeq accession) onto col 10 (UCSC name)); -f: fail on HTTP errors
+curl -fsS -o GCF_000002035.6.assembly.txt \
+    'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/002/035/GCF_000002035.6_GRCz11/GCF_000002035.6_GRCz11_assembly_report.txt'
+
+# CH1073 unique_concordant placements (210777 clone_insert rows, ~48 MB)
+curl -fsS -o CH1073.unique_concordant.gff \
+    'https://ftp.ncbi.nlm.nih.gov/repository/clone/reports/Danio_rerio/CH1073.GCF_000002035.6.105.unique_concordant.gff'
+
+# RefSeq acc -> UCSC chrom name
+~/kent/src/hg/makeDb/scripts/ncbiCloneEndsCH1073/refSeqNames.py \
+    GCF_000002035.6.assembly.txt > refSeq.ucscName.tab
+# 1923 mappings, all names present in /hive/data/genomes/danRer11/chrom.sizes
+
+# Parse GFF -> BED 6+7 (matches cloneEnds.as). All 210777 clone_insert rows
+# map to UCSC names; 26 are flagged as oversize (insertSize > 500 kb).
+~/kent/src/hg/makeDb/scripts/ncbiCloneEndsCH1073/makeBed.py \
+    refSeq.ucscName.tab /hive/data/genomes/danRer11/chrom.sizes \
+    CH1073.unique_concordant.gff \
+    > ncbiCloneEndsCH1073.bed 2> makeBed.log
+
+# Sort bytewise (C locale) and convert to bigBed; $HOME because ~ is not expanded after "-as="
+LC_ALL=C sort -k1,1 -k2,2n ncbiCloneEndsCH1073.bed > ncbiCloneEndsCH1073.sorted.bed
+bedToBigBed -type=bed6+7 \
+    -as="$HOME/kent/src/hg/makeDb/scripts/ncbiCloneEndsCH1073/cloneEnds.as" \
+    -tab ncbiCloneEndsCH1073.sorted.bed \
+    /hive/data/genomes/danRer11/chrom.sizes \
+    danRer11.ncbiCloneEndsCH1073.bb