d93c426ef1ad5fbb32b754408599eaf380a199e5 max Tue Apr 21 13:34:58 2026 -0700 choriCloneEnds: reorganize danRer11 CHORI BAC clone end placements as a superTrack, refs #35059 - Rename ncbiCloneEndsCH1073 to choriCloneEnds throughout (trackDb, HTML, makeDoc, scripts dir, /hive and /gbdb layout). User-visible label is now "CHORI Clones" since all three libraries (CH1073, CH73, CH211) are CHORI/BACPAC BAC libraries; data source (NCBI Clone DB) is cited in Methods. - Wrap the existing CH1073 track in a choriCloneEnds superTrack and add two new subtracks built from the parallel unique_concordant GFFs at ftp.ncbi.nih.gov/repository/clone/reports/Danio_rerio/ : CH73 (99,141 placements, 23 oversize) CH211 (70,231 placements, 46 oversize) CH1073 is rebuilt with the same pipeline (210,777 placements). - Build all three bigBeds with -extraIndex=name and register searchTable / searchType bigBed stanzas with searchIndex name on each subtrack, so clone names (CH1073-100A1, CH73-1A1, CH211-1A1, ...) resolve from the Genome Browser position box. - Single shared HTML description page; Methods now links to the NCBI FTP source and to the UCSC makeDoc and scripts dir on GitHub. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> diff --git src/hg/makeDb/doc/danRer11/choriCloneEnds.txt src/hg/makeDb/doc/danRer11/choriCloneEnds.txt new file mode 100644 index 00000000000..23881b62686 --- /dev/null +++ src/hg/makeDb/doc/danRer11/choriCloneEnds.txt @@ -0,0 +1,36 @@ +# CHORI zebrafish BAC library clone end placements track, refs #35059 +# 2026-04-21 Claude max +# +# NCBI Clone DB publishes per-library "unique_concordant" placement GFFs +# under https://ftp.ncbi.nih.gov/repository/clone/reports/Danio_rerio/ , +# computed against GRCz11 (danRer11). We expose three CHORI libraries +# (CH73, CH211, CH1073) as a superTrack on danRer11. 
# Build the CHORI clone-end bigBeds (CH1073, CH73, CH211) for danRer11.
#
# The whole build runs inside a subshell with strict error handling: the
# first failing step stops the build, but the invoking shell is neither
# closed nor left in a different working directory. That keeps the block
# safe to paste into an interactive session as well as to run as a script.
(
  set -euo pipefail

  genomeDir=/hive/data/genomes/danRer11
  workDir=$genomeDir/bed/choriCloneEnds
  chromSizes=$genomeDir/chrom.sizes
  scriptsDir=$HOME/kent/src/hg/makeDb/scripts/choriCloneEnds

  mkdir -p "$workDir"
  cd "$workDir"

  # One-time: NCBI assembly report (col 7 RefSeq acc -> col 10 UCSC name).
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page as if it were the report.
  curl -fsS -o GCF_000002035.6.assembly.txt \
    'https://ftp.ncbi.nih.gov/genomes/all/GCF/000/002/035/GCF_000002035.6_GRCz11/GCF_000002035.6_GRCz11_assembly_report.txt'

  "$scriptsDir/refSeqNames.py" \
    GCF_000002035.6.assembly.txt > refSeq.ucscName.tab
  # 1923 mappings, all names present in /hive/data/genomes/danRer11/chrom.sizes

  # Build each library with the same converter + autoSql. The name column
  # is indexed (-extraIndex=name) so clone names like CH1073-100A1 resolve
  # from the browser position box via "searchIndex name" in the trackDb.
  for LIB in CH1073 CH73 CH211; do
    mkdir -p "$LIB"
    # Per-library subshell: the cd is scoped to this library, so there is
    # no "cd .." that could run from the wrong directory after a failure.
    (
      cd "$LIB"
      curl -fsS -o "${LIB}.unique_concordant.gff" \
        "https://ftp.ncbi.nih.gov/repository/clone/reports/Danio_rerio/${LIB}.GCF_000002035.6.105.unique_concordant.gff"
      # Converter diagnostics go to makeBed.log; only BED rows reach stdout.
      "$scriptsDir/makeBed.py" \
        ../refSeq.ucscName.tab "$chromSizes" \
        "${LIB}.unique_concordant.gff" > "${LIB}.bed" 2> makeBed.log
      # bedToBigBed expects input sorted by chrom, then start. LC_ALL=C
      # makes the chrom ordering byte-wise regardless of the caller's locale.
      LC_ALL=C sort -k1,1 -k2,2n "${LIB}.bed" > "${LIB}.sorted.bed"
      bedToBigBed -extraIndex=name -type=bed6+7 \
        -as="$scriptsDir/cloneEnds.as" \
        -tab "${LIB}.sorted.bed" "$chromSizes" "${LIB}.bb"
    )
  done
)
# clone_insert row counts: CH1073 210777, CH73 99141, CH211 70231.