src/hg/makeDb/doc/nomLeu3.txt 4e02ac74b68e5c26a87c9514c977af727f1e3bf5

4e02ac74b68e5c26a87c9514c977af727f1e3bf5
hiram
  Thu May 26 12:56:09 2022 -0700
add ncbiRefSeq genes and cytoBand no redmine

diff --git src/hg/makeDb/doc/nomLeu3.txt src/hg/makeDb/doc/nomLeu3.txt
index 88b31ed..54cea9f 100644
--- src/hg/makeDb/doc/nomLeu3.txt
+++ src/hg/makeDb/doc/nomLeu3.txt
@@ -1,969 +1,1012 @@
 # for emacs: -*- mode: sh; -*-
 
 #       DATE:	20-Feb-2013
 #       ORGANISM:	Nomascus leucogenys
 #       TAXID:	61853
 #       ASSEMBLY LONG NAME:	Nleu_3.0
 #       ASSEMBLY SHORT NAME:	Nleu_3.0
 #       ASSEMBLY SUBMITTER:	Gibbon Genome Sequencing Consortium
 #       ASSEMBLY TYPE:	Haploid
 #       NUMBER OF ASSEMBLY-UNITS:	2
 #	ASSEMBLY ACCESSION:	GCA_000146795.3
 #	##Below is a 2 column list with assembly-unit id and name.
 #	##The Primary Assembly unit is listed first.
 #	GCA_000146805.2	Primary Assembly
 #	GCA_000231795.1	non-nuclear
 #       FTP-RELEASE DATE: 28-Dec-2012
 
 #       http://www.ncbi.nlm.nih.gov/genome/480
 #       http://www.ncbi.nlm.nih.gov/genome/assembly/GCF_000146795.2
 #       http://www.ncbi.nlm.nih.gov/bioproject/13975
 #       chrMt scaffolds included in the download directory
 
 #       http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ADFV01
 #       Genome Coverage : 5.6x in Q20 bases
 
 #       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=61853
 
 # rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Nomascus_leucogenys/Nleu_3.0/
 
 ##########################################################################
 # Download sequence (DONE - 2013-02-21 - Pauline)
     mkdir /hive/data/genomes/nomLeu3
     cd /hive/data/genomes/nomLeu3
     mkdir genbank
     cd genbank
     time rsync -a -P \
 rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Nomascus_leucogenys/Nleu_3.0/ ./
     #   real    real    22m54.139s
 
     # verify the size of the sequence here:
     faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
         non-nuclear/unlocalized_scaffolds/FASTA/chrMT.unlocalized.scaf.fa.gz
     #   114532628 bases (13698051 N's 100834577 real 100834577 upper 0
     #   lower) in 15575 sequences in 2 files
     #   Total size: mean 7353.6 sd 41713.7
     #   min 782 (gi|350542783|gb|ADFV01197901.1|)
     #   max 2866069 (gi|306404542|gb|GL397432.1|) median 4542
 
 # and all together: (adding this for more complex assembly structure)
     faSize Primary_Assembly/assembled_chromosomes/FASTA/chr*.fa.gz \
 Primary_Assembly/unlocalized_scaffolds/FASTA/*.fa.gz \
 Primary_Assembly/unplaced_scaffolds/FASTA/*.fa.gz \
 non-nuclear/unlocalized_scaffolds/FASTA/chrMT.unlocalized.scaf.fa.gz
 
     #   2962077449 bases (205468402 N's 2756609047 real 2756609047 upper 0 lower)
     #   in 17492 sequences in 54 files
     #   Total size: mean 169339.0 sd 4348402.5
     #   min 782 (gi|350542783|gb|ADFV01197901.1|)
     #   max 163208435 (gi|429122739|gb|CM001648.1|) median 4779
 
 ##########################################################################
 # fixup names for UCSC standards (DONE - 2013-02-22 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/ucsc
     cd /hive/data/genomes/nomLeu3/ucsc
 
     ########################  Assembled Chromosomes
     cat << '_EOF_' > ucscCompositeAgp.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 
 open (FH, "<../genbank/Primary_Assembly/assembled_chromosomes/chr2acc") or
         die "can not read Primary_Assembly/assembled_chromosomes/chr2acc";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $accToChr{$acc} = $chrN;
 }
 close (FH);
 
 foreach my $acc (keys %accToChr) {
     my $chrN =  $accToChr{$acc};
     print "$acc $accToChr{$acc}\n";
     open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/AGP/chr${chrN}.comp.agp.gz|") or die "can not read chr${chrN}.comp.agp.gz";
     open (UC, ">chr${chrN}.agp") or die "can not write to chr${chrN}.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             $line =~ s/^$acc/chr${chrN}/;
             print UC $line;
         }
     }
     close (FH);
     close (UC);
     open (FH, "zcat ../genbank/Primary_Assembly/assembled_chromosomes/FASTA/chr${chrN}.fa.gz|") or die "can not read chr${chrN}.fa.gz";
     open (UC, ">chr${chrN}.fa") or die "can not write to chr${chrN}.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             printf UC ">chr${chrN}\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
 
     # << happy emacs
     chmod +x ucscCompositeAgp.pl
     time ./ucscCompositeAgp.pl
 # 48.327u 9.322s 0:47.74 120.7%	0+0k 0+0io 0pf+0w
     time gzip *.fa *.agp
 # 744.838u 3.232s 12:37.06 98.8%	0+0k 0+0io 0pf+0w
     faSize chr*.fa.gz
 #   2847562091 bases (191770351 N's 2655791740 real 2655791740 upper 0 lower)
 #   in 1925 sequences in 53 files
 #   Total size: mean 1479253.0 sd 13036646.3 min 782
 #   (chrM_ADFV01197901_random) max 163208435 (chr2) median 10860
 
 
     ########################  Unplaced scaffolds
     # verify we don't have any .acc numbers different from .1
     zcat \
     ../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
 	| cut -f1 | egrep "^GL|^ADFV" \
 	| sed -e 's/^GL[0-9][0-9]*//; s/^ADFV[0-9][0-9]*//' | sort | uniq -c
     #   33196 .1
     #   375 .2
 
 #   Found .1s and .2s so will need to modify the script below to rename both
 #   types of item.
 
 
     cat << '_EOF_' > unplaced.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my $agpFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz";
 my $fastaFile =  "../genbank/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz";
 open (FH, "zcat $agpFile|") or die "can not read $agpFile";
 open (UC, ">unplaced.agp") or die "can not write to unplaced.agp";
 while (my $line = <FH>) {
     if ($line =~ m/^#/) {
         print UC $line;
     } else {
         $line =~ s/\./_/;
         printf UC "chrUn_%s", $line;
     }
 }
 close (FH);
 close (UC);
 
 open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
 open (UC, ">unplaced.fa") or die "can not write to unplaced.fa";
 while (my $line = <FH>) {
     if ($line =~ m/^>/) {
         chomp $line;
         $line =~ s/.*gb\|//;
         $line =~ s/\.1\|.*//;
         printf UC ">chrUn_$line\n";
     } else {
         print UC $line;
     }
 }
 close (FH);
 close (UC);
 '_EOF_'
     # << happy emacs
     chmod +x unplaced.pl
     time ./unplaced.pl
 #2.422u 0.470s 0:02.61 110.7%	0+0k 0+0io 0pf+0w
 
     # make sure none of the names got to be over 31 characers long:
     grep -v "^#" unplaced.agp | cut -f1 | awk '{print length($1)}' | sort -rn | head -1
     gzip *.fa *.agp
     # not much in that sequence:
     faSize unplaced.fa.gz
 #   114515358 bases (13698051 N's 100817307 real 100817307 upper 0 lower)
 #   in 15567 sequences in 1 files
 #   Total size: mean 7356.3 sd 41724.2
 #   min 2496 (chrUn_ADFV01161919) max 2866069 (chrUn_GL397432) median 4543
 
     ########################  Unlocalized scaffolds
     cat << '_EOF_' > unlocalizedCHRM.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 my %chrNames;
 
 open (FH, "<../genbank/non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf") or
         die "can not read non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $accToChr{$acc} = $chrN;
     $chrNames{$chrN} += 1;
 }
 close (FH);
 
 foreach my $chrN (keys %chrNames) {
     my $agpFile =  "../genbank/non-nuclear/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
     my $fastaFile =  "../genbank/non-nuclear/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
     open (FH, "zcat $agpFile|") or die "can not read $agpFile";
     open (UC, "|sed -e 's/chrMT/chrM/g;' | gzip -c >chr${chrN}_random.agp.gz") or die "can not write to chr${chrN}_random.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             chomp $line;
             my (@a) = split('\t', $line);
             my $acc = $a[0];
             my $accNo1 = $acc;
             $accNo1 =~ s/.1$//;
             die "ERROR: acc not .1: $acc" if ($accNo1 =~ m/\./);
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${accNo1}_random";
             printf UC "%s", $ucscName;
             for (my $i = 1; $i < scalar(@a); ++$i) {
                 printf UC "\t%s", $a[$i];
             }
             printf UC "\n";
         }
     }
     close (FH);
     close (UC);
     printf "chr%s\n", $chrN;
     open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
     open (UC, "|sed -e 's/chrMT/chrM/g' | gzip -c >chr${chrN}_random.fa.gz") or die "can not write to chr${chrN}_random.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             chomp $line;
             my $acc = $line;
             $acc =~ s/.*gb\|//;
             $acc =~ s/\|.*//;
             my $accNo1 = $acc;
             $accNo1 =~ s/.1$//;
             die "ERROR: acc not .1: $acc" if ($accNo1 =~ m/\./);
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${accNo1}_random";
             printf UC ">$ucscName\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
 
     cat << '_EOF_' > unlocalized.pl
 #!/bin/env perl
 
 use strict;
 use warnings;
 
 my %accToChr;
 my %chrNames;
 
 open (FH, "<../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf") or
         die "can not read Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf";
 while (my $line = <FH>) {
     next if ($line =~ m/^#/);
     chomp $line;
     my ($chrN, $acc) = split('\s+', $line);
     $accToChr{$acc} = $chrN;
     $chrNames{$chrN} += 1;
 }
 close (FH);
 
 foreach my $chrN (keys %chrNames) {
     my $agpFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/AGP/chr$chrN.unlocalized.scaf.agp.gz";
     my $fastaFile =  "../genbank/Primary_Assembly/unlocalized_scaffolds/FASTA/chr$chrN.unlocalized.scaf.fa.gz";
     open (FH, "zcat $agpFile|") or die "can not read $agpFile";
     open (UC, ">chr${chrN}_random.agp") or die "can not write to chr${chrN}_random.agp";
     while (my $line = <FH>) {
         if ($line =~ m/^#/) {
             print UC $line;
         } else {
             chomp $line;
             my (@a) = split('\t', $line);
             my $acc = $a[0];
             my $accNo1 = $acc;
             $accNo1 =~ s/\./_/;
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${accNo1}_random";
             printf UC "%s", $ucscName;
             for (my $i = 1; $i < scalar(@a); ++$i) {
                 printf UC "\t%s", $a[$i];
             }
             printf UC "\n";
         }
     }
     close (FH);
     close (UC);
     printf "chr%s\n", $chrN;
     open (FH, "zcat $fastaFile|") or die "can not read $fastaFile";
     open (UC, ">chr${chrN}_random.fa") or die "can not write to chr${chrN}_random.fa";
     while (my $line = <FH>) {
         if ($line =~ m/^>/) {
             chomp $line;
             my $acc = $line;
             $acc =~ s/.*gb\|//;
             $acc =~ s/\|.*//;
             my $accNo1 = $acc;
             $accNo1 =~ s/\./_/;
             die "ERROR: chrN $chrN not correct for $acc"
                 if ($accToChr{$acc} ne $chrN);
             my $ucscName = "chr${chrN}_${accNo1}_random";
             printf UC ">$ucscName\n";
         } else {
             print UC $line;
         }
     }
     close (FH);
     close (UC);
 }
 '_EOF_'
 
 
     # << happy emacs
     chmod +x unlocalizedCHRM.pl
     time ./unlocalizedCHRM.pl
 
     chmod +x unlocalized.pl
     time ./unlocalized.pl
 
     mv chrMT_random.fa.gz chrM_random.fa.gz
     mv chrMT_random.agp.gz chrM_random.agp.gz
 
     gzip *.fa *.agp
 
     # verify nothing lost from original:
     time faSize *.fa.gz
 
     #  2962077449 bases (205468402 N's 2756609047 real 2756609047 upper 0 lower)
     #  in 17492 sequences in 54 files
     #  Total size: mean 169339.0 sd 4348402.5
     #  min 782 (chrM_ADFV01197901_random) max 163208435 (chr2) median 4779
 
     #   2936052603 bases (179443556 N's 2756609047 real 2756609047 upper 0
     #   lower) in 17976 sequences in 2 files
     #   Total size: mean 163331.8 sd 2015356.2
     #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
 
     # make sure none of the names got to be over 31 characers long:
     zcat *.agp.gz | grep -v "^#" | cut -f1 | awk '{print length($0)}' \
         | sort -rn | uniq -c | head
 
 ##########################################################################
 # Initial makeGenomeDb.pl (DONE - 2013-02-22 - Pauline)
     cd /hive/data/genomes/nomLeu3
     # mitoAcc - chrMt sequence is included in the download files
 
     cp ../nomLeu2/nomLeu2.config.ra .
     mv nomLeu2.config.ra nomLeu3.config.ra
 
 
 # Config parameters for makeGenomeDb.pl:
 db nomLeu3
 clade mammal
 genomeCladePriority 13
 scientificName Nomascus leucogenys
 commonName Gibbon
 assemblyDate Oct. 2012
 assemblyLabel Gibbon Genome Sequencing Consortium
 assemblyShortLabel GGSC Nleu3.0
 orderKey 328
 mitoAcc none
 fastaFiles /hive/data/genomes/nomLeu3/ucsc/*.fa.gz
 agpFiles /hive/data/genomes/nomLeu3/ucsc/*.agp.gz
 # qualFiles none
 dbDbSpeciesDir gibbon
 photoCreditURL http://www.genome.gov/pressDisplay.cfm?photoID=74
 photoCreditName Jim Zuckerman, Gibbon Conservation Center
 ncbiGenomeId 480
 ncbiAssemblyId 506498
 ncbiAssemblyName Nleu_3.0
 ncbiBioProject 13975
 genBankAccessionID GCA_000146795.2
 taxId 61853
 
 
     # << happy emacs
 
     # verify sequence and agp are OK
     time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
         -stop=dbDb nomLeu3.config.ra >& dbDb.log
     #first run failed part way through, resumed run from part way through
 
     time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
         -continue=agp nomLeu3.config.ra >& agp.log
     #   real    2m4.625s
     #
     # verify OK:
     tail -1 agp.log
     #   *** All done!  (through the 'agp' step)
 
     #	add the trackDb entries to the source tree, and the 2bit link:
     ln -s `pwd`/nomLeu3.unmasked.2bit /gbdb/nomLeu3/nomLeu3.2bit
     #	browser should function now in sandbox
     #   trackDb files here:
     #   /hive/data/genomes/nomLeu3/TemporaryTrackDbCheckout/kent/src/hg/makeDb/trackDb/gibbon/nomLeu3/
     #   into source tree
     #   now browser should function on hgwdev
 
 #########################################################################
 # running repeat masker (DONE - 2013-03-04 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/bed/repeatMasker
     cd /hive/data/genomes/nomLeu3/bed/repeatMasker
     time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
 	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
 	-smallClusterHub=encodek nomLeu3 >& do.log
 
     time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
 	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
 	-smallClusterHub=encodek -continue=cat nomLeu3 >& cat.log
 
     cat faSize.rmsk.txt
     #   2936052603 bases (179443556 N's 2756609047 real 1339535283 upper
     #   1417073764 lower) in 17976 sequences in 1 files
     #   Total size: mean 163331.8 sd 2015356.2
     #   min 782 (chrM_ADFV01197901_random) max 74231199 (GL397261) median 4849
     #   %48.26 masked total, %51.41 masked real
 
     egrep -i "versi|relea" do.log
     #    April 26 2011 (open-3-3-0) version of RepeatMasker
     # CC   RELEASE 20110920;                                            *
     # RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $
 
     time featureBits -countGaps nomLeu3 rmsk
     #   1409756364 bases of 2962077449 (47.594%) in intersection
     #   real	0m33.871s
 
     # why is it different than the faSize above ?
     # because rmsk masks out some N's as well as bases, the count above
     #	separates out the N's from the bases, it doesn't show lower case N's
 
 ##########################################################################
 # running simple repeat (DONE - 2013-03-04 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/bed/simpleRepeat
     cd /hive/data/genomes/nomLeu3/bed/simpleRepeat
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
 	nomLeu3 >& do.log
 
     # continuing:
     time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
 	-continue=filter nomLeu3 >& filter.log
     #   real    0m53.367s
 
     cat fb.simpleRepeat
     #   123289144 bases of 2756609047 (4.472%) in intersection
 
     # add to rmsk after it is done:
     cd /hive/data/genomes/nomLeu3
     twoBitMask nomLeu3.rmsk.2bit \
 	-add bed/simpleRepeat/trfMask.bed nomLeu3.2bit
     #	you can safely ignore the warning about fields >= 13
 
     twoBitToFa nomLeu3.2bit stdout | faSize stdin > faSize.nomLeu3.2bit.txt
     cat faSize.nomLeu3.2bit.txt
 
     #   2962077449 bases (205468402 N's 2756609047 real 1346803840
     #   upper 1409805207 lower) in 17492 sequences in 1 files
     #   Total size: mean 169339.0 sd 4348402.5
     #   min 782 (chrM_ADFV01197901_random) max 163208435 (chr2) median 4779
     #   %47.60 masked total, %51.14 masked real
 
     rm /gbdb/nomLeu3/nomLeu3.2bit
     ln -s `pwd`/nomLeu3.2bit /gbdb/nomLeu3/nomLeu3.2bit
 
 #########################################################################
 # Verify all gaps are marked, add any N's not in gap as type 'other'
 #	(REDONE - 2013-03-01 - Pauline)
 
     mkdir /hive/data/genomes/nomLeu3/bed/gap
     cd /hive/data/genomes/nomLeu3/bed/gap
 
     time nice findMotif -motif=gattaca -verbose=4 \
 	-strand=+ ../../nomLeu3.unmasked.2bit >& findMotif.txt
     #   real	1m2.760s
 
     grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
     time featureBits nomLeu3 -not gap -bed=notGap.bed
     #2756609047 bases of 2756609047 (100.000%) in intersection
 
     time featureBits nomLeu3 allGaps.bed notGap.bed -bed=new.gaps.bed
     #0 bases of 2756609047 (0.000%) in intersection
 
     # are there non-bridged gaps here:
     hgsql -N -e "select bridge from gap;" nomLeu3 | sort | uniq -c
 
     #     92 no
     # 180376 yes
 
 ##########################################################################
 ## WINDOWMASKER (DONE- 2013-03-27 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/bed/windowMasker
     cd /hive/data/genomes/nomLeu3/bed/windowMasker
     time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
 	-dbHost=hgwdev nomLeu3 >& do.log
 
     # Masking statistics
     twoBitToFa nomLeu3.wmsk.2bit stdout | faSize stdin
     #   2962077449 bases (205468402 N's 2756609047 real 1711898047 upper
     #   1044711000 lower) in 17492 sequences in 1 files
     #   Total size: mean 169339.0 sd 4348402.5 min 782
     #   (chrM_ADFV01197901_random) max 163208435 (chr2) median 4779
     #   %35.27 masked total, %37.90 masked real
 
     twoBitToFa nomLeu3.wmsk.sdust.2bit stdout | faSize stdin
     #   2962077449 bases (205468402 N's 2756609047 real 1697032536 upper
     #   1059576511 lower) in 17492 sequences in 1 files
     #   Total size: mean 169339.0 sd 4348402.5 min 782
     #   (chrM_ADFV01197901_random) max 163208435 (chr2) median 4779
     #   %35.77 masked total, %38.44 masked real
 
     hgLoadBed nomLeu3 windowmaskerSdust windowmasker.sdust.bed.gz
     #   Read 15229701 elements of size 3 from windowmasker.sdust.bed.gz
 
     featureBits -countGaps nomLeu3 windowmaskerSdust
     #   1059576511 bases of 2962077449 (35.771%) in intersection
 
     #	eliminate the gaps from the masking
     time featureBits nomLeu3 -not gap -bed=notGap.bed
     #   2756609047 bases of 2756609047 (100.000%) in intersection
     #   real	0m20.812s
 
     time nice -n +19 featureBits nomLeu3 windowmaskerSdust notGap.bed \
         -bed=stdout | gzip -c > cleanWMask.bed.gz
     #   1059576532 bases of 2756609047 (38.438%) in intersection
     #   real    11m9.332s
 
     #	reload track to get it clean
     hgLoadBed nomLeu3 windowmaskerSdust cleanWMask.bed.gz
     #   Read 15242903 elements of size 4 from cleanWMask.bed.gz
     featureBits -countGaps nomLeu3 windowmaskerSdust
     #   1059576532 bases of 2936052603 (36.088%) in intersection
     #   real    1m38.313s
     zcat cleanWMask.bed.gz \
 	| twoBitMask ../../nomLeu3.unmasked.2bit stdin \
 	    -type=.bed nomLeu3.cleanWMSdust.2bit
     twoBitToFa nomLeu3.cleanWMSdust.2bit stdout | faSize stdin \
         > nomLeu3.cleanWMSdust.faSize.txt
     cat nomLeu3.cleanWMSdust.faSize.txt
     #   2962077449 bases (205468402 N's 2756609047 real 1697032536 upper
     #   1059576511 lower) in 17492 sequences in 1 files
     #   Total size: mean 169339.0 sd 4348402.5 min 782
     #   (chrM_ADFV01197901_random) max 163208435 (chr2) median 4779
     #   %35.77 masked total, %38.44 masked real
 
     # how much does this window masker and repeat masker overlap:
     featureBits -countGaps nomLeu3 rmsk windowmaskerSdust
     #   833347944 bases of 2962077449 (28.134%) in intersection
 
 ########################################################################
 # cpgIslands - (DONE - 2013-03-05 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/bed/cpgIslands
     cd /hive/data/genomes/nomLeu3/bed/cpgIslands
     /usr/bin/time -p doCpgIslands.pl nomLeu3 >& do.log
     #   NomLeu2 run elapsed time: 61m44s (couldn't recover time from this run)
 
     cat fb.nomLeu3.cpgIslandExt.txt
     #   17807990 bases of 2756609047 (0.646%) in intersection
 
 #########################################################################
 # genscan - (DONE - 2013-03-05 - Pauline)
     mkdir /hive/data/genomes/nomLeu3/bed/genscan
     cd /hive/data/genomes/nomLeu3/bed/genscan
     /usr/bin/time -p doGenscan.pl nomLeu3 >& do.log
     #   NomLeu2 run elapsed time: 66m40s (couldn't recover time from this run)
 
     cat fb.nomLeu3.genscan.txt
     #   49283434 bases of 2756609047 (1.788%) in intersection
     cat fb.nomLeu3.genscanSubopt.txt
     #   50711544 bases of 2756609047 (1.840%) in intersection
 
 #########################################################################
 # MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2013-03-05 - Pauline)
     # Use -repMatch=900, based on size -- for human we use 1024
     # use the "real" number from the faSize measurement,
     # hg19 is 2897316137, calculate the ratio factor for 1024:
     calc \( 2756609047 / 2897316137 \) \* 1024
 
     #   nomLeu2 numbers:
     #	( 2756609047 / 2897316137 ) * 1024 = 974.269818
 
     # round up to 1000 (nomLeu1 was 900)
 
     cd /hive/data/genomes/nomLeu3
     /usr/bin/time -p blat nomLeu3.2bit /dev/null /dev/null -tileSize=11 \
       -makeOoc=jkStuff/nomLeu3.11.ooc -repMatch=1000
     #   Wrote 30230 overused 11-mers to jkStuff/nomLeu3.11.ooc
     #   real 87.92
     #   nomLeu2 was: Wrote 30203 overused 11-mers to jkStuff/nomLeu3.11.ooc
 
     # there are non-bridged gaps, no lift file needed for genbank
     hgsql -N -e "select bridge from gap;" nomLeu3 | sort | uniq -c
     #   92 no
     #   180376 yes
     cd /hive/data/genomes/nomLeu3/jkStuff
     gapToLift nomLeu3 nomLeu3.nonBridged.lift -bedFile=nomLeu3.nonBridged.bed
     # largest non-bridged contig:
     awk '{print $3-$2,$0}' nomLeu3.nonBridged.bed | sort -nr | head
     #   106279408 chrX  34971740        141251148       chrX.01
 
 #########################################################################
 # AUTO UPDATE GENBANK (DONE - 2013-03-08 - Pauline)
     # examine the file:
     /cluster/data/genbank/data/organism.lst
     # for your species to see what counts it has for:
 # organism       mrnaCnt estCnt  refSeqCnt
 #Nomascus leucogenys     18      1       0
 
     # to decide which "native" mrna or ests you want to specify in genbank.conf
 
     ssh hgwdev
     cd $HOME/kent/src/hg/makeDb/genbank
     git pull
     # edit etc/genbank.conf to add nomLeu3 just after ce2
 
 # nomLeu3 (Gibbon)
 nomLeu3.serverGenome = /hive/data/genomes/nomLeu3/nomLeu3.2bit
 nomLeu3.clusterGenome = /hive/data/genomes/nomLeu3/nomLeu3.2bit
 nomLeu3.ooc = /hive/data/genomes/nomLeu3/nomLeu3.11.ooc
 nomLeu3.lift = /hive/data/genomes/nomLeu3/jkStuff/nomLeu3.nonBridged.lift
 nomLeu3.refseq.mrna.native.pslCDnaFilter  = ${finished.refseq.mrna.native.pslCDnaFilter}
 nomLeu3.refseq.mrna.xeno.pslCDnaFilter    = ${finished.refseq.mrna.xeno.pslCDnaFilter}
 nomLeu3.genbank.mrna.native.pslCDnaFilter = ${finished.genbank.mrna.native.pslCDnaFilter}
 nomLeu3.genbank.mrna.xeno.pslCDnaFilter   = ${finished.genbank.mrna.xeno.pslCDnaFilter}
 nomLeu3.genbank.est.native.pslCDnaFilter  = ${finished.genbank.est.native.pslCDnaFilter}
 nomLeu3.genbank.est.xeno.pslCDnaFilter    = ${finished.genbank.est.xeno.pslCDnaFilter}
 nomLeu3.refseq.mrna.native.load  = no
 nomLeu3.refseq.mrna.xeno.load = yes
 nomLeu3.genbank.mrna.native.load = no
 nomLeu3.genbank.mrna.xeno.load = yes
 nomLeu3.genbank.est.native.load = no
 nomLeu3.downloadDir = nomLeu3
 nomLeu3.perChromTables = no
 
     # end of section added to etc/genbank.conf
     git commit -m "adding nomLeu3 Gibbon refs #9812" etc/genbank.conf
     git push
     make etc-update
 
     ssh hgwdev			# used to do this on "genbank" machine
     screen -S nomLeu3           # long running job managed in screen
     cd /cluster/data/genbank
 /usr/bin/time -p nice ./bin/gbAlignStep -initial nomLeu3 &
 #  logFile: var/build/logs/2013.03.06-11:36:35.nomLeu3.initalign.log
 #	 var/build/logs/2012.05.07-10:21:12.nomLeu3.initalign.log
 #   real    2135m50.446s
 
     # load database when finished
     ssh hgwdev
     cd /cluster/data/genbank
 
     /usr/bin/time -p nice ./bin/gbDbLoadStep -drop -initialLoad nomLeu3 &
     #   logFile: var/dbload/hgwdev/logs/2013.03.08-09:52:00.dbload.log
    #   real 2453.76
 
     # enable daily alignment and update of hgwdev (DONE - 2013-03-08 - Pauline)
     cd ~/kent/src/hg/makeDb/genbank
     git pull
     # add nomLeu3 to:
     vi etc/align.dbs etc/hgwdev.dbs
     git commit -m "Added nomLeu3. refs #9812" etc/align.dbs etc/hgwdev.dbs
     git push
     make etc-update
 
 #########################################################################
 # set default position to RHO gene displays  (DONE - 2013-03-08 -Pauline)
     hgsql -e \
 'update dbDb set defaultPos="chr21:52,409,568-52,414,485" where name="nomLeu3";' \
 	hgcentraltest
     # a bit wider for a little more context (2013-03-28 - Hiram)
     hgsql -e \
 'update dbDb set defaultPos="chr21:52408339-52415715" where name="nomLeu3";' \
 	hgcentraltest
 
 ############################################################################
 # downloads and pushQ entry (DONE - 2013-03-08 - Pauline)
     # after adding nomLeu3 to the all.joiner file and verifying that
     #   joinerCheck is clean (i.e. run joinerCheck w -times and -keys flags
     #   to make sure there are no errors), can construct the downloads:
     cd /hive/data/genomes/nomLeu3
     /usr/bin/time -p makeDownloads.pl -workhorse=hgwdev nomLeu3
     #   real 1400.59
 
     mkdir /hive/data/genomes/nomLeu3/pushQ
     cd /hive/data/genomes/nomLeu3/pushQ
     # Mark says don't let the transMap track get there
     time makePushQSql.pl nomLeu3 2> stderr.txt > nomLeu3.sql
     #  real	2m55.404s
 
     # check the stderr.txt for bad stuff, these kinds of warnings are OK:
 # WARNING: hgwdev does not have /gbdb/nomLeu3/wib/gc5Base.wib
 # WARNING: hgwdev does not have /gbdb/nomLeu3/wib/quality.wib
 # WARNING: hgwdev does not have /gbdb/nomLeu3/bbi/quality.bw
 # WARNING: nomLeu3 does not have seq
 # WARNING: nomLeu3 does not have extFile
 # WARNING: nomLeu3 does not have estOrientInfo
 
     scp -p nomLeu3.sql hgwbeta:/tmp/
     ssh hgwbeta "hgsql qapushq < /tmp/nomLeu3.sql"
 
 
 ############################################################################
 # construct liftOver to nomLeu3 (DONE - 2013-03-07 - Pauline)
     screen -S lift	# manage this longish running job in a screen
     mkdir /hive/data/genomes/nomLeu2/bed/blat.nomLeu3.2013-03-06
     cd /hive/data/genomes/nomLeu2/bed/blat.nomLeu3.2013-03-06
     # check it with -debug first to see if it is going to work:
     /usr/bin/time -p doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-ooc=/hive/data/genomes/nomLeu2/jkStuff/nomLeu2.11.ooc \
 	-debug -dbHost=hgwdev -workhorse=hgwdev nomLeu2 nomLeu3
     #   real    0m1.838s
     # if that is OK, then run it:
     /usr/bin/time -p doSameSpeciesLiftOver.pl -buildDir=`pwd` -bigClusterHub=swarm \
 	-ooc=/hive/data/genomes/nomLeu2/jkStuff/nomLeu2.11.ooc \
 	-dbHost=hgwdev -workhorse=hgwdev nomLeu2 nomLeu3 >& do.log
     #	real    277m26.138s
 
     # verify this file exists:
     #	/gbdb/nomLeu2/liftOver/nomLeu2.over.chain.gz
     # and try out the conversion on genome-test from nomLeu2 to nomLeu3
 
 ############################################################################
  #  BLATSERVERS ENTRY (DONE - 2013-03-12 - Pauline)
  #	After getting a blat server assigned by the Blat Server Gods,
      ssh hgwdev
 
      hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  	VALUES ("nomLeu3", "blat4c", "17840", "1", "0"); \
  	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  	VALUES ("nomLeu3", "blat4c", "17841", "0", "1");' \
  	    hgcentraltest
      #	test it with some sequence
 
 ############################################################################
 # reset dbDb description to be like the previous nomLeu
 #       (DONE - 2013-03-21 - Hiram)
     hgsql hgcentraltest -e 'update dbDb set description = "Oct. 2012 (GGSC Nleu3.0/nomLeu3)"" where name = "nomLeu3";' \
           | hgsql -h genome-testdb hgcentraltest
 
 ############################################################################
 # SWAP hg19/Human chain/net (DONE - 2013-03-26 - Pauline)
     # original alignment
     cd /hive/data/genomes/hg19/bed/lastzNomLeu3.2013-03-06
     cat fb.hg19.chainNomLeu3Link.txt
     #   2542790081 bases of 2897316137 (87.764%) in intersection
 
     #   running this swap - DONE - 2013-03-26
     mkdir /hive/data/genomes/nomLeu3/bed/blastz.hg19.swap
     cd /hive/data/genomes/nomLeu3/bed/blastz.hg19.swap
     time nice -n +19 doBlastzChainNet.pl -verbose=2 \
         /hive/data/genomes/hg19/bed/lastzNomLeu3.2013-03-06/DEF \
         -swap -syntenicNet \
         -noLoadChainSplit -chainMinScore=5000 -chainLinearGap=medium \
         -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
         > swap.log 2>&1 &
     #   real   138m48s
     cat fb.nomLeu3.chainHg19Link.txt
     #   2479386532 bases of 2756609047 (89.943%) in intersection
 
 ##############################################################################
 # create ucscToRefSeq name mapping (DONE - 2017-12-14 - Hiram)
     mkdir /hive/data/genomes/nomLeu3/bed/ucscToRefSeq
     cd /hive/data/genomes/nomLeu3/bed/ucscToRefSeq
 
     # run up idKeys for refseq release
     mkdir refseq
     cd refseq
     ln -s /hive/data/outside/ncbi/genomes/refseq/vertebrate_mammalian/Nomascus_leucogenys/all_assembly_versions/GCF_000146795.2_Nleu_3.0/GCF_000146795.2_Nleu_3.0_genomic.fna.gz .
     faToTwoBit *.fna.gz refseq.nomLeu3.2bit
     doIdKeys.pl -buildDir=`pwd` -twoBit=`pwd`/refseq.nomLeu3.2bit refseqNomLeu3
     # real    8m48.219s
 
     cd /hive/data/genomes/nomLeu3/bed/ucscToRefSeq
     join -t$'\t' ../idKeys/nomLeu3.idKeys.txt refseq/refseqNomLeu3.idKeys.txt \
       | cut -f2- | sort > ucsc.refseq.tab
 
     join -t$'\t' <(sort ../../chrom.sizes) ucsc.refseq.tab \
         | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' \
           | sort -k1,1 -k2,2n > ucscToRefSeq.bed
 
     # maximum size of UCSC chrom name for SQL index
     export SZ=`cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1`
     # SZ: 28
 
     sed -e "s/21/$SZ/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
         | hgLoadSqlTab nomLeu3 ucscToRefSeq stdin ucscToRefSeq.bed
 
     checkTableCoords nomLeu3 ucscToRefSeq
 
     featureBits -countGaps nomLeu3 ucscToRefSeq
     # 2962060179 bases of 2962077449 (99.999%) in intersection
 
     # the missing bits are the chrM scaffolds:
     calc 2962077449 - 2962060179
     # 2962077449 - 2962060179 = 17270.000000
 
     grep chrM ../../chrom.sizes | ave -col=2 stdin | grep total
     # total 17270.000000
 
 ##############################################################################
 # create ucscToINSDC name mapping (DONE - 2013-08-16 - Hiram)
     # this allows the "ensembl" blue bar button to appear
     mkdir /hive/data/genomes/nomLeu3/bed/ucscToINSDC
     cd /hive/data/genomes/nomLeu3/bed/ucscToINSDC
 
     cat << '_EOF_' > translateNames.sh
 #!/bin/sh
 
 export chrUN=""
 export chrMT=""
 
 runAll() {
 
 
 if [ -s ../../genbank/Primary_Assembly/assembled_chromosomes/chr2acc ]; then
   grep -v "^#" ../../genbank/Primary_Assembly/assembled_chromosomes/chr2acc \
      | sed -e 's/^/chr/'
 fi
 
 unCount=`grep chrUn_ ../../chrom.sizes | wc -l`
 if [ $unCount -gt 0 ]; then
    chrUN="chrUn_"
 fi
 
 if [ -s ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz ]; then
   zcat ../../genbank/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz | grep -v "^#" | cut -f1 | sort -u \
      | sed -e "s/^\([A-Za-z0-9]*\).\([0-9]*\)/${chrUN}\1_\2\t\1.\2/;"
 fi
 
 if [ -s ../../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf ]; then
 grep -v "^#" \
   ../../genbank/Primary_Assembly/unlocalized_scaffolds/unlocalized.chr2scaf \
     | sed -e 's/^\([A-Za-z0-9]*\)\t\([A-Za-z0-9]*\).\([0-9]*\)/chr\1_\2_\3_random\t\2.\3/;'
 
 fi
 
 if [ -s ../../genbank/non-nuclear/assembled_chromosomes/chr2acc ]; then
    AC=`grep "^MT" ../../genbank/non-nuclear/assembled_chromosomes/chr2acc | cut -f2`
    echo -e "chrM\t$AC"
 else
   mCount=`grep chrM ../../chrom.sizes | wc -l`
   if [ $mCount -gt 0 ]; then
      if [ $mCount -eq 1 ]; then
         chrMT=$1
         if [ "x${chrMT}y" != "xy" ]; then
           echo -e "chrM\t${chrMT}"
         else
           echo "need to find chrM accessions" 1>&2
         fi
      else
         if [ -s ../../genbank/non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf ]; then
           grep -v "^#" ../../genbank/non-nuclear/unlocalized_scaffolds/unlocalized.chr2scaf \
            | cut -f2 | sed -e 's/\([A-Za-z0-9]*\).\([0-9]*\)/chrM_\1_random\t\1.\2/'
         else
            echo "need to find multiple chrM accessions" 1>&2
         fi
      fi
   fi
 fi
 
 }
 
 runAll $* | sort > ucscToINSDC.txt
 '_EOF_'
     # << happy emacs
 
     chmod +x translateNames.sh
     ./translateNames.sh
     # verify all names are covered, "should all be a count of 2 only:
 (cut -f1 ../../chrom.sizes; cut -f1 ucscToINSDC.txt) | sort | uniq -c | sort -rn | head
 (cut -f1 ../../chrom.sizes; cut -f1 ucscToINSDC.txt) | sort | uniq -c | sort -rn | tail
 
     export DB=`pwd | sed -e 's#/hive/data/genomes/##; s#/.*##;'`
     join <(sort ../../chrom.sizes) ucscToINSDC.txt \
         | awk '{printf "%s\t0\t%d\t%s\n", $1, $2, $3}' > ucscToINSDC.tab
 
     # maximum size of UCSC chrom name for SQL index
     export SZ=`cut -f1 ucscToINSDC.tab | awk '{print length($0)}' | sort -n | tail -1`
     # SZ: 28
 
     sed -e "'s/21/$SZ/'" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
         | hgLoadSqlTab '${DB}' ucscToINSDC stdin ucscToINSDC.tab'
 
     checkTableCoords ${DB} ucscToINSDC
 
     # verify the track link to INSDC functions
 
 ##############################################################################
 # add chromAlias table (DONE - 2017-12-14 - Hiram)
 
     mkdir /hive/data/genomes/nomLeu3/bed/chromAlias
     cd /hive/data/genomes/nomLeu3/bed/chromAlias
 
     hgsql -N -e 'select chrom,name from ucscToRefSeq;' nomLeu3 \
         > ucsc.refseq.tab
     hgsql -N -e 'select chrom,name from ucscToINSDC;' nomLeu3 \
         > ucsc.genbank.tab
 
     join -t$'\t' ../idKeys/nomLeu3.idKeys.txt \
 	../../ensembl/ensemblNomLeu3.idKeys.txt \
 	| cut -f2,3 | sort > ucsc.ensembl.tab
 
     ~/kent/src/hg/utils/automation/chromAlias.pl
     sort -o nomLeu3.chromAlias.tab nomLeu3.chromAlias.tab
 
 for t in refseq genbank ensembl
 do
   c0=`cat ucsc.$t.tab | wc -l`
   c1=`grep $t nomLeu3.chromAlias.tab | wc -l`
   ok="OK"
   if [ "$c0" -ne "$c1" ]; then
      ok="ERROR"
   fi
   printf "# checking $t: $c0 =? $c1 $ok\n"
 done
 # checking refseq: 17484 =? 17484 OK
 # checking genbank: 17492 =? 17492 OK
 # checking ensembl: 17484 =? 17484 OK
 
     hgLoadSqlTab nomLeu3 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
         nomLeu3.chromAlias.tab
 
 ##############################################################################
 # TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
 ##############################################################################
 
 ##############################################################################
 # GENEID GENE PREDICTIONS (DONE - 2015-06-26 - Hiram)
     ssh hgwdev
     mkdir /hive/data/genomes/nomLeu3/bed/geneid
     cd /hive/data/genomes/nomLeu3/bed/geneid
     wget --timestamping \
 http://genome.crg.es/genepredictions/N.leucogenys/nomLeu3/geneid_v1.4/00README
 
     wget --timestamping \
 http://genome.crg.es/genepredictions/N.leucogenys/nomLeu3/geneid_v1.4/nomLeu3.geneid.gtf
 
     ldHgGene -gtf -genePredExt nomLeu3 geneid nomLeu3.geneid.gtf
     # Read 34901 transcripts in 254923 lines in 1 files
     #  34901 groups 5247 seqs 1 sources 3 feature types
     # 34901 gene predictions
 
     featureBits -enrichment nomLeu3 augustusGene:CDS geneid
 # augustusGene:CDS 1.120%, geneid 1.254%, both 0.899%, cover 80.28%,
 #    enrich 64.00x
 
 ##########################################################################
+# ncbiRefSeq (DONE - 2022-05-26 Hiram)
+
+    mkdir /hive/data/genomes/nomLeu3/bed/ncbiRefSeq.2022-05-26
+    cd /hive/data/genomes/nomLeu3/bed/ncbiRefSeq.2022-05-26
+
+    time( /cluster/home/hiram/kent/src/hg/utils/automation/doNcbiRefSeq.pl \
+       -stop=process \
+       -buildDir=`pwd` GCF_000146795.2_Nleu_3.0 nomLeu3) \
+          > process.log 2>&1
+    # real    3m37.260s
+
+    time( /cluster/home/hiram/kent/src/hg/utils/automation/doNcbiRefSeq.pl \
+       -continue=load \
+       -buildDir=`pwd` GCF_000146795.2_Nleu_3.0 nomLeu3) \
+          > load.log 2>&1
+    # real    0m30.847s
+
+    sed -e 's/^/    # /;' fb.ncbiRefSeq.nomLeu3.txt
+    # 62471660 bases of 2756609047 (2.266%) in intersection
+
+    # add: include ../../refSeqComposite.ra alpha
+    # to the gibbon/nomLeu3/trackDb.ra to turn on the track in the browser
+
+    # nomLeu3 doesn't have refGene, measure against xenoRefGene
+    featureBits -enrichment nomLeu3 xenoRefGene ncbiRefSeq
+ # xenoRefGene 2.683%, ncbiRefSeq 2.266%, both 1.927%, cover 71.82%, enrich 31.69x
+
+    featureBits -enrichment nomLeu3 ncbiRefSeq xenoRefGene
+ # ncbiRefSeq 2.266%, xenoRefGene 2.683%, both 1.927%, cover 85.04%, enrich 31.69x
+
+    featureBits -enrichment nomLeu3 ncbiRefSeqCurated xenoRefGene
+ # ncbiRefSeqCurated 0.002%, xenoRefGene 2.683%, both 0.002%, cover 82.59%, enrich 30.78x
+
+    featureBits -enrichment nomLeu3 xenoRefGene ncbiRefSeqCurated
+ # xenoRefGene 2.683%, ncbiRefSeqCurated 0.002%, both 0.002%, cover 0.06%, enrich 30.78x
+
+#############################################################################
+# cytoBandIdeo - (DONE - 2022-05-26 - Hiram)
+    mkdir /hive/data/genomes/nomLeu3/bed/cytoBand
+    cd /hive/data/genomes/nomLeu3/bed/cytoBand
+    makeCytoBandIdeo.csh nomLeu3
+
+#############################################################################