4bd23630e7a8c9a909c69900c8b67b992a993e91
hiram
  Mon Jan 13 13:15:52 2020 -0800
two new categories of gap types to count: repeat and contamination; refs #24748

diff --git src/hg/utils/automation/asmHubGap.pl src/hg/utils/automation/asmHubGap.pl
index ae9ff12..1a12b96 100755
--- src/hg/utils/automation/asmHubGap.pl
+++ src/hg/utils/automation/asmHubGap.pl
@@ -26,30 +26,32 @@
 
 if ( ! -s $agpFile ) {
   printf STDERR "ERROR: can not find AGP file:\n\t'%s'\n", $agpFile;
   exit 255;
 }
 
 # definition of gap types in the AGP file
 my %gapTypes = (
 'clone' => 'gaps between clones in scaffolds',
 'heterochromatin' => 'heterochromatin gaps',
 'short_arm' => 'short arm gaps',
 'telomere' => 'telomere gaps',
 'centromere' => 'gaps for centromeres are included when they can be reasonably localized',
 'scaffold' => 'gaps between scaffolds in chromosome assemblies',
 'contig' => 'gaps between contigs in scaffolds',
+'repeat' => 'an unresolvable repeat',
+'contamination' => 'gap inserted in place of foreign sequence to maintain the coordinates',
 'other' => 'gaps added at UCSC to annotate strings of <em>N</em>s that were not marked in the AGP file',
 'fragment' => 'gaps between whole genome shotgun contigs'
 );
 
 my $em = "<em>";
 my $noEm = "</em>";
 my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
 chomp $assemblyDate;
 my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
 chomp $ncbiAssemblyId;
 my $organism = `grep -v "^#" $namesFile | cut -f5`;
 chomp $organism;
 my $gapCount = `zcat $agpFile | grep -v "^#" | awk -F'\t' '\$5 == "N"' | wc -l`;
 chomp $gapCount;
 $gapCount = &AsmHub::commify($gapCount);