0bc6be88baf4061ca10c6f207331ec04e36c17f1
hiram
  Fri Feb 2 11:34:37 2024 -0800
better name for this table refs #23589

diff --git src/hg/lib/asmSummary.sql src/hg/lib/asmSummary.sql
new file mode 100644
index 0000000..fc6cb6c
--- /dev/null
+++ src/hg/lib/asmSummary.sql
@@ -0,0 +1,47 @@
+# asmSummary.sql was originally generated by the autoSql program, which also 
+# generated asmSummary.c and asmSummary.h.  This creates the database representation of
+# an object which can be loaded and saved from RAM in a fairly 
+# automatic way.
+
+#NCBI assembly_summary data, see: https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/README_assembly_summary.txt
+CREATE TABLE asmSummary (
+    assemblyAccession varchar(255) NOT NULL,	# www.ncbi.nlm.nih.gov/assembly/?term=xxx
+    bioproject varchar(255),	# www.ncbi.nlm.nih.gov/bioproject/?term=xxx
+    biosample varchar(255),	# www.ncbi.nlm.nih.gov/biosample/?term=xxx
+    wgsMaster varchar(255),	# www.ncbi.nlm.nih.gov/nuccore/xxx
+    refseqCategory varchar(255),	# representative or reference
+    taxId int unsigned NOT NULL,	# www.ncbi.nlm.nih.gov/taxonomy/?term=xxx
+    speciesTaxid int unsigned NOT NULL,	# www.ncbi.nlm.nih.gov/taxonomy/?term=xxx
+    organismName varchar(255) NOT NULL,	# binomial scientific name
+    infraspecificName varchar(255),	# strain/cultivar/ecotype/breed
+    isolate varchar(255),	# source of sample
+    versionStatus varchar(255) NOT NULL,	# latest/suppressed/replaced
+    assemblyLevel varchar(255) NOT NULL,	# Contig/Scaffold/Complete Genome/Chromosome
+    releaseType varchar(255) NOT NULL,	# Major/Minor/Patch
+    genomeRep varchar(255) NOT NULL,	# Full/Partial
+    seqRelDate varchar(255) NOT NULL,	# date YYYY/MM/DD sequence released to INSDC
+    asmName varchar(255) NOT NULL,	# submitter supplied name
+    asmSubmitter varchar(255),	# institution submitting assembly
+    gbrsPairedAsm varchar(255),	# GenBank<->RefSeq GCA/GCF relationship
+    pairedAsmComp varchar(255),	# identical/different for GCA<->GCF relationship
+    ftpPath varchar(255),	# ftp.ncbi.nlm.nih.gov/genomes/all/GCx/012/345/678/asmId
+    excludedFromRefseq varchar(255),	# noted reason for exclusion from RefSeq
+    relationToTypeMaterial varchar(255),	# note of assembly relation to sample
+    assemblyType varchar(255) NOT NULL,	# haploid/diploid/haploid-with-alt-loci/alternate-pseudohaplotype
+    phyloGroup varchar(255) NOT NULL,	# bacteria/viral/archaea/fungi/metagenomes/invertebrate/other/vertebrate_other/plant/vertebrate_mammalian/protozoa
+    genomeSize bigint NOT NULL,	# total length of all top-level sequences in the primary assembly
+    genomeSizeUngapped bigint NOT NULL,	# genome length not counting gaps (gap == 10 or more Ns)
+    gcPercent float NOT NULL,	# GC percent
+    repliconCount smallint unsigned NOT NULL,	# total number of chromosomes, organelle genomes and plasmids
+    scaffoldCount int unsigned NOT NULL,	# number of scaffolds: placed, unlocalized, unplaced, alternate loci and patch
+    contigCount int unsigned NOT NULL,	# number of contigs in the primary assembly
+    annotationProvider varchar(255) NOT NULL,	# the group that provided the annotation on the assembly
+    annotationName varchar(255) NOT NULL,	# the name of the annotation
+    annotationDate varchar(255) NOT NULL,	# annotation date YYYY/MM/DD
+    totalGeneCount varchar(255),	# total gene count in annotation
+    proteinCodingGeneCount int unsigned,	# protein coding gene count in annotation
+    nonCodingGeneCount varchar(255),	# non coding gene count in annotation
+    pubmedId varchar(255),	# comma separated list of PubMed ID(s)
+              #Indices
+    PRIMARY KEY(assemblyAccession)
+);