0bc6be88baf4061ca10c6f207331ec04e36c17f1 hiram Fri Feb 2 11:34:37 2024 -0800 better name for this table refs #23589 diff --git src/hg/lib/asmSummary.sql src/hg/lib/asmSummary.sql new file mode 100644 index 0000000..fc6cb6c --- /dev/null +++ src/hg/lib/asmSummary.sql @@ -0,0 +1,47 @@ +# asmSummary.sql was originally generated by the autoSql program, which also +# generated asmSummary.c and asmSummary.h. This creates the database representation of +# an object which can be loaded and saved from RAM in a fairly +# automatic way. + +#NCBI assembly_summary data, see: https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/README_assembly_summary.txt +CREATE TABLE asmSummary ( + assemblyAccession varchar(255) NOT NULL, # www.ncbi.nlm.nih.gov/assembly/?term=xxx + bioproject varchar(255), # www.ncbi.nlm.nih.gov/bioproject/?term=xxx + biosample varchar(255), # www.ncbi.nlm.nih.gov/biosample/?term=xxx + wgsMaster varchar(255), # www.ncbi.nlm.nih.gov/nuccore/xxx + refseqCategory varchar(255), # representative or reference + taxId int unsigned NOT NULL, # www.ncbi.nlm.nih.gov/taxonomy/?term=xxx + speciesTaxid int unsigned NOT NULL, # www.ncbi.nlm.nih.gov/taxonomy/?term=xxx + organismName varchar(255) NOT NULL, # binomial scientific name + infraspecificName varchar(255), # strain/cultivar/ecotype/breed + isolate varchar(255), # source of sample + versionStatus varchar(255) NOT NULL, # latest/suppressed/replaced + assemblyLevel varchar(255) NOT NULL, # Contig/Scaffold/Complete Genome/Chromosome + releaseType varchar(255) NOT NULL, # Major/Minor/Patch + genomeRep varchar(255) NOT NULL, # Full/Partial + seqRelDate varchar(255) NOT NULL, # date YYYY/MM/DD sequence released to INSDC + asmName varchar(255) NOT NULL, # submitter supplied name + asmSubmitter varchar(255), # institution submitting assembly + gbrsPairedAsm varchar(255), # GenBank<->RefSeq GCA/GCF relationship + pairedAsmComp varchar(255), # identical/different for GCA<->GCF relationship + ftpPath varchar(255), # 
ftp.ncbi.nlm.nih.gov/genomes/all/GCx/012/345/678/asmId + excludedFromRefseq varchar(255), # noted reason for exclusion from RefSeq + relationToTypeMaterial varchar(255), # note of assembly relation to sample + assemblyType varchar(255) NOT NULL, # haploid/diploid/haploid-with-alt-loci/alternate-pseudohaplotype + phyloGroup varchar(255) NOT NULL, # bacteria/viral/archaea/fungi/metagenomes/invertebrate/other/vertebrate_other/plant/vertebrate_mammalian/protozoa + genomeSize bigint NOT NULL, # total length of all top-level sequences in the primary assembly + genomeSizeUngapped bigint NOT NULL, # genome length not counting gaps (gap == 10 or more Ns) + gcPercent float NOT NULL, # GC percent + repliconCount smallint unsigned NOT NULL, # total number of chromosomes, organelle genomes and plasmids + scaffoldCount int unsigned NOT NULL, # number of scaffolds: placed, unlocalized, unplaced, alternate loci and patch + contigCount int unsigned NOT NULL, # number of contigs in the primary assembly + annotationProvider varchar(255) NOT NULL, # the group that provided the annotation on the assembly + annotationName varchar(255) NOT NULL, # the name of the annotation + annotationDate varchar(255) NOT NULL, # annotation date YYYY/MM/DD + totalGeneCount varchar(255), # total gene count in annotation (NOTE(review): text column, unlike int unsigned proteinCodingGeneCount; confirm intended) + proteinCodingGeneCount int unsigned, # protein coding gene count in annotation + nonCodingGeneCount varchar(255), # non coding gene count in annotation (NOTE(review): text column, unlike int unsigned proteinCodingGeneCount; confirm intended) + pubmedId varchar(255), # comma separated list of PubMed ID(s) + #Indices + PRIMARY KEY(assemblyAccession) +);