b6aee4c6471cddebd638fec8dbb988c29a69bc22
markd
  Thu Apr 23 21:58:41 2026 -0700
import of GENCODE V50, VM39, and V50lift37; added a command to do import with a single command

diff --git src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra
new file mode 100644
index 00000000000..098e09fab87
--- /dev/null
+++ src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra
@@ -0,0 +1,247 @@
+track wgEncodeGencodeVM39
+compositeTrack on
+superTrack wgEncodeGencodeSuper pack
+shortLabel All GENCODE VM39
+longLabel All GENCODE annotations from VM39 (Ensembl 116)
+group genes
+dragAndDrop subTracks
+priority 2.945
+visibility pack
+subGroup1 view View aGenes=Genes bPolya=PolyA
+subGroup2 name Name Basic=Basic Comprehensive=Comprehensive Pseudogenes=Pseudogenes zPolyA=PolyA
+allButtonPair on
+sortOrder name=+ view=+
+fileSortOrder labVersion=Contents dccAccession=UCSC_Accession
+type genePred
+configurable off
+wgEncodeGencodeVersion M39
+maxTransEnabled on
+
+    track wgEncodeGencodeVM39ViewGenes
+    shortLabel Genes
+    view aGenes
+    configurable on
+    visibility pack
+    subTrack wgEncodeGencodeVM39
+    type genePred
+    idXref wgEncodeGencodeAttrsVM39 transcriptId geneId
+    itemClassTbl wgEncodeGencodeAttrsVM39
+    itemClassNameColumn transcriptId
+    itemClassClassColumn transcriptClass
+    cdsDrawDefault genomic\ codons
+    baseColorUseCds given
+    baseColorDefault genomicCodons
+    geneClasses coding nonCoding pseudo problem
+    gClass_coding 12,12,120
+    gClass_nonCoding 0,153,0
+    gClass_pseudo 255,51,255
+    gClass_problem 254,0,0
+    highlightColor 255,255,0
+    # filterBy notes:
+    #  - attrs is an alias for the current wgEncodeGencodeAttrs in the sql
+    #  - transcriptMethod is a pseudo-column name, which is handled explicitly in the code
+    #  - attrs.transcriptType are transcript biotypes.  This will get the current list of values:
+    #    hgsql -Ne 'select distinct(transcriptType) from wgEncodeGencodeAttrsVM39 order by transcriptType' mm39
+    #  - tag - is a pseudo-column name for join with the tag table.  This will get the current list of values:
+    #    hgsql -Ne 'select distinct(tag) from wgEncodeGencodeTagVM39 order by tag' mm39
+    #  - supportLevel is a pseudo-column name handled in the code
+    filterBy attrs.transcriptClass:Transcript_Class=coding,nonCoding,pseudo,problem \
+             transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \
+             attrs.transcriptType:Transcript_Biotype=IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_D_pseudogene,IG_J_gene,IG_LV_gene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,scaRNA,scRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_unprocessed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene \
+             tag:Tag=3_nested_supported_extension,3_standard_supported_extension,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,EnsEMBL_merge_exception,exp_conf,fragmented_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \
+             supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA
+    highlightBy transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \
+                attrs.transcriptType:Transcript_Biotype=IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_D_pseudogene,IG_J_gene,IG_LV_gene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,scaRNA,scRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_unprocessed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene \
+                tag:Tag=3_nested_supported_extension,3_standard_supported_extension,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,EnsEMBL_merge_exception,exp_conf,fragmented_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \
+                supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA
+
+        track wgEncodeGencodeBasicVM39
+        trackHandler wgEncodeGencode
+        subTrack wgEncodeGencodeVM39ViewGenes on
+        shortLabel Basic
+        subGroups view=aGenes name=Basic
+        longLabel Basic Gene Annotation Set from GENCODE Version M39 (Ensembl 116)
+        type genePred
+        priority 1
+
+        track wgEncodeGencodeCompVM39
+        trackHandler wgEncodeGencode
+        subTrack wgEncodeGencodeVM39ViewGenes off
+        subGroups view=aGenes name=Comprehensive
+        shortLabel Comprehensive
+        longLabel Comprehensive Gene Annotation Set from GENCODE Version M39 (Ensembl 116)
+        type genePred
+        priority 2
+
+        track wgEncodeGencodePseudoGeneVM39
+        trackHandler wgEncodeGencode
+        subTrack wgEncodeGencodeVM39ViewGenes on
+        subGroups view=aGenes name=Pseudogenes
+        shortLabel Pseudogenes
+        longLabel Pseudogene Annotation Set from GENCODE Version M39 (Ensembl 116)
+        type genePred
+        color 255,51,255
+        priority 3
+
+    track wgEncodeGencodeVM39ViewPolya
+    shortLabel PolyA
+    view bPolya
+    visibility hide
+    subTrack wgEncodeGencodeVM39
+    type genePred
+    configurable off
+
+        track wgEncodeGencodePolyaVM39
+        trackHandler wgEncodeGencode
+        subTrack wgEncodeGencodeVM39ViewPolya off
+        subGroups view=bPolya name=zPolyA
+        shortLabel PolyA
+        longLabel PolyA Transcript Annotation Set from GENCODE Version M39 (Ensembl 116)
+        type genePred
+        color 0,0,0
+        priority 5
+
+# searches for basic
+searchName wgEncodeGencodeBasicVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUST[0-9.]+
+searchPriority 2.23501
+
+searchName wgEncodeGencodeBasicGeneSymVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod exact
+searchType genePred
+searchPriority 2.23502
+query select chrom, txStart, txEnd, name2 from %s where name2 like '%s'
+
+searchName wgEncodeGencodeBasicGeneVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUSG[0-9.]+
+searchPriority 2.23503
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,geneId from %s where geneId like '%s%%'
+
+searchName wgEncodeGencodeBasicHavanaTranscriptVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUST[0-9.]+
+searchPriority 2.23504
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%'
+
+searchName wgEncodeGencodeBasicHavanaGeneVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUSG[0-9.]+
+searchPriority 2.23506
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%'
+
+searchName wgEncodeGencodeBasicProtVM39
+searchTable wgEncodeGencodeBasicVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUSP[0-9.]+
+searchPriority 2.23507
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%'
+
+# searches for comp
+searchName wgEncodeGencodeCompVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUST[0-9.]+
+searchPriority 2.23508
+
+searchName wgEncodeGencodeCompGeneSymVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod exact
+searchType genePred
+searchPriority 2.23509
+query select chrom, txStart, txEnd, name2 from %s where name2 like '%s'
+
+searchName wgEncodeGencodeCompGeneVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUSG[0-9.]+
+searchPriority 2.23510
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,geneId from %s where geneId like '%s%%'
+
+searchName wgEncodeGencodeCompHavanaTranscriptVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUST[0-9.]+
+searchPriority 2.23511
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%'
+
+searchName wgEncodeGencodeCompHavanaGeneVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUSG[0-9.]+
+searchPriority 2.23512
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%'
+
+searchName wgEncodeGencodeCompProtVM39
+searchTable wgEncodeGencodeCompVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUSP[0-9.]+
+searchPriority 2.23513
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%'
+
+# searches for pseudogene
+searchName wgEncodeGencodePseudoGeneVM39
+searchTable wgEncodeGencodePseudoGeneVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUST[0-9.]+
+searchPriority 2.23514
+
+searchName wgEncodeGencodePseudoGeneGeneSymVM39
+searchTable wgEncodeGencodePseudoGeneVM39
+searchMethod exact
+searchType genePred
+searchPriority 2.23515
+query select chrom, txStart, txEnd, name2 from %s where name2 like '%s'
+
+searchName wgEncodeGencodePseudoGeneGeneVM39
+searchTable wgEncodeGencodePseudoGeneVM39
+searchMethod prefix
+searchType genePred
+termRegex ENSMUSG[0-9.]+
+searchPriority 2.23516
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,geneId from %s where geneId like '%s%%'
+
+searchName wgEncodeGencodePseudoGeneHavanaTranscriptVM39
+searchTable wgEncodeGencodePseudoGeneVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUST[0-9.]+
+searchPriority 2.23517
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%'
+
+searchName wgEncodeGencodePseudoGeneHavanaGeneVM39
+searchTable wgEncodeGencodePseudoGeneVM39
+searchMethod prefix
+searchType genePred
+termRegex OTTMUSG[0-9.]+
+searchPriority 2.23518
+xrefTable wgEncodeGencodeAttrsVM39
+xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%'
+