b6aee4c6471cddebd638fec8dbb988c29a69bc22 markd Thu Apr 23 21:58:41 2026 -0700 import of GENCODE V50, MV39, and V50lift37; added a command to do import with a single command diff --git src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra new file mode 100644 index 00000000000..098e09fab87 --- /dev/null +++ src/hg/makeDb/trackDb/mouse/mm39/wgEncodeGencodeVM39.ra @@ -0,0 +1,247 @@ +track wgEncodeGencodeVM39 +compositeTrack on +superTrack wgEncodeGencodeSuper pack +shortLabel All GENCODE VM39 +longLabel All GENCODE annotations from VM39 (Ensembl 116) +group genes +dragAndDrop subTracks +priority 2.945 +visibility pack +subGroup1 view View aGenes=Genes bPolya=PolyA +subGroup2 name Name Basic=Basic Comprehensive=Comprehensive Pseudogenes=Pseudogenes zPolyA=PolyA +allButtonPair on +sortOrder name=+ view=+ +fileSortOrder labVersion=Contents dccAccession=UCSC_Accession +type genePred +configurable off +wgEncodeGencodeVersion M39 +maxTransEnabled on + + track wgEncodeGencodeVM39ViewGenes + shortLabel Genes + view aGenes + configurable on + visibility pack + subTrack wgEncodeGencodeVM39 + type genePred + idXref wgEncodeGencodeAttrsVM39 transcriptId geneId + itemClassTbl wgEncodeGencodeAttrsVM39 + itemClassNameColumn transcriptId + itemClassClassColumn transcriptClass + cdsDrawDefault genomic\ codons + baseColorUseCds given + baseColorDefault genomicCodons + geneClasses coding nonCoding pseudo problem + gClass_coding 12,12,120 + gClass_nonCoding 0,153,0 + gClass_pseudo 255,51,255 + gClass_problem 254,0,0 + highlightColor 255,255,0 + # filterBy notes: + # - attrs is an alias for the current wgEncodeGencodeAttrs in the sql + # - transcriptMethod is a pseudo-column name, which is handled explicitly in the code + # - attrs.transcriptType are transcript biotypes. 
This will get the current list of values: + # hgsql -Ne 'select distinct(transcriptType) from wgEncodeGencodeAttrsVM39 order by transcriptType' mm39 + # - tag - is a pseudo-column name for join with the tag table. This will get the current list of values: + # hgsql -Ne 'select distinct(tag) from wgEncodeGencodeTagVM39 order by tag' mm39 + # - supportLevel is a pseudo-column name handled in the code + filterBy attrs.transcriptClass:Transcript_Class=coding,nonCoding,pseudo,problem \ + transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \ + attrs.transcriptType:Transcript_Biotype=IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_D_pseudogene,IG_J_gene,IG_LV_gene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,scaRNA,scRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_unprocessed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene \ + 
tag:Tag=3_nested_supported_extension,3_standard_supported_extension,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,EnsEMBL_merge_exception,exp_conf,fragmented_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \ + supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA + highlightBy transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \ + 
attrs.transcriptType:Transcript_Biotype=IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_D_pseudogene,IG_J_gene,IG_LV_gene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,scaRNA,scRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_unprocessed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene \ + tag:Tag=3_nested_supported_extension,3_standard_supported_extension,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,EnsEMBL_merge_exception,exp_conf,fragmented_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \ + 
supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA + + track wgEncodeGencodeBasicVM39 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeVM39ViewGenes on + shortLabel Basic + subGroups view=aGenes name=Basic + longLabel Basic Gene Annotation Set from GENCODE Version M39 (Ensembl 116) + type genePred + priority 1 + + track wgEncodeGencodeCompVM39 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeVM39ViewGenes off + subGroups view=aGenes name=Comprehensive + shortLabel Comprehensive + longLabel Comprehensive Gene Annotation Set from GENCODE Version M39 (Ensembl 116) + type genePred + priority 2 + + track wgEncodeGencodePseudoGeneVM39 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeVM39ViewGenes on + subGroups view=aGenes name=Pseudogenes + shortLabel Pseudogenes + longLabel Pseudogene Annotation Set from GENCODE Version M39 (Ensembl 116) + type genePred + color 255,51,255 + priority 3 + + track wgEncodeGencodeVM39ViewPolya + shortLabel PolyA + view bPolya + visibility hide + subTrack wgEncodeGencodeVM39 + type genePred + configurable off + + track wgEncodeGencodePolyaVM39 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeVM39ViewPolya off + subGroups view=bPolya name=zPolyA + shortLabel PolyA + longLabel PolyA Transcript Annotation Set from GENCODE Version M39 (Ensembl 116) + type genePred + color 0,0,0 + priority 5 + +# searches for basic +searchName wgEncodeGencodeBasicVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUST[0-9.]+ +searchPriority 2.23501 + +searchName wgEncodeGencodeBasicGeneSymVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod exact +searchType genePred +searchPriority 2.23502 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName wgEncodeGencodeBasicGeneVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUSG[0-9.]+ +searchPriority 2.23503 +xrefTable wgEncodeGencodeAttrsVM39 
+xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodeBasicHavanaTranscriptVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUST[0-9.]+ +searchPriority 2.23504 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName wgEncodeGencodeBasicHavanaGeneVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUSG[0-9.]+ +searchPriority 2.23506 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' + +searchName wgEncodeGencodeBasicProtVM39 +searchTable wgEncodeGencodeBasicVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUSP[0-9.]+ +searchPriority 2.23507 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%' + +# searches for comp +searchName wgEncodeGencodeCompVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUST[0-9.]+ +searchPriority 2.23508 + +searchName wgEncodeGencodeCompGeneSymVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod exact +searchType genePred +searchPriority 2.23509 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName wgEncodeGencodeCompGeneVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUSG[0-9.]+ +searchPriority 2.23510 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodeCompHavanaTranscriptVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUST[0-9.]+ +searchPriority 2.23511 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName 
wgEncodeGencodeCompHavanaGeneVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUSG[0-9.]+ +searchPriority 2.23512 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' + +searchName wgEncodeGencodeCompProtVM39 +searchTable wgEncodeGencodeCompVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUSP[0-9.]+ +searchPriority 2.23513 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%' + +# searches for pseudogene +searchName wgEncodeGencodePseudoGeneVM39 +searchTable wgEncodeGencodePseudoGeneVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUST[0-9.]+ +searchPriority 2.23514 + +searchName wgEncodeGencodePseudoGeneGeneSymVM39 +searchTable wgEncodeGencodePseudoGeneVM39 +searchMethod exact +searchType genePred +searchPriority 2.23515 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName wgEncodeGencodePseudoGeneGeneVM39 +searchTable wgEncodeGencodePseudoGeneVM39 +searchMethod prefix +searchType genePred +termRegex ENSMUSG[0-9.]+ +searchPriority 2.23516 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodePseudoGeneHavanaTranscriptVM39 +searchTable wgEncodeGencodePseudoGeneVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUST[0-9.]+ +searchPriority 2.23517 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName wgEncodeGencodePseudoGeneHavanaGeneVM39 +searchTable wgEncodeGencodePseudoGeneVM39 +searchMethod prefix +searchType genePred +termRegex OTTMUSG[0-9.]+ +searchPriority 2.23518 +xrefTable wgEncodeGencodeAttrsVM39 +xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' +