b6aee4c6471cddebd638fec8dbb988c29a69bc22 markd Thu Apr 23 21:58:41 2026 -0700 import of GENCODE V50, MV39, and V50lift37; added a command to do import with a single command diff --git src/hg/makeDb/trackDb/human/hg38/wgEncodeGencodeV50.ra src/hg/makeDb/trackDb/human/hg38/wgEncodeGencodeV50.ra new file mode 100644 index 00000000000..dde0ce1b698 --- /dev/null +++ src/hg/makeDb/trackDb/human/hg38/wgEncodeGencodeV50.ra @@ -0,0 +1,247 @@ +track wgEncodeGencodeV50 +compositeTrack on +superTrack wgEncodeGencodeSuper pack +shortLabel All GENCODE V50 +longLabel All GENCODE annotations from V50 (Ensembl 116) +group genes +dragAndDrop subTracks +priority 34.155 +visibility pack +subGroup1 view View aGenes=Genes bPolya=PolyA +subGroup2 name Name Basic=Basic Comprehensive=Comprehensive Pseudogenes=Pseudogenes zPolyA=PolyA +allButtonPair on +sortOrder name=+ view=+ +fileSortOrder labVersion=Contents dccAccession=UCSC_Accession +type genePred +configurable off +wgEncodeGencodeVersion 50 +maxTransEnabled on + + track wgEncodeGencodeV50ViewGenes + shortLabel Genes + view aGenes + configurable on + visibility pack + subTrack wgEncodeGencodeV50 + type genePred + idXref wgEncodeGencodeAttrsV50 transcriptId geneId + itemClassTbl wgEncodeGencodeAttrsV50 + itemClassNameColumn transcriptId + itemClassClassColumn transcriptClass + cdsDrawDefault genomic\ codons + baseColorUseCds given + baseColorDefault genomicCodons + geneClasses coding nonCoding pseudo problem + gClass_coding 12,12,120 + gClass_nonCoding 0,153,0 + gClass_pseudo 255,51,255 + gClass_problem 254,0,0 + highlightColor 255,255,0 + # filterBy notes: + # - attrs is an alias for the current wgEncodeGencodeAttrs in the sql + # - transcriptMethod is a pseudo-column name, which is handled explicitly in the code + # - attrs.transcriptType are transcript biotypes. 
This will get the current list of values: + # hgsql -Ne 'select distinct(transcriptType) from wgEncodeGencodeAttrsV50 order by transcriptType' hg38 + # - tag is a pseudo-column name for the join with the tag table. This will get the current list of values: + # hgsql -Ne 'select distinct(tag) from wgEncodeGencodeTagV50 order by tag' hg38 + # - supportLevel is a pseudo-column name handled in the code + filterBy attrs.transcriptClass:Transcript_Class=coding,nonCoding,pseudo,problem \ + transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \ + attrs.transcriptType:Transcript_Biotype=artifact,IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_J_gene,IG_J_pseudogene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,rRNA_pseudogene,scaRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_processed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene,vault_RNA \ + 
tag:Tag=3_nested_supported_extension,3_standard_supported_extension,454_RNA_Seq_supported,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_alternative_M,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,appris_principal_M,artifactual_duplication,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,Ensembl_canonical_extended,EnsEMBL_merge_exception,exp_conf,fragmented_locus,fragmented_mixed_strand_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,MANE_Plus_Clinical,MANE_Select,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,nested_454_RNA_Seq_supported,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,pseudo_consens,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \ + supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA + highlightBy transcriptMethod:Transcript_Annotation_Method=manual,automatic,manual_only,automatic_only \ + 
attrs.transcriptType:Transcript_Biotype=artifact,IG_C_gene,IG_C_pseudogene,IG_D_gene,IG_J_gene,IG_J_pseudogene,IG_pseudogene,IG_V_gene,IG_V_pseudogene,lncRNA,miRNA,misc_RNA,Mt_rRNA,Mt_tRNA,nonsense_mediated_decay,non_stop_decay,processed_pseudogene,processed_transcript,protein_coding,protein_coding_CDS_not_defined,protein_coding_LoF,pseudogene,retained_intron,ribozyme,rRNA,rRNA_pseudogene,scaRNA,snoRNA,snRNA,sRNA,TEC,transcribed_processed_pseudogene,transcribed_unitary_pseudogene,transcribed_unprocessed_pseudogene,translated_processed_pseudogene,TR_C_gene,TR_D_gene,TR_J_gene,TR_J_pseudogene,TR_V_gene,TR_V_pseudogene,unitary_pseudogene,unprocessed_pseudogene,vault_RNA \ + tag:Tag=3_nested_supported_extension,3_standard_supported_extension,454_RNA_Seq_supported,5_nested_supported_extension,5_standard_supported_extension,alternative_3_UTR,alternative_5_UTR,appris_alternative_1,appris_alternative_2,appris_alternative_M,appris_principal_1,appris_principal_2,appris_principal_3,appris_principal_4,appris_principal_5,appris_principal_M,artifactual_duplication,basic,bicistronic,CAGE_supported_TSS,CCDS,cds_end_NF,cds_start_NF,confirm_experimentally,dotter_confirmed,downstream_ATG,Ensembl_canonical,Ensembl_canonical_extended,EnsEMBL_merge_exception,exp_conf,fragmented_locus,fragmented_mixed_strand_locus,GENCODE_Primary,inferred_exon_combination,inferred_transcript_model,low_sequence_quality,MANE_Plus_Clinical,MANE_Select,mRNA_end_NF,mRNA_start_NF,NAGNAG_splice_site,ncRNA_host,nested_454_RNA_Seq_supported,NMD_exception,NMD_likely_if_extended,non_ATG_start,non_canonical_conserved,non_canonical_genome_sequence_error,non_canonical_other,non_canonical_polymorphism,non_canonical_TEC,non_canonical_U12,non_submitted_evidence,not_best_in_genome_evidence,not_organism_supported,overlapping_locus,overlapping_uORF,overlaps_pseudogene,polymorphic_pseudogene_no_stop,precursor_RNA,pseudo_consens,readthrough_gene,readthrough_transcript,reference_genome_error,retained_intron_CDS,retained_intron_
final,retained_intron_first,retrogene,RNA_Seq_supported_only,RNA_Seq_supported_partial,RP_supported_TIS,seleno,Selenoprotein,semi_processed,sequence_error,stop_codon_readthrough,TAGENE,upstream_ATG,upstream_uORF \ + supportLevel:Support_Level=tsl1,tsl2,tsl3,tsl4,tsl5,tslNA + + track wgEncodeGencodeBasicV50 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeV50ViewGenes on + shortLabel Basic + subGroups view=aGenes name=Basic + longLabel Basic Gene Annotation Set from GENCODE Version 50 (Ensembl 116) + type genePred + priority 1 + + track wgEncodeGencodeCompV50 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeV50ViewGenes off + subGroups view=aGenes name=Comprehensive + shortLabel Comprehensive + longLabel Comprehensive Gene Annotation Set from GENCODE Version 50 (Ensembl 116) + type genePred + priority 2 + + track wgEncodeGencodePseudoGeneV50 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeV50ViewGenes on + subGroups view=aGenes name=Pseudogenes + shortLabel Pseudogenes + longLabel Pseudogene Annotation Set from GENCODE Version 50 (Ensembl 116) + type genePred + color 255,51,255 + priority 3 + + track wgEncodeGencodeV50ViewPolya + shortLabel PolyA + view bPolya + visibility hide + subTrack wgEncodeGencodeV50 + type genePred + configurable off + + track wgEncodeGencodePolyaV50 + trackHandler wgEncodeGencode + subTrack wgEncodeGencodeV50ViewPolya off + subGroups view=bPolya name=zPolyA + shortLabel PolyA + longLabel PolyA Transcript Annotation Set from GENCODE Version 50 (Ensembl 116) + type genePred + color 0,0,0 + priority 5 + +# searches for basic +searchName wgEncodeGencodeBasicV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod prefix +searchType genePred +termRegex ENST[0-9.]+ +searchPriority 2.24701 + +searchName wgEncodeGencodeBasicGeneSymV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod exact +searchType genePred +searchPriority 2.24702 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName 
wgEncodeGencodeBasicGeneV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod prefix +searchType genePred +termRegex ENSG[0-9.]+ +searchPriority 2.24703 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodeBasicHavanaTranscriptV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMT[0-9.]+ +searchPriority 2.24704 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName wgEncodeGencodeBasicHavanaGeneV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMG[0-9.]+ +searchPriority 2.24706 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' + +searchName wgEncodeGencodeBasicProtV50 +searchTable wgEncodeGencodeBasicV50 +searchMethod prefix +searchType genePred +termRegex ENSP[0-9.]+ +searchPriority 2.24707 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%' + +# searches for comp +searchName wgEncodeGencodeCompV50 +searchTable wgEncodeGencodeCompV50 +searchMethod prefix +searchType genePred +termRegex ENST[0-9.]+ +searchPriority 2.24708 + +searchName wgEncodeGencodeCompGeneSymV50 +searchTable wgEncodeGencodeCompV50 +searchMethod exact +searchType genePred +searchPriority 2.24709 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName wgEncodeGencodeCompGeneV50 +searchTable wgEncodeGencodeCompV50 +searchMethod prefix +searchType genePred +termRegex ENSG[0-9.]+ +searchPriority 2.24710 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodeCompHavanaTranscriptV50 +searchTable wgEncodeGencodeCompV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMT[0-9.]+ +searchPriority 
2.24711 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName wgEncodeGencodeCompHavanaGeneV50 +searchTable wgEncodeGencodeCompV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMG[0-9.]+ +searchPriority 2.24712 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' + +searchName wgEncodeGencodeCompProtV50 +searchTable wgEncodeGencodeCompV50 +searchMethod prefix +searchType genePred +termRegex ENSP[0-9.]+ +searchPriority 2.24713 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,proteinId from %s where proteinId like '%s%%' + +# searches for pseudogene +searchName wgEncodeGencodePseudoGeneV50 +searchTable wgEncodeGencodePseudoGeneV50 +searchMethod prefix +searchType genePred +termRegex ENST[0-9.]+ +searchPriority 2.24714 + +searchName wgEncodeGencodePseudoGeneGeneSymV50 +searchTable wgEncodeGencodePseudoGeneV50 +searchMethod exact +searchType genePred +searchPriority 2.24715 +query select chrom, txStart, txEnd, name2 from %s where name2 like '%s' + +searchName wgEncodeGencodePseudoGeneGeneV50 +searchTable wgEncodeGencodePseudoGeneV50 +searchMethod prefix +searchType genePred +termRegex ENSG[0-9.]+ +searchPriority 2.24716 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,geneId from %s where geneId like '%s%%' + +searchName wgEncodeGencodePseudoGeneHavanaTranscriptV50 +searchTable wgEncodeGencodePseudoGeneV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMT[0-9.]+ +searchPriority 2.24717 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select transcriptId,havanaTranscriptId from %s where havanaTranscriptId like '%s%%' + +searchName wgEncodeGencodePseudoGeneHavanaGeneV50 +searchTable wgEncodeGencodePseudoGeneV50 +searchMethod prefix +searchType genePred +termRegex OTTHUMG[0-9.]+ +searchPriority 2.24718 +xrefTable wgEncodeGencodeAttrsV50 +xrefQuery select 
transcriptId,havanaGeneId from %s where havanaGeneId like '%s%%' +