src/hg/utils/otto/uniprot/bigPslUniprot.as 1a36012f9f2eb9d087e503b1fd2c37f3a9adedfd

1a36012f9f2eb9d087e503b1fd2c37f3a9adedfd
max
  Fri Nov 26 08:11:01 2021 -0800
A major update for the UniProt otto job, refs #28560

diff --git src/hg/utils/otto/uniprot/bigPslUniprot.as src/hg/utils/otto/uniprot/bigPslUniprot.as
index 308024b..cb01cfd 100644
--- src/hg/utils/otto/uniprot/bigPslUniprot.as
+++ src/hg/utils/otto/uniprot/bigPslUniprot.as
@@ -1,57 +1,58 @@
 table bigPsl
 "bigPsl pairwise alignment"
     (
     string chrom;       "Reference sequence chromosome or scaffold"
     uint   chromStart;  "Start position in chromosome"
     uint   chromEnd;    "End position in chromosome"
-    string name;        "Name or ID of item, ideally both human readable and unique"
+    string name;        "UniProt isoform seq. ID"
     uint score;         "Score (0-1000)"
     char[1] strand;     "+ or - indicates whether the query aligns to the + or - strand on the reference"
     uint thickStart;    "Start of where display should be thick (start codon)"
     uint thickEnd;      "End of where display should be thick (stop codon)"
     uint reserved;       "RGB value (use R,G,B string in input file)"
     int blockCount;     "Number of blocks"
     int[blockCount] blockSizes; "Comma separated list of block sizes"
     int[blockCount] chromStarts; "Start positions relative to chromStart"
 
     uint    oChromStart;"Start position in other chromosome"
     uint    oChromEnd;  "End position in other chromosome"
     char[1] oStrand;    "+ or -, - means that psl was reversed into BED-compatible coordinates" 
     uint    oChromSize; "Size of other chromosome."
     int[blockCount] oChromStarts; "Start positions relative to oChromStart or from oChromStart+oChromSize depending on strand"
 
     lstring  oSequence;  "Sequence on other chrom (or edit list, or empty)"
     string   oCDS;       "CDS in NCBI format"
 
     uint    chromSize;"Size of target chromosome"
 
     uint match;        "Number of bases matched."
     uint misMatch; " Number of bases that don't match "
     uint repMatch; " Number of bases that match but are part of repeats "
     uint nCount;   " Number of 'N' bases "
     uint seqType;    "0=empty, 1=nucleotide, 2=amino_acid"
 
-    string acc; "UniProt main accession"
-    lstring uniprotName; "UniProt main record name"
+    lstring transList; "Mapped to genome through these transcripts"
+    string acc; "UniProt record accession"
+    lstring uniprotName; "UniProt record name"
     string status; "UniProt status"
-    lstring accList; "UniProt all accessions"
-    lstring isoIds; "UniProt isoform accessions"
+    lstring accList; "UniProt previous and alternative accessions"
+    lstring isoIds; "All UniProt sequence isoform accessions"
 
     lstring protFullNames; "UniProt protein name"
     lstring protShortNames; "UniProt protein short name"
     lstring protAltFullNames; "UniProt alternative names"
     lstring protAltShortNames; "UniProt alternative short names"
     lstring geneName; "UniProt gene name"
     lstring geneSynonyms; "UniProt gene synonyms"
     lstring functionText; "UniProt function"
 
     lstring hgncSym; "HGNC Gene Symbol"
-    lstring hgncId; "HGNC ID"
-    lstring refSeq; "RefSeq IDs"
+    lstring hgncId; "HGNC IDs"
+    lstring refSeq; "RefSeq Transcript IDs"
     lstring refSeqProt; "RefSeq Protein IDs"
-    lstring entrezGene; "NCBI Entrez Gene"
+    lstring entrezGene; "NCBI Gene IDs"
     lstring ensGene; "Ensembl Gene IDs"
-    lstring ensTrans; "Ensembl Transcript IDs"
     lstring ensProt; "Ensembl Protein IDs"
+    lstring ensTrans; "Ensembl Transcript IDs"
     )