9b6958624673a00fdbe8447b7167e08301fd84bb baertsch Fri May 20 13:31:58 2011 -0700 retro track changes to add ucscRetroOrtho table and hgc, hgTrackUI changes. diff --git src/hg/hgc/retroClick.c src/hg/hgc/retroClick.c index f7ceadc..527fabc 100644 --- src/hg/hgc/retroClick.c +++ src/hg/hgc/retroClick.c @@ -1,46 +1,47 @@ /* retroClick - retroGene click handling */ /* * Tables associated with retroGenes: * retroXxxInfo - features of retroGene * retroXxxAli - mapped PSLs aligments * retroXxxAliGene - mapped alignmetns with CDS and frame annotation * * Xxx is: * - Mrna - GenBank mRNAs and refSeq */ #include "common.h" #include "hgc.h" #include "retroClick.h" -#include "retroMrnaInfo.h" +#include "ucscRetroInfo.h" +#include "ucscRetroOrtho.h" #include "genbank.h" #include "hui.h" /* combine blocks separated by gaps less than this number */ #define MAXBLOCKGAP 50 /* space to allocate for a id */ #define ID_BUFSZ 64 struct mappingInfo /* various pieces of information about mapping from table name and * retroXxxInfo table */ { char tblPre[64]; /* table prefix */ char geneSet[6]; /* source gene set abbrv used in table name */ - struct retroMrnaInfo *pg; /* general info for retro gene */ + struct ucscRetroInfo *pg; /* general info for retro gene */ boolean indirect; /* an indirect mapping */ char gbAcc[ID_BUFSZ]; /* src accession */ short gbVer; /* version from gbId */ char seqId[ID_BUFSZ]; /* id used to look up sequence, different than * srcAcc if multiple levels of mappings have * been done */ char suffix[ID_BUFSZ]; char *sym; /* src gene symbol and desc */ char *desc; short gbCurVer; /* version from genbank table */ }; static void parseSrcId(struct mappingInfo *mi) /* parse srcId parts and save in mi */ { @@ -137,51 +138,51 @@ } if (suffix != NULL) { suffix +=suffixLen; safef(mi->suffix,ID_BUFSZ,"%s",suffix); } preLen = strlen(mi->tblPre); if (startsWith("retroAugust", tbl)) strcpy(mi->geneSet, "August"); else if (startsWith("retro", tbl)) strcpy(mi->geneSet, "Mrna"); else strcpy(mi->geneSet, "Retro"); if (suffix != NULL && strlen(suffix) > 0) - mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)retroMrnaInfoLoad, sqlQueryMust|sqlQuerySingle, + mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)ucscRetroInfoLoad, sqlQueryMust|sqlQuerySingle, "select * from %s%sInfo%s where name='%s'", mi->tblPre, mi->geneSet, suffix, mappedId); else { - mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)retroMrnaInfoLoad, sqlQueryMust|sqlQuerySingle, + mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)ucscRetroInfoLoad, sqlQueryMust|sqlQuerySingle, "select * from %s%sInfo where name='%s'", mi->tblPre, mi->geneSet, mappedId); } parseSrcId(mi); getGenbankInfo(conn, mi); return mi; } static void mappingInfoFree(struct mappingInfo **mip) /* free mappingInfo object */ { struct mappingInfo *mi = *mip; if (mi != NULL) { - retroMrnaInfoFree(&mi->pg); + ucscRetroInfoFree(&mi->pg); freeMem(mi->sym); freeMem(mi->desc); } } static void displaySrcGene(struct sqlConnection *conn, struct mappingInfo *mi) /* display information about the source gene that was mapped */ { char srcGeneUrl[1024]; /* description will be NULL if deleted */ if (!startsWith("retroAugust",mi->geneSet)) getGenbankInfo(conn, mi); /* construct URL to browser */ @@ -193,50 +194,61 @@ printf("Source gene\n"); printf("\n"); printf("%s", srcGeneUrl, mi->pg->name); if (mi->desc == NULL) printf(" gene no longer in source database"); else printf("%s%s", mi->sym, mi->desc); printf("\n"); printf("\n"); } static void displayRetroDetails(struct sqlConnection *conn, struct mappingInfo *mi) /* display information from a retroXXXInfo table */ { -struct retroMrnaInfo *pg = mi->pg; +struct ucscRetroInfo *pg = mi->pg; +char query[256]; +char orthoTable[128]; +if (mi->suffix != NULL && strlen(mi->suffix) > 0) + safef(orthoTable, sizeof(orthoTable), "%s%sOrtho%s", + mi->tblPre, mi->geneSet, mi->suffix); +else + safef(orthoTable, sizeof(orthoTable), "%s%sOrtho", + mi->tblPre, mi->geneSet); + printf("\n"); printf("\n"); printf("\n"); printf("\n"); -if (sameString(organism,"Human")) - printf("\n", pg->overlapMouse); -if (sameString(organism,"Human")) - printf("\n", pg->overlapDog); -if (sameString(organism,"Human")) - printf("\n", puro->overlap); + } + sqlFreeResult(&sr); + } else - printf("\n", pg->overlapRhesus); + printf("", orthoTable); printf("
Orthology (net) BreakCoverage %%
Mouse "); -else - printf("
Dog "); -printf("%d
Dog "); -else - printf("
Rat "); -printf("%d
Rhesus "); +if (hTableExists(database, orthoTable)) + { + struct sqlResult *sr; + char **row; + safef(query, sizeof(query), "select * from %s where name = '%s' ", + orthoTable, pg->name); + sr = sqlGetResult(conn, query); + while ((row = sqlNextRow(sr)) != NULL) + { + struct ucscRetroOrtho *puro = ucscRetroOrthoLoad(row); + printf("
%s ", puro->db); + printf("%d
Human "); -printf("%d
table %s not found
\n"); } static struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd) /* Load a list of psls given qName tName tStart tEnd */ { struct sqlResult *sr = NULL; char **row; struct psl *psl = NULL, *pslList = NULL; boolean hasBin; char splitTable[64]; char query[256]; struct sqlConnection *conn = hAllocConn(database); hFindSplitTable(database, seqName, table, splitTable, &hasBin); @@ -385,31 +397,31 @@ psl->nCount = 0; psl->qNumInsert = gp->exonCount; psl->tNumInsert = 0; psl->tBaseInsert = 0; psl->qStart = 0; psl->qEnd = psl->qSize; psl->tSize = targetSize; psl->tStart = gp->txStart; psl->tEnd = gp->txEnd; psl->blockCount = gp->exonCount; return psl; } struct psl *getParentAligns(struct sqlConnection *conn, struct mappingInfo *mi, char **table) { -struct retroMrnaInfo *pg = mi->pg; +struct ucscRetroInfo *pg = mi->pg; struct psl *pslList = NULL; char query[512]; if (startsWith("August",mi->geneSet)) { if (hTableExists(database, "augustusXAli")) { *table = cloneString( "augustusXAli"); pslList = loadPslRangeT(*table, mi->seqId, pg->gChrom, pg->gStart, pg->gEnd); } else if (hTableExists(database, "augustusX")) { struct sqlResult *sr; char **row; int targetSize = 0; *table = cloneString( "augustusX"); @@ -440,68 +452,67 @@ *dotPtr = '\0'; pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd); if (pslList == NULL) { *table = cloneString( "refSeqAli"); pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd); } } else printf("no all_mrna table found
\n"); return pslList; } static void displayParentAligns(struct mappingInfo *mi, struct psl *pslList, char *table) { -struct retroMrnaInfo *pg = mi->pg; +struct ucscRetroInfo *pg = mi->pg; if (pslList != NULL && *table ) { printf("

Parent Locus/Parent mRNA Alignments

"); printAlignments(pslList, pslList->tStart, "htcCdnaAli", table, \ mi->gbAcc); } else printf("missing alignment %s chr %s:%d-%d from table %s
\n", mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd, table); } /* return count of coding exons */ int genePredcountCdsExons(struct genePred *gp) { int i; int count = 0; for (i=0; i<(gp->exonCount); i++) { if ( (gp->cdsStart <= gp->exonEnds[i]) && (gp->cdsEnd >= gp->exonStarts[i]) ) count++; } return count; } static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi) /* display information from a transMap table */ { -struct retroMrnaInfo *pg = mi->pg; -int overlapOrtholog = max(pg->overlapMouse, pg->overlapDog); +struct ucscRetroInfo *pg = mi->pg; double wt[12]; /* weights on score function*/ char query[512]; char *name; char alignTbl[128]; struct psl *psl; float coverFactor = 0; -float maxOverlap = 0, rawScore = 0; +float maxOverlap = 0; if (mi->suffix == NULL) safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet); else safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix); printf("\n"); printf("\n"); printf("\n"); printf("\n"); printf("\n"); if (sameString(pg->type, "singleExon")) printf("\n",pg->type); else printf("\n",pg->type); printf("\n", pg->score, @@ -511,41 +522,30 @@ printf("\n", pg->conservedSpliceSites); printf("\n", pg->parentSpliceCount); psl = getAlignments(conn, alignTbl, mi->pg->name); if (psl != NULL) { maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch) ; coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize); } else { maxOverlap = 0; } wt[0] = 0; wt[1] = 0.85; wt[2] = 0.2; wt[3] = 0.3; wt[4] = 0.8; wt[5] = 1; wt[6] = 1 ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1; -rawScore = wt[0]*pg->milliBad+ - wt[1]*(log(pg->exonCover+1)/log(2))*200 + - wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000)+ - wt[3]*(log(pg->polyAlen+2)*200) + - wt[4]*overlapOrtholog*10 + - wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) + - (float)wt[6]*pow(pg->intronCount,0.5)*750 + - (float)wt[7]*(maxOverlap*300)+ - wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0)+ - wt[9]*(pg->tReps*10)+ - wt[10]*pg->oldIntronCount; #ifdef debug char table[512]; struct psl *pslList = getParentAligns(conn, mi, &table); if (psl != NULL) { printf("\n"); } if (pslList != NULL) { printf("\n"); pslFreeList(&pslList); @@ -569,41 +569,41 @@ wt[1]*(log(pg->exonCover+1)/log(2))*200 , pg->conservedSpliceSites, wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog, pg->processedIntrons, wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) , pg->intronCount, wt[6]*pow(pg->intronCount,0.5)*750 , wt[7]*(maxOverlap*300), pg->coverage, pg->qEnd, pg->qSize , pg->qSize, wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), pg->oldIntronCount, wt[10]*pg->oldIntronCount); -printf("\n", +printf("\n", wt[1]*(log(pg->exonCover+1)/log(2))*200 , wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) , (float)wt[6]*pow(pg->intronCount,0.5)*750 , (float)wt[7]*(maxOverlap*300), wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), - wt[10]*pg->oldIntronCount, rawScore , rawScore/3.0); + wt[10]*pg->oldIntronCount); if (pg->kaku > 0 && pg->kaku < 1000000) printf("\n", pg->kaku); #endif #ifdef xxx safef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'", pg->chrom, pg->gStart, pg->gEnd , pg->overName ); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) overlappingGene = genePredLoad(row); if (overlappingGene != NULL) { printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene)); } #endif
Retrogene stats
FeatureValue
Type of Parent%s
Expression of Retrocopy%s
Score %d (range from 0 - %d)
Possible Introns (or gaps) in Retro%d + %d\n", pg->intronCount, pg->oldIntronCount); printf("
Conserved Splice Sites%d
Parent Splice Sites%d
Blocks in retro:gap%%/intronsSpliced \n"); printBlocks(psl, MAXBLOCKGAP, pslList); printf("
Exons in parent:gap%% \n"); printBlocks(pslList, MAXBLOCKGAP, NULL); printf("
score function%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f=%4.1f %4.1f
score function%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f
KA/KU mutation rate in non-syn sites vs utr with repect to parent gene%4.2f