9b6958624673a00fdbe8447b7167e08301fd84bb
baertsch
Fri May 20 13:31:58 2011 -0700
retro track changes to add ucscRetroOrtho table and hgc, hgTrackUI changes.
diff --git src/hg/hgc/retroClick.c src/hg/hgc/retroClick.c
index f7ceadc..527fabc 100644
--- src/hg/hgc/retroClick.c
+++ src/hg/hgc/retroClick.c
@@ -1,46 +1,47 @@
/* retroClick - retroGene click handling */
/*
* Tables associated with retroGenes:
* retroXxxInfo - features of retroGene
* retroXxxAli - mapped PSL alignments
* retroXxxAliGene - mapped alignments with CDS and frame annotation
*
* Xxx is:
* - Mrna - GenBank mRNAs and refSeq
*/
#include "common.h"
#include "hgc.h"
#include "retroClick.h"
-#include "retroMrnaInfo.h"
+#include "ucscRetroInfo.h"
+#include "ucscRetroOrtho.h"
#include "genbank.h"
#include "hui.h"
/* combine blocks separated by gaps less than this number */
#define MAXBLOCKGAP 50
/* space to allocate for an id */
#define ID_BUFSZ 64
struct mappingInfo
/* various pieces of information about mapping from table name and
* retroXxxInfo table */
{
char tblPre[64]; /* table prefix */
char geneSet[6]; /* source gene set abbrv used in table name */
- struct retroMrnaInfo *pg; /* general info for retro gene */
+ struct ucscRetroInfo *pg; /* general info for retro gene */
boolean indirect; /* an indirect mapping */
char gbAcc[ID_BUFSZ]; /* src accession */
short gbVer; /* version from gbId */
char seqId[ID_BUFSZ]; /* id used to look up sequence, different than
* srcAcc if multiple levels of mappings have
* been done */
char suffix[ID_BUFSZ];
char *sym; /* src gene symbol and desc */
char *desc;
short gbCurVer; /* version from genbank table */
};
static void parseSrcId(struct mappingInfo *mi)
/* parse srcId parts and save in mi */
{
@@ -137,51 +138,51 @@
}
if (suffix != NULL)
{
suffix +=suffixLen;
safef(mi->suffix,ID_BUFSZ,"%s",suffix);
}
preLen = strlen(mi->tblPre);
if (startsWith("retroAugust", tbl))
strcpy(mi->geneSet, "August");
else if (startsWith("retro", tbl))
strcpy(mi->geneSet, "Mrna");
else
strcpy(mi->geneSet, "Retro");
if (suffix != NULL && strlen(suffix) > 0)
- mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)retroMrnaInfoLoad, sqlQueryMust|sqlQuerySingle,
+ mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)ucscRetroInfoLoad, sqlQueryMust|sqlQuerySingle,
"select * from %s%sInfo%s where name='%s'", mi->tblPre, mi->geneSet, suffix,
mappedId);
else
{
- mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)retroMrnaInfoLoad, sqlQueryMust|sqlQuerySingle,
+ mi->pg = sqlQueryObjs(conn, (sqlLoadFunc)ucscRetroInfoLoad, sqlQueryMust|sqlQuerySingle,
"select * from %s%sInfo where name='%s'", mi->tblPre, mi->geneSet,
mappedId);
}
parseSrcId(mi);
getGenbankInfo(conn, mi);
return mi;
}
static void mappingInfoFree(struct mappingInfo **mip)
/* free mappingInfo object */
{
struct mappingInfo *mi = *mip;
if (mi != NULL)
{
- retroMrnaInfoFree(&mi->pg);
+ ucscRetroInfoFree(&mi->pg);
freeMem(mi->sym);
freeMem(mi->desc);
}
}
static void displaySrcGene(struct sqlConnection *conn, struct mappingInfo *mi)
/* display information about the source gene that was mapped */
{
char srcGeneUrl[1024];
/* description will be NULL if deleted */
if (!startsWith("retroAugust",mi->geneSet))
getGenbankInfo(conn, mi);
/* construct URL to browser */
@@ -193,50 +194,61 @@
printf("
Source gene\n");
printf("\n");
printf("%s", srcGeneUrl, mi->pg->name);
if (mi->desc == NULL)
printf(" | | gene no longer in source database");
else
printf(" | %s | %s", mi->sym, mi->desc);
printf("\n");
printf(" | \n");
}
static void displayRetroDetails(struct sqlConnection *conn, struct mappingInfo *mi)
/* display information from a retroXXXInfo table */
{
-struct retroMrnaInfo *pg = mi->pg;
+struct ucscRetroInfo *pg = mi->pg;
+char query[256];
+char orthoTable[128];
+if (mi->suffix != NULL && strlen(mi->suffix) > 0)
+ safef(orthoTable, sizeof(orthoTable), "%s%sOrtho%s",
+ mi->tblPre, mi->geneSet, mi->suffix);
+else
+ safef(orthoTable, sizeof(orthoTable), "%s%sOrtho",
+ mi->tblPre, mi->geneSet);
+
printf("\n");
printf("\n");
printf("Orthology (net) Break | Coverage %% |
\n");
printf("\n");
-if (sameString(organism,"Human"))
- printf("Mouse ");
-else
- printf(" |
---|
Dog ");
-printf(" | %d |
\n", pg->overlapMouse);
-if (sameString(organism,"Human"))
- printf("Dog ");
-else
- printf(" |
---|
Rat ");
-printf(" | %d |
\n", pg->overlapDog);
-if (sameString(organism,"Human"))
- printf("Rhesus ");
+if (hTableExists(database, orthoTable))
+ {
+ struct sqlResult *sr;
+ char **row;
+ safef(query, sizeof(query), "select * from %s where name = '%s' ",
+ orthoTable, pg->name);
+ sr = sqlGetResult(conn, query);
+ while ((row = sqlNextRow(sr)) != NULL)
+ {
+ struct ucscRetroOrtho *puro = ucscRetroOrthoLoad(row);
+ printf(" |
---|
%s ", puro->db);
+ printf(" | %d |
\n", puro->overlap);
+ }
+ sqlFreeResult(&sr);
+ }
else
- printf("Human ");
-printf(" | %d |
\n", pg->overlapRhesus);
+ printf("table %s not found |
", orthoTable);
printf("
\n");
}
static struct psl *loadPslRangeT(char *table, char *qName, char *tName, int tStart, int tEnd)
/* Load a list of psls given qName tName tStart tEnd */
{
struct sqlResult *sr = NULL;
char **row;
struct psl *psl = NULL, *pslList = NULL;
boolean hasBin;
char splitTable[64];
char query[256];
struct sqlConnection *conn = hAllocConn(database);
hFindSplitTable(database, seqName, table, splitTable, &hasBin);
@@ -385,31 +397,31 @@
psl->nCount = 0;
psl->qNumInsert = gp->exonCount;
psl->tNumInsert = 0;
psl->tBaseInsert = 0;
psl->qStart = 0;
psl->qEnd = psl->qSize;
psl->tSize = targetSize;
psl->tStart = gp->txStart;
psl->tEnd = gp->txEnd;
psl->blockCount = gp->exonCount;
return psl;
}
struct psl *getParentAligns(struct sqlConnection *conn, struct mappingInfo *mi, char **table)
{
-struct retroMrnaInfo *pg = mi->pg;
+struct ucscRetroInfo *pg = mi->pg;
struct psl *pslList = NULL;
char query[512];
if (startsWith("August",mi->geneSet))
{
if (hTableExists(database, "augustusXAli"))
{
*table = cloneString( "augustusXAli");
pslList = loadPslRangeT(*table, mi->seqId, pg->gChrom, pg->gStart, pg->gEnd);
}
else if (hTableExists(database, "augustusX"))
{
struct sqlResult *sr;
char **row;
int targetSize = 0;
*table = cloneString( "augustusX");
@@ -440,68 +452,67 @@
*dotPtr = '\0';
pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd);
if (pslList == NULL)
{
*table = cloneString( "refSeqAli");
pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd);
}
}
else
printf("no all_mrna table found
\n");
return pslList;
}
static void displayParentAligns(struct mappingInfo *mi, struct psl *pslList, char *table)
{
-struct retroMrnaInfo *pg = mi->pg;
+struct ucscRetroInfo *pg = mi->pg;
if (pslList != NULL && *table )
{
printf("Parent Locus/Parent mRNA Alignments
");
printAlignments(pslList, pslList->tStart, "htcCdnaAli", table, \
mi->gbAcc);
}
else
printf("missing alignment %s chr %s:%d-%d from table %s
\n",
mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd, table);
}
int genePredcountCdsExons(struct genePred *gp)
/* Return the number of exons in gp that contain at least one coding base.
 * Coordinates are treated as half-open (start inclusive, end exclusive),
 * which is assumed to match the genePred convention -- NOTE(review): confirm.
 * Under that convention an exon that only touches the CDS boundary
 * (exonEnd == cdsStart or exonStart == cdsEnd) holds no coding bases and is
 * not counted, and a record with an empty CDS (cdsStart == cdsEnd) yields 0. */
{
int i;
int count = 0;
for (i = 0; i < gp->exonCount; i++)
    {
    /* strict comparisons: the two half-open ranges must share a base */
    if ((gp->exonStarts[i] < gp->cdsEnd) &&
        (gp->exonEnds[i] > gp->cdsStart))
        count++;
    }
return count;
}
static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi)
/* display information from a transMap table */
{
-struct retroMrnaInfo *pg = mi->pg;
-int overlapOrtholog = max(pg->overlapMouse, pg->overlapDog);
+struct ucscRetroInfo *pg = mi->pg;
double wt[12]; /* weights on score function*/
char query[512];
char *name;
char alignTbl[128];
struct psl *psl;
float coverFactor = 0;
-float maxOverlap = 0, rawScore = 0;
+float maxOverlap = 0;
if (mi->suffix == NULL)
safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet);
else
safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix);
printf("\n");
printf("Retrogene stats\n");
printf("\n");
printf("Feature | Value |
\n");
printf("\n");
if (sameString(pg->type, "singleExon"))
printf("Type of Parent | %s |
\n",pg->type);
else
printf("Expression of Retrocopy | %s |
\n",pg->type);
printf("Score | %d (range from 0 - %d) |
\n",
pg->score,
@@ -511,41 +522,30 @@
printf("Possible Introns (or gaps) in Retro | %d + %d\n", pg->intronCount, pg->oldIntronCount);
printf(" |
---|
Conserved Splice Sites | %d |
\n", pg->conservedSpliceSites);
printf("Parent Splice Sites | %d |
\n", pg->parentSpliceCount);
psl = getAlignments(conn, alignTbl, mi->pg->name);
if (psl != NULL)
{
maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch) ;
coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize);
}
else
{
maxOverlap = 0;
}
wt[0] = 0; wt[1] = 0.85; wt[2] = 0.2; wt[3] = 0.3; wt[4] = 0.8;
wt[5] = 1; wt[6] = 1 ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1;
-rawScore = wt[0]*pg->milliBad+
- wt[1]*(log(pg->exonCover+1)/log(2))*200 +
- wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000)+
- wt[3]*(log(pg->polyAlen+2)*200) +
- wt[4]*overlapOrtholog*10 +
- wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) +
- (float)wt[6]*pow(pg->intronCount,0.5)*750 +
- (float)wt[7]*(maxOverlap*300)+
- wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0)+
- wt[9]*(pg->tReps*10)+
- wt[10]*pg->oldIntronCount;
#ifdef debug
char table[512];
struct psl *pslList = getParentAligns(conn, mi, &table);
if (psl != NULL)
{
printf("Blocks in retro:gap%%/intronsSpliced | \n");
printBlocks(psl, MAXBLOCKGAP, pslList);
printf(" |
\n");
}
if (pslList != NULL)
{
printf("Exons in parent:gap%% | \n");
printBlocks(pslList, MAXBLOCKGAP, NULL);
printf(" |
\n");
pslFreeList(&pslList);
@@ -569,41 +569,41 @@
wt[1]*(log(pg->exonCover+1)/log(2))*200 ,
pg->conservedSpliceSites,
wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
wt[3]*(log(pg->polyAlen+2)*200) ,
wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog,
pg->processedIntrons,
wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
pg->intronCount,
wt[6]*pow(pg->intronCount,0.5)*750 ,
wt[7]*(maxOverlap*300),
pg->coverage, pg->qEnd, pg->qSize , pg->qSize,
wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
wt[9]*(pg->tReps*10),
pg->oldIntronCount,
wt[10]*pg->oldIntronCount);
-printf("score function | %4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f=%4.1f %4.1f |
\n",
+printf("score function | %4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f |
\n",
wt[1]*(log(pg->exonCover+1)/log(2))*200 ,
wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
wt[3]*(log(pg->polyAlen+2)*200) ,
wt[4]*overlapOrtholog*10 ,
wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
(float)wt[6]*pow(pg->intronCount,0.5)*750 ,
(float)wt[7]*(maxOverlap*300),
wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
wt[9]*(pg->tReps*10),
- wt[10]*pg->oldIntronCount, rawScore , rawScore/3.0);
+ wt[10]*pg->oldIntronCount);
if (pg->kaku > 0 && pg->kaku < 1000000)
printf("KA/KU mutation rate in non-syn sites vs utr with repect to parent gene | %4.2f |
\n", pg->kaku);
#endif
#ifdef xxx
safef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'",
pg->chrom, pg->gStart, pg->gEnd , pg->overName );
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
overlappingGene = genePredLoad(row);
if (overlappingGene != NULL)
{
printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene));
}
#endif