a44421a79fb36cc2036fe116b97ea3bc9590cd0c braney Fri Dec 2 09:34:39 2011 -0800 removed rcsid (#295) diff --git src/hg/lib/altGraphX.c src/hg/lib/altGraphX.c index b7b950f..8362dac 100644 --- src/hg/lib/altGraphX.c +++ src/hg/lib/altGraphX.c @@ -1,2467 +1,2466 @@ /* altGraphX.c was originally generated by the autoSql program, which also * generated altGraphX.h and altGraphX.sql. This module links the database and * the RAM representation of objects. */ #include "common.h" #include "linefile.h" #include "dystring.h" #include "jksql.h" #include "altGraphX.h" #include "geneGraph.h" #include "bed.h" -static char const rcsid[] = "$Id: altGraphX.c,v 1.37 2009/08/13 03:08:30 markd Exp $"; struct altGraphX *_agxSortable = NULL; /* used for sorting. */ struct evidence *evidenceCommaIn(char **pS, struct evidence *ret) /* Create a evidence out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new evidence */ { char *s = *pS; int i; if(s == NULL) return NULL; if (ret == NULL) AllocVar(ret); ret->evCount = sqlSignedComma(&s); s = sqlEatChar(s, '{'); if (ret->evCount > 0) AllocArray(ret->mrnaIds, ret->evCount); for (i=0; i<ret->evCount; ++i) { ret->mrnaIds[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); *pS = s; return ret; } void evidenceFree(struct evidence **pEl) /* Free a single dynamically allocated evidence such as created * with evidenceLoad(). */ { struct evidence *el; if ((el = *pEl) == NULL) return; freeMem(el->mrnaIds); freez(pEl); } void evidenceFreeList(struct evidence **pList) /* Free a list of dynamically allocated evidence's */ { struct evidence *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; evidenceFree(&el); } *pList = NULL; } void evidenceOutput(struct evidence *el, FILE *f, char sep, char lastSep) /* Print out evidence. Separate fields with sep. Follow last field with lastSep. */ { int i; fprintf(f, "%d", el->evCount); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->evCount; ++i) { fprintf(f, "%d", el->mrnaIds[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(lastSep,f); } struct altGraphX *altGraphXLoad(char **row) /* Load a altGraphX from row fetched with select * from altGraphX * from database. Dispose of this with altGraphXFree(). */ { struct altGraphX *ret; int sizeOne,i; char *s; AllocVar(ret); ret->vertexCount = sqlUnsigned(row[6]); ret->edgeCount = sqlUnsigned(row[9]); ret->mrnaRefCount = sqlSigned(row[14]); ret->tName = cloneString(row[0]); ret->tStart = sqlSigned(row[1]); ret->tEnd = sqlSigned(row[2]); ret->name = cloneString(row[3]); ret->id = sqlUnsigned(row[4]); strcpy(ret->strand, row[5]); sqlUbyteDynamicArray(row[7], &ret->vTypes, &sizeOne); assert(sizeOne == ret->vertexCount); sqlSignedDynamicArray(row[8], &ret->vPositions, &sizeOne); assert(sizeOne == ret->vertexCount); sqlSignedDynamicArray(row[10], &ret->edgeStarts, &sizeOne); assert(sizeOne == ret->edgeCount); sqlSignedDynamicArray(row[11], &ret->edgeEnds, &sizeOne); assert(sizeOne == ret->edgeCount); s = row[12]; for (i=0; i<ret->edgeCount; ++i) { s = sqlEatChar(s, '{'); slSafeAddHead(&ret->evidence, evidenceCommaIn(&s, NULL)); s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } slReverse(&ret->evidence); sqlSignedDynamicArray(row[13], &ret->edgeTypes, &sizeOne); assert(sizeOne == ret->edgeCount); sqlStringDynamicArray(row[15], &ret->mrnaRefs, &sizeOne); assert(sizeOne == ret->mrnaRefCount); sqlSignedDynamicArray(row[16], &ret->mrnaTissues, &sizeOne); assert(sizeOne == ret->mrnaRefCount); sqlSignedDynamicArray(row[17], &ret->mrnaLibs, &sizeOne); assert(sizeOne == ret->mrnaRefCount); return ret; } struct altGraphX *altGraphXLoadAll(char *fileName) /* Load all altGraphX from a tab-separated file. * Dispose of this with altGraphXFreeList(). */ { struct altGraphX *list = NULL, *el; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[18]; while (lineFileRow(lf, row)) { el = altGraphXLoad(row); slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } struct altGraphX *altGraphXLoadByQuery(struct sqlConnection *conn, char *query) /* Load all altGraphX from table that satisfy the query given. * Where query is of the form 'select * from example where something=something' * or 'select example.* from example, anotherTable where example.something = * anotherTable.something'. * Dispose of this with altGraphXFreeList(). */ { struct altGraphX *list = NULL, *el; struct sqlResult *sr; char **row; int colCount = 0; int offSet = 0; sr = sqlGetResult(conn, query); colCount = sqlCountColumns(sr); offSet = colCount - 18; /* This is to account for a possible bin field. */ while ((row = sqlNextRow(sr)) != NULL) { el = altGraphXLoad(row+offSet); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); return list; } void altGraphXSaveToDb(struct sqlConnection *conn, struct altGraphX *el, char *tableName, int updateSize) /* Save altGraphX as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size * of a string that would contain the entire query. Arrays of native types are * converted to comma separated strings and loaded as such, User defined types are * inserted as NULL. Note that strings must be escaped to allow insertion into the database. * For example "autosql's features include" --> "autosql\'s features include" * If worried about this use altGraphXSaveToDbEscaped() */ { struct dyString *update = newDyString(updateSize); char *vTypesArray, *vPositionsArray, *edgeStartsArray, *edgeEndsArray, *edgeTypesArray, *mrnaRefsArray, *mrnaTissuesArray, *mrnaLibsArray; vTypesArray = sqlUbyteArrayToString(el->vTypes, el->vertexCount); vPositionsArray = sqlSignedArrayToString(el->vPositions, el->vertexCount); edgeStartsArray = sqlSignedArrayToString(el->edgeStarts, el->edgeCount); edgeEndsArray = sqlSignedArrayToString(el->edgeEnds, el->edgeCount); edgeTypesArray = sqlSignedArrayToString(el->edgeTypes, el->edgeCount); mrnaRefsArray = sqlStringArrayToString(el->mrnaRefs, el->mrnaRefCount); mrnaTissuesArray = sqlSignedArrayToString(el->mrnaTissues, el->mrnaRefCount); mrnaLibsArray = sqlSignedArrayToString(el->mrnaLibs, el->mrnaRefCount); dyStringPrintf(update, "insert into %s values ( '%s',%d,%d,'%s',%u,'%s',%u,'%s','%s',%u,'%s','%s', NULL ,'%s',%d,'%s','%s','%s')", tableName, el->tName, el->tStart, el->tEnd, el->name, el->id, el->strand, el->vertexCount, vTypesArray , vPositionsArray , el->edgeCount, edgeStartsArray , edgeEndsArray , edgeTypesArray , el->mrnaRefCount, mrnaRefsArray , mrnaTissuesArray , mrnaLibsArray ); sqlUpdate(conn, update->string); freeDyString(&update); freez(&vTypesArray); freez(&vPositionsArray); freez(&edgeStartsArray); freez(&edgeEndsArray); freez(&edgeTypesArray); freez(&mrnaRefsArray); freez(&mrnaTissuesArray); freez(&mrnaLibsArray); } void altGraphXSaveToDbEscaped(struct sqlConnection *conn, struct altGraphX *el, char *tableName, int updateSize) /* Save altGraphX as a row to the table specified by tableName. * As blob fields may be arbitrary size updateSize specifies the approx size. * of a string that would contain the entire query. Automatically * escapes all simple strings (not arrays of string) but may be slower than altGraphXSaveToDb(). * For example automatically copies and converts: * "autosql's features include" --> "autosql\'s features include" * before inserting into database. */ { struct dyString *update = newDyString(updateSize); char *tName, *name, *strand, *vTypesArray, *vPositionsArray, *edgeStartsArray, *edgeEndsArray, *edgeTypesArray, *mrnaRefsArray, *mrnaTissuesArray, *mrnaLibsArray; tName = sqlEscapeString(el->tName); name = sqlEscapeString(el->name); strand = sqlEscapeString(el->strand); vTypesArray = sqlUbyteArrayToString(el->vTypes, el->vertexCount); vPositionsArray = sqlSignedArrayToString(el->vPositions, el->vertexCount); edgeStartsArray = sqlSignedArrayToString(el->edgeStarts, el->edgeCount); edgeEndsArray = sqlSignedArrayToString(el->edgeEnds, el->edgeCount); edgeTypesArray = sqlSignedArrayToString(el->edgeTypes, el->edgeCount); mrnaRefsArray = sqlStringArrayToString(el->mrnaRefs, el->mrnaRefCount); mrnaTissuesArray = sqlSignedArrayToString(el->mrnaTissues, el->mrnaRefCount); mrnaLibsArray = sqlSignedArrayToString(el->mrnaLibs, el->mrnaRefCount); dyStringPrintf(update, "insert into %s values ( '%s',%d,%d,'%s',%u,'%s',%u,'%s','%s',%u,'%s','%s', NULL ,'%s',%d,'%s','%s','%s')", tableName, tName, el->tStart , el->tEnd , name, el->id , strand, el->vertexCount , vTypesArray , vPositionsArray , el->edgeCount , edgeStartsArray , edgeEndsArray , edgeTypesArray , el->mrnaRefCount , mrnaRefsArray , mrnaTissuesArray , mrnaLibsArray ); sqlUpdate(conn, update->string); freeDyString(&update); freez(&tName); freez(&name); freez(&strand); freez(&vTypesArray); freez(&vPositionsArray); freez(&edgeStartsArray); freez(&edgeEndsArray); freez(&edgeTypesArray); freez(&mrnaRefsArray); freez(&mrnaTissuesArray); freez(&mrnaLibsArray); } struct altGraphX *altGraphXCommaIn(char **pS, struct altGraphX *ret) /* Create a altGraphX out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new altGraphX */ { char *s = *pS; int i; if (ret == NULL) AllocVar(ret); ret->tName = sqlStringComma(&s); ret->tStart = sqlSignedComma(&s); ret->tEnd = sqlSignedComma(&s); ret->name = sqlStringComma(&s); ret->id = sqlUnsignedComma(&s); sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); ret->vertexCount = sqlUnsignedComma(&s); s = sqlEatChar(s, '{'); AllocArray(ret->vTypes, ret->vertexCount); for (i=0; i<ret->vertexCount; ++i) { ret->vTypes[i] = sqlUnsignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); AllocArray(ret->vPositions, ret->vertexCount); for (i=0; i<ret->vertexCount; ++i) { ret->vPositions[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); ret->edgeCount = sqlUnsignedComma(&s); s = sqlEatChar(s, '{'); AllocArray(ret->edgeStarts, ret->edgeCount); for (i=0; i<ret->edgeCount; ++i) { ret->edgeStarts[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); AllocArray(ret->edgeEnds, ret->edgeCount); for (i=0; i<ret->edgeCount; ++i) { ret->edgeEnds[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); for (i=0; i<ret->edgeCount; ++i) { s = sqlEatChar(s, '{'); slSafeAddHead(&ret->evidence, evidenceCommaIn(&s,NULL)); s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); } slReverse(&ret->evidence); s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); AllocArray(ret->edgeTypes, ret->edgeCount); for (i=0; i<ret->edgeCount; ++i) { ret->edgeTypes[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); ret->mrnaRefCount = sqlSignedComma(&s); s = sqlEatChar(s, '{'); AllocArray(ret->mrnaRefs, ret->mrnaRefCount); for (i=0; i<ret->mrnaRefCount; ++i) { ret->mrnaRefs[i] = sqlStringComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); AllocArray(ret->mrnaTissues, ret->mrnaRefCount); for (i=0; i<ret->mrnaRefCount; ++i) { ret->mrnaTissues[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); s = sqlEatChar(s, '{'); AllocArray(ret->mrnaLibs, ret->mrnaRefCount); for (i=0; i<ret->mrnaRefCount; ++i) { ret->mrnaLibs[i] = sqlSignedComma(&s); } s = sqlEatChar(s, '}'); s = sqlEatChar(s, ','); *pS = s; return ret; } void altGraphXFree(struct altGraphX **pEl) /* Free a single dynamically allocated altGraphX such as created * with altGraphXLoad(). */ { struct altGraphX *el; if ((el = *pEl) == NULL) return; freeMem(el->tName); freeMem(el->name); freeMem(el->vTypes); freeMem(el->vPositions); freeMem(el->edgeStarts); freeMem(el->edgeEnds); evidenceFreeList(&el->evidence); freeMem(el->edgeTypes); /* it appears that the mrnaRefs are really one big string from loadAll function, so they can be free'd all at once */ /* for(i=0;i<el->mrnaRefCount; i++) */ /* freez(&el->mrnaRefs[i]); */ /* First free the first entry if exists, this frees all of the rest of them. */ if(el->mrnaRefs != NULL) freeMem(el->mrnaRefs[0]); /* Now free the array that points to them. */ freeMem(el->mrnaRefs); freeMem(el->mrnaTissues); freeMem(el->mrnaLibs); freez(pEl); } void altGraphXFreeList(struct altGraphX **pList) /* Free a list of dynamically allocated altGraphX's */ { struct altGraphX *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; altGraphXFree(&el); } *pList = NULL; } void altGraphXOutput(struct altGraphX *el, FILE *f, char sep, char lastSep) /* Print out altGraphX. Separate fields with sep. Follow last field with lastSep. */ { int i; if (sep == ',') fputc('"',f); fprintf(f, "%s", el->tName); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->tStart); fputc(sep,f); fprintf(f, "%d", el->tEnd); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->name); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->id); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->strand); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->vertexCount); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->vertexCount; ++i) { fprintf(f, "%u", el->vTypes[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->vertexCount; ++i) { fprintf(f, "%d", el->vPositions[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); fprintf(f, "%u", el->edgeCount); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->edgeCount; ++i) { fprintf(f, "%d", el->edgeStarts[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->edgeCount; ++i) { fprintf(f, "%d", el->edgeEnds[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); /* Loading evidence list. */ { struct evidence *it = el->evidence; if (sep == ',') fputc('{',f); for (i=0; i<el->edgeCount && it != NULL; ++i) { fputc('{',f); evidenceCommaOut(it,f); it = it->next; fputc('}',f); fputc(',',f); } if (sep == ',') fputc('}',f); } fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->edgeCount; ++i) { fprintf(f, "%d", el->edgeTypes[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); fprintf(f, "%d", el->mrnaRefCount); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->mrnaRefCount; ++i) { if (sep == ',') fputc('"',f); fprintf(f, "%s", el->mrnaRefs[i]); if (sep == ',') fputc('"',f); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->mrnaRefCount; ++i) { fprintf(f, "%d", el->mrnaTissues[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(sep,f); if (sep == ',') fputc('{',f); for (i=0; i<el->mrnaRefCount; ++i) { fprintf(f, "%d", el->mrnaLibs[i]); fputc(',', f); } if (sep == ',') fputc('}',f); fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ void altGraphXoffset(struct altGraphX *ag, int offset) /* add offset to all coordinates of altGraphX */ { int i; ag->tStart += offset; ag->tEnd += offset; for(i=0; i<ag->vertexCount; i++) ag->vPositions[i] += offset; } struct altGraphX *altGraphXClone(struct altGraphX *ag) /* Make a clone of a current altGraphX structure. Free with altGraphXFree() */ { struct altGraphX *agNew = NULL; struct dyString *names = NULL; struct evidence *ev = NULL, *evNew = NULL; int i; assert(ag); names = newDyString(256); agNew = CloneVar(ag); agNew->tName = cloneString(ag->tName); agNew->name = cloneString(ag->name); agNew->vTypes = CloneArray(ag->vTypes, ag->vertexCount); agNew->vPositions = CloneArray(ag->vPositions, ag->vertexCount); agNew->edgeStarts = CloneArray(ag->edgeStarts, ag->edgeCount); agNew->edgeEnds = CloneArray(ag->edgeEnds, ag->edgeCount); agNew->edgeTypes = CloneArray(ag->edgeTypes, ag->edgeCount); for(i=0; i<ag->mrnaRefCount; i++) dyStringPrintf(names, "%s,", ag->mrnaRefs[i]); sqlStringDynamicArray(names->string, &agNew->mrnaRefs, &agNew->mrnaRefCount); assert(ag->mrnaRefCount == agNew->mrnaRefCount); dyStringFree(&names); agNew->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount); agNew->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount); agNew->evidence = NULL; for(ev = ag->evidence; ev != NULL; ev = ev->next) { evNew = CloneVar(ev); if (ev->evCount > 0) evNew->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount); slAddHead(&agNew->evidence, evNew); } slReverse(&agNew->evidence); return agNew; } static boolean isEndXVertice(bool **em, int vertexCount, int vertice, unsigned char *vTypes) /* check to see if there are any edges coming out of this vertice */ { int i; for(i=0; i<vertexCount; i++) { if(em[vertice][i] && (vTypes[i] != ggUnused)) return FALSE; } return TRUE; } static void countXPathsFromVertice(bool **em, bool *seen, unsigned char *vTypes, int vertexCount, int vertice, int *pathCount) /* recursively count paths from vertice */ { int i=0; /* color me counted */ seen[vertice] = TRUE; /* check for termination */ if(isEndXVertice(em, vertexCount, vertice, vTypes)) { (*pathCount)++; } else { /* recursively call for all vertices that are connected to from this vertex */ for(i = 0; i < vertexCount; i++) { if(em[vertice][i] && vTypes[i] != ggUnused) { countXPathsFromVertice(em, seen, vTypes, vertexCount, i, pathCount); } } } } int altGraphXNumAltSplices(struct altGraphX *ag) /* Count number of times that exons have more than one edge through them */ { int pathCount=0; int i=0; int vertexCount = ag->vertexCount; bool **em; /* edge matrix */ bool *seen; /* has this edge been seen? */ /* construct the edge matrix */ AllocArray(em, vertexCount); AllocArray(seen, vertexCount); for (i=0; i<vertexCount; ++i) { AllocArray(em[i], vertexCount); } for (i=0; i<ag->edgeCount; ++i) em[ag->edgeStarts[i]][ag->edgeEnds[i]] = TRUE; /* recursively count all possible paths for each vertice if we've seen it */ for(i=0; i<vertexCount; i++) if(!seen[i]) { countXPathsFromVertice(em, seen, ag->vTypes, vertexCount, i, &pathCount); } /* clean up */ for(i = 0; i < vertexCount; i++) freez(&em[i]); freez(&em); return pathCount; } enum agColor /* color of a vertex */ { agWhite, /* unseen */ agGray, /* seen */ agBlack, /* finished */ }; bool **altGraphXCreateEdgeMatrix(struct altGraphX *ag) /* create an edgematix from an altGraphX, free this with * agFreeEdgeMatrix. */ { int numVert = ag->vertexCount; bool **em; int i=0; int numEdges = ag->edgeCount; int *eStarts, *eEnds; eStarts = ag->edgeStarts; eEnds = ag->edgeEnds; assert(numVert > 0); /* allocate memory */ AllocArray(em, numVert); for(i=0; i< numVert; i++) AllocArray(em[i], numVert); /* fill in edges */ for(i=0; i < numEdges; i++) { em[eStarts[i]][eEnds[i]] = ag->edgeTypes[i] + 1; } return em; } static UBYTE **altGraphXCreateEdgeTypePlusOneMatrix(struct altGraphX *ag) /* create an edgematix from an altGraphX, free this with * agFreeEdgeMatrix. Contents of edge matrix is edge-type + 1 * (so as to make ggExon non-zero) */ { int numVert = ag->vertexCount; UBYTE **em; int i=0; int numEdges = ag->edgeCount; int *eStarts, *eEnds; eStarts = ag->edgeStarts; eEnds = ag->edgeEnds; assert(numVert > 0); /* allocate memory */ AllocArray(em, numVert); for(i=0; i< numVert; i++) AllocArray(em[i], numVert); /* fill in edges */ for(i=0; i < numEdges; i++) { em[eStarts[i]][eEnds[i]] = ag->edgeTypes[i] + 1; } return em; } void altGraphXFreeEdgeMatrix(bool ***pEm, int vertCount) /* Free an edge matrix. */ { int i; bool **em = *pEm; if(em == NULL) return; for(i=0; i<vertCount; i++) freez(&em[i]); freez(pEm); } static void agDfsTopo(bool **em, int vertex, int *colors, int *topo, int numVert, int *topoNum) /* recursive depth first search routine for topological sort */ { int i; colors[vertex] = agGray; for(i =0; i<numVert; i++) if(em[vertex][i] && colors[i] == agWhite) agDfsTopo(em, i, colors, topo, numVert, topoNum); (*topoNum)--; assert(*topoNum >= 0); topo[vertex] = *topoNum; colors[vertex] = agBlack; } void agTranslateEdgeArray(int *from, int *to, int *translator, int count) /* change order of from as described in translator and * return in to. */ { int i; for(i=0;i<count; i++) { to[i] = translator[from[i]]; } } void agTranslateIntArray(int *from, int *to, int *translator, int count) /* change order of from as described in translator and * return in to. */ { int i; for(i=0;i<count; i++) { to[translator[i]] = from[i]; } } void agTranslateUCharArray(unsigned char *from, unsigned char *to, int *translator, int count) /* change order of from as described in translator and * return in to. */ { int i; for(i=0;i<count; i++) { to[translator[i]] = from[i]; } } void altGraphXArrangeVertsByOrder(struct altGraphX *ag, int *translator) /* use the translator as a way to "sort" vertices, and * edges */ { struct evidence ***agEv = NULL; int *sEdgeStarts = NULL; int *sEdgeEnds = NULL; unsigned char *sVTypes = NULL; int *sVPositions = NULL; int vertCount = ag->vertexCount; int edgeCount = ag->edgeCount; UBYTE **em = NULL; int i,j; /* convert vertices to translator order */ AllocArray(sVTypes, vertCount); agTranslateUCharArray(ag->vTypes, sVTypes, translator, vertCount); AllocArray(sVPositions, vertCount); agTranslateIntArray(ag->vPositions, sVPositions, translator, vertCount); AllocArray(sEdgeStarts, edgeCount); agTranslateEdgeArray(ag->edgeStarts, sEdgeStarts, translator, edgeCount); AllocArray(sEdgeEnds, edgeCount); agTranslateEdgeArray(ag->edgeEnds, sEdgeEnds, translator, edgeCount); /* repackage our sorted stuff into the altGraphX */ freez(&ag->vTypes); ag->vTypes = sVTypes; freez(&ag->vPositions); ag->vPositions = sVPositions; freez(&ag->edgeStarts); ag->edgeStarts = sEdgeStarts; freez(&ag->edgeEnds); ag->edgeEnds = sEdgeEnds; em = altGraphXCreateEdgeTypePlusOneMatrix(ag); /* sort our edges by the same translator order, * use edge matrix to do so. */ AllocArray(agEv, vertCount); for(i=0; i<vertCount; i++) AllocArray(agEv[i], vertCount); for(i=0; i<edgeCount; i++) { agEv[sEdgeStarts[i]][sEdgeEnds[i]] = slElementFromIx(ag->evidence, i); } edgeCount =0; ag->evidence = NULL; for(i=0; i<vertCount; i++) { for(j=0; j<vertCount; j++) { if(em[i][j]) { ag->edgeStarts[edgeCount] = i; ag->edgeEnds[edgeCount] = j; ag->edgeTypes[edgeCount] = em[i][j] - 1; /* obscure code. */ slAddHead(&ag->evidence, agEv[i][j]); edgeCount++; } } } slReverse(&ag->evidence); bool **emb = (bool**)em; altGraphXFreeEdgeMatrix(&emb, vertCount); for(i=0; i<vertCount; i++) freez(&agEv[i]); freez(&agEv); } struct agVertSortable /* package for sorting vertices */ { int index; int vPosition; }; int agVertSortableCmp(const void *e1, const void *e2) /* used with qsort to sort array */ { const struct agVertSortable *a = *((struct agVertSortable **)e1); const struct agVertSortable *b = *((struct agVertSortable **)e2); return (a->vPosition - b->vPosition); } void agxPrintEdges(struct altGraphX *ag) { int i = 0; for(i = 0; i < ag->edgeCount; i++) warn("%d - %d", ag->edgeStarts[i], ag->edgeEnds[i]); } void altGraphXVertPosSort(struct altGraphX *ag) /* sort an altGraphX's vertices by position on tName */ { struct agVertSortable **pAvg = NULL; struct agVertSortable *avg = NULL; int i; int *translator = NULL; int vertCount = ag->vertexCount; assert(vertCount > 0); /* get order sorted by position */ AllocArray(pAvg, vertCount); for(i=0;i<vertCount; i++) { AllocVar(avg); avg->index = i; avg->vPosition = ag->vPositions[i]; pAvg[i] = avg; } qsort(pAvg, vertCount, sizeof(pAvg[0]), agVertSortableCmp); AllocArray(translator, vertCount); for(i=0;i<vertCount; i++) translator[pAvg[i]->index] = i; /* arrange vertices and edges in that order */ altGraphXArrangeVertsByOrder(ag, translator); /* cleanup */ freez(&translator); for(i=0;i<vertCount; i++) freez(&pAvg[i]); freez(&pAvg); } void altGraphXTopologicalSort(struct altGraphX *ag) /* Do a topological sort on vertices in altGraphX, basic * algorithm from "Computer Algorithms" Sara Baase and Allen Van Gelder * 3rd Edition 2000, pp 345-353 */ { int *colors = NULL; int *topo = NULL; int topoNum = 0; int vertCount = ag->vertexCount; bool **em = NULL; int i; /* allocate memeory and initialize */ assert(vertCount > 0); AllocArray(colors, vertCount); AllocArray(topo, vertCount); em = altGraphXCreateEdgeMatrix(ag); for(i=0; i<vertCount; i++) colors[i] = agWhite; topoNum = vertCount; /* get topological order */ for(i=0; i<vertCount; i++) { if(colors[i] == agWhite) agDfsTopo(em, i, colors, topo, vertCount, &topoNum); } altGraphXArrangeVertsByOrder(ag, topo); altGraphXFreeEdgeMatrix(&em, vertCount); freez(&colors); freez(&topo); } boolean altGraphXEdgeObserved(struct altGraphX *ag, int *seen, int *seenCount, int mrnaIx) /* is the mrnaIx already in seen? */ { int i=0; boolean result = FALSE; for(i=0; i<*seenCount; i++) { if(ag->mrnaTissues[seen[i]] == ag->mrnaTissues[mrnaIx] || ag->mrnaLibs[seen[i]] == ag->mrnaLibs[mrnaIx]) { result = TRUE; break; } } if(!result) { seen[*seenCount++] = mrnaIx; } return result; } int altGraphTimesSeenEdge(struct altGraphX *ag, int eIx) /* Count how many times we see evidence for a particular edge. */ { struct evidence *ev = slElementFromIx(ag->evidence, eIx); int *seen = NULL; int seenCount = 0,i; int conf = 0; AllocArray(seen, ag->edgeCount); for(i=0; i<ag->edgeCount; i++) seen[i] = -1; for(i=0; i<ev->evCount; i++) if(!altGraphXEdgeObserved(ag, seen, &seenCount, ev->mrnaIds[i])) conf++; freez(&seen); return conf; } static void assignToArray(int *array, int arraySize, int count, int val) { if(count >= arraySize) errAbort("Can't have count: %d greater than array size: %d", count, arraySize); array[count] = val; } boolean uniqeInArray(int *array, int size, int val) { int i; for(i=0;i<size; i++) if(array[i] == val) return FALSE; return TRUE; } int edgePopularityCmp(const void *va, const void *vb) /* Compare to sort on commonness edge. */ { int cmp = 0; const int *a = ((int *)va); const int *b = ((int *)vb); struct evidence *evB = slElementFromIx(_agxSortable->evidence, *b); struct evidence *evA = slElementFromIx(_agxSortable->evidence, *a); cmp = evB->evCount - evA->evCount; return cmp; } int findMostCommonEdge(struct altGraphX *ag, int *edgeArray, int edgeCount) /* Find the most common edge in the edge array. */ { assert(edgeArray); assert(ag); _agxSortable = ag; qsort(edgeArray, edgeCount, sizeof(int *), edgePopularityCmp); _agxSortable = NULL; return (edgeArray[0]); } int findUpStartFrom(struct altGraphX *ag, int fpSplice) /* Given a five prime splice site, find the three prime splice that makes an exon. Returns -1 if can't find any. */ { int *exonStartEdges = NULL; int exonStartCount = 0; int eCount = ag->edgeCount; int tpSite = 0; int i; AllocArray(exonStartEdges, eCount); for(i = 0; i < eCount; i++) { if(ag->edgeEnds[i] == ag->edgeStarts[fpSplice]) assignToArray(exonStartEdges, eCount, exonStartCount++, i); } if(exonStartCount == 0) tpSite =-1; else tpSite = findMostCommonEdge(ag, exonStartEdges, exonStartCount); freez(&exonStartEdges); return tpSite; } int findDownEndFrom(struct altGraphX *ag, int tpSplice) /* Given a three prime splice site, find the downstream five prime splice that makes an exon. Returns -1 if can't find any. */ { int *exonEndEdges = NULL; int exonEndCount = 0; int eCount = ag->edgeCount; int tpSite = 0; int i; AllocArray(exonEndEdges, eCount); for(i = 0; i < eCount; i++) { if(ag->edgeStarts[i] == ag->edgeEnds[tpSplice]) assignToArray(exonEndEdges, eCount, exonEndCount++, i); } if(exonEndCount == 0) tpSite =-1; else tpSite = findMostCommonEdge(ag, exonEndEdges, exonEndCount); freez(&exonEndEdges); return tpSite; } int getEvidenceCount(struct altGraphX *ag, int edge) { struct evidence *ev = slElementFromIx(ag->evidence, edge); return ev->evCount; } struct bed *createBedFromEdges( struct altGraphX *ag, int fpEdge, int cassEdge, int tpEdge) /* Put the edges together to from a bed structure. */ { int *vPos = ag->vPositions; int *starts = ag->edgeStarts; int *ends = ag->edgeEnds; struct bed *bed; int numBlocks = 3; AllocVar(bed); bed->chrom = cloneString(ag->tName); snprintf(bed->strand, sizeof(bed->strand), "%s", ag->strand); bed->chromStart = bed->thickStart = vPos[starts[fpEdge]]; bed->chromEnd = bed->thickEnd = vPos[ends[tpEdge]]; bed->name = cloneString(ag->name); bed->blockCount = numBlocks; AllocArray(bed->blockSizes, numBlocks); AllocArray(bed->chromStarts, numBlocks); bed->blockSizes[0] = vPos[ends[fpEdge]] - vPos[starts[fpEdge]]; bed->blockSizes[1] = vPos[ends[cassEdge]] - vPos[starts[cassEdge]]; bed->blockSizes[2] = vPos[ends[tpEdge]] - vPos[starts[tpEdge]]; bed->chromStarts[0] = vPos[starts[fpEdge]] -bed->chromStart; bed->chromStarts[1] = vPos[starts[cassEdge]] - bed->chromStart; bed->chromStarts[2] = vPos[starts[tpEdge]] -bed->chromStart; bed->score = getEvidenceCount(ag,fpEdge)+ getEvidenceCount(ag, tpEdge); bed->expCount = numBlocks; AllocArray(bed->expIds, numBlocks); AllocArray(bed->expScores, numBlocks); bed->expIds[0] = fpEdge; bed->expIds[1] = cassEdge; bed->expIds[2] = tpEdge; return bed; } enum ggEdgeType altGraphXEdgeVertexType(struct altGraphX *ag, int v1, int v2) /* Return edge type. */ { if( (ag->vTypes[v1] == ggHardStart || ag->vTypes[v1] == ggSoftStart) && (ag->vTypes[v2] == ggHardEnd || ag->vTypes[v2] == ggSoftEnd)) return ggExon; else if( (ag->vTypes[v1] == ggHardEnd || ag->vTypes[v1] == ggSoftEnd) && (ag->vTypes[v2] == ggHardStart || ag->vTypes[v2] == ggSoftStart)) return ggSJ; else return ggIntron; } enum ggEdgeType altGraphXEdgeType(struct altGraphX *ag, int edge) /* Return edge type. */ { return altGraphXEdgeVertexType(ag, ag->edgeStarts[edge], ag->edgeEnds[edge]); } struct bed *altGraphXToBed(struct altGraphX *ag) /* Merge all overlapping exons to form bed datatype. Free with bedFree().*/ { int *bases = NULL; int baseCount = ag->tEnd - ag->tStart +1; struct bed *bed = NULL; int i=0, k=0; int offSet = ag->tStart; int *vPos = ag->vPositions; int start = 0; int width = 0; int currentStart = 0; boolean extending = FALSE; enum ggEdgeType eType; assert(ag && ag->edgeCount > 0 && baseCount > 0); AllocArray(bases, baseCount); /* first paint the bases. */ for(i=0; i<ag->edgeCount; i++) { eType = altGraphXEdgeType(ag, i); if(eType == ggExon) { width = vPos[ag->edgeEnds[i]] - vPos[ag->edgeStarts[i]]; start = vPos[ag->edgeStarts[i]]-offSet; for(k=start; k<width+start;k++) { bases[k] = 1; } } } AllocVar(bed); AllocArray(bed->blockSizes, ag->edgeCount); AllocArray(bed->chromStarts, ag->edgeCount); bed->chrom = cloneString(ag->tName); bed->name = cloneString(ag->name); safef(bed->strand, sizeof(bed->strand), "%s", ag->strand); bed->thickStart = bed->chromStart = ag->tStart; bed->thickEnd = bed->chromEnd = ag->tEnd; for(i=0; i<baseCount; i++) { if(bases[i] == 1 && !extending) { currentStart = i; extending = TRUE; } else if(bases[i] == 0 && extending) { bed->blockSizes[bed->blockCount] = i - currentStart; bed->chromStarts[bed->blockCount] = currentStart; /* bed->thickEnd = bed->chromEnd = max(bed->thickEnd, i+offSet); */ /* bed->thickStart = bed->chromStart = min(bed->thickStart, (currentStart + offSet)); */ bed->blockCount++; currentStart = BIGNUM; extending = FALSE; } } /* Finish her off..*/ if(extending) { bed->blockSizes[bed->blockCount] = i - currentStart; bed->chromStarts[bed->blockCount] = currentStart; /* bed->thickEnd = bed->chromEnd = max(bed->thickEnd, i+offSet); */ /* bed->thickStart = bed->chromStart = min(bed->thickStart, currentStart+offSet); */ bed->blockCount++; currentStart = BIGNUM; extending = FALSE; } bed->chromStart = BIGNUM; bed->chromEnd = 0; for(i=0; i<bed->blockCount; i++) { bed->chromStart = bed->thickStart = min(bed->chromStarts[i], bed->chromStart); bed->chromEnd = bed->thickEnd = max(bed->chromStarts[i]+bed->blockSizes[i], bed->chromEnd); } for(i=0; i<bed->blockCount; i++) { bed->chromStarts[i] -= bed->chromStart; } bed->chromEnd -= bed->chromStart; bed->chromStart = bed->thickStart = bed->chromStart + ag->tStart; bed->chromEnd = bed->thickEnd = bed->chromEnd + bed->chromStart; freez(&bases); return bed; } struct bed *altGraphGetExonCassette(struct altGraphX *ag, int eIx) /* Get a bed which corresponds to the exons involved in a cassette exon. */ { int eStart =0, eEnd = 0; int *intStartEdges = NULL, *intEndEdges =NULL, *altEdges = NULL; int intStartCount = 0, intEndCount =0, altCount =0; int i,j,k; int eCount = ag->edgeCount; struct bed *bedList = NULL; AllocArray(intStartEdges, ag->edgeCount); AllocArray(intEndEdges, ag->edgeCount); AllocArray(altEdges, ag->edgeCount); eStart = ag->edgeStarts[eIx]; eEnd = ag->edgeEnds[eIx]; /* First find the introns that connect to our edge of interest. */ for(i = 0; i < ag->edgeCount; i++) { if(ag->edgeEnds[i] == eStart) assignToArray(intStartEdges, eCount, intStartCount++, i); if(ag->edgeStarts[i] == eEnd) assignToArray(intEndEdges, eCount, intEndCount++, i); } /* for each intron that connects to our exon. */ for(i = 0; i < intStartCount; i++) { for(j =0; j < ag->edgeCount; j++) { /* Look for and edge that starts at the same place as our introns. */ if(intStartEdges[i] != j && (ag->edgeStarts[j] == ag->edgeStarts[intStartEdges[i]])) { for(k=0; k < intEndCount; k++) { /* Then connects to one of the same ends. */ if(ag->edgeEnds[j] == ag->edgeEnds[intEndEdges[k]]) { /* If it is new... */ if(uniqeInArray(altEdges, altCount, j)) { struct bed *bed = NULL; int upStart = findUpStartFrom(ag, intStartEdges[i]); int downEnd = findDownEndFrom(ag, intEndEdges[k]); bed = createBedFromEdges(ag, upStart, eIx, downEnd); assignToArray(altEdges, eCount, altCount++, j); slAddHead(&bedList, bed); } } } } } } freez(&altEdges); freez(&intStartEdges); freez(&intEndEdges); return bedList; } int altGraphTimesNotSeenEdge(struct altGraphX *ag, int eIx) /* Discover how many times there is a transcript that uses an alternative to the edge eIx. */ { int eStart =0, eEnd = 0; int *intStartEdges = NULL, *intEndEdges =NULL, *altEdges = NULL; int intStartCount = 0, intEndCount =0, altCount =0; int i,j,k; int conf = 0; int eCount = ag->edgeCount; AllocArray(intStartEdges, ag->edgeCount); AllocArray(intEndEdges, ag->edgeCount); AllocArray(altEdges, ag->edgeCount); eStart = ag->edgeStarts[eIx]; eEnd = ag->edgeEnds[eIx]; /* First find the introns that connect to our edge of interest. */ for(i = 0; i < ag->edgeCount; i++) { if(ag->edgeEnds[i] == eStart) assignToArray(intStartEdges, eCount, intStartCount++, i); if(ag->edgeStarts[i] == eEnd) assignToArray(intEndEdges, eCount, intEndCount++, i); } /* for each intron that connects to our exon. */ for(i = 0; i < intStartCount; i++) { for(j =0; j < ag->edgeCount; j++) { /* Look for and edge that starts at the same place as our introns. */ if(intStartEdges[i] != j && (ag->edgeStarts[j] == ag->edgeStarts[intStartEdges[i]])) { for(k=0; k < intEndCount; k++) { /* Then connects to one of the same ends. */ if(ag->edgeEnds[j] == ag->edgeEnds[intEndEdges[k]]) if(uniqeInArray(altEdges, altCount, j)) assignToArray(altEdges, eCount, altCount++, j); } } } } for(i=0; i< altCount; i++) { conf += altGraphTimesSeenEdge(ag, altEdges[i]); } freez(&altEdges); freez(&intStartEdges); freez(&intEndEdges); return conf; } float altGraphCassetteConfForEdge(struct altGraphX *ag, int eIx, float prior) /* Return the score for this cassette exon. Want to have cassette exons that are present in multiple transcripts and that are not present in multiple exons. We want to see both forms of the cassette exon, we don't want to have one outlier be chosen. Thus we count the times that the exon is seen, we count the times that the exon isn't seen and we calculate a final score by: (seen + notseen + prior)/(abs(seen - notSeen+prior) + 1) . Thus larger scores are better. */ { int seen = altGraphTimesSeenEdge(ag, eIx); int notSeen = altGraphTimesNotSeenEdge(ag, eIx); float conf = 0; conf = (float)(seen + notSeen + prior)/(float)(abs(seen - notSeen) + prior); return conf; } enum ggEdgeType getSpliceEdgeType(struct altGraphX *ag, int edge) /* Return edge type. */ { if( (ag->vTypes[ag->edgeStarts[edge]] == ggHardStart || ag->vTypes[ag->edgeStarts[edge]] == ggSoftStart) && (ag->vTypes[ag->edgeEnds[edge]] == ggHardEnd || ag->vTypes[ag->edgeEnds[edge]] == ggSoftEnd)) return ggExon; else if( (ag->vTypes[ag->edgeStarts[edge]] == ggHardEnd || ag->vTypes[ag->edgeStarts[edge]] == ggSoftEnd) && (ag->vTypes[ag->edgeEnds[edge]] == ggHardStart || ag->vTypes[ag->edgeEnds[edge]] == ggSoftStart)) return ggSJ; else return ggIntron; } boolean altGraphXEdgeSeen(struct altGraphX *ag, int *seen, int *seenCount, int mrnaIx) /* is the mrnaIx already in seen? */ { int i=0; boolean result = FALSE; for(i=0; i<(*seenCount); i++) { if(ag->mrnaTissues[seen[i]] == ag->mrnaTissues[mrnaIx] || ag->mrnaLibs[seen[i]] == ag->mrnaLibs[mrnaIx]) { result = TRUE; break; } } if(!result) { seen[(*seenCount)] = mrnaIx; (*seenCount)++; } return result; } int altGraphConfidenceForEdge(struct altGraphX *ag, int eIx) /* count how many unique libraries or tissues contain a given edge */ { struct evidence *ev = slElementFromIx(ag->evidence, eIx); int *seen = NULL; int seenCount = 0,i; int conf = 0; if (ev->evCount > 0) AllocArray(seen, ev->evCount); for(i=0; i<ev->evCount; i++) seen[i] = -1; for(i=0; i<ev->evCount; i++) if(!altGraphXEdgeSeen(ag, seen, &seenCount, ev->mrnaIds[i])) conf++; freez(&seen); return conf; } struct spliceEdge *altGraphXToEdges(struct altGraphX *ag) /* Return a list of splice edges based on data in altGraphX. */ { int i; struct spliceEdge *e = NULL, *eList = NULL; int *vPos = ag->vPositions; int *starts = ag->edgeStarts; int *ends = ag->edgeEnds; for(i=0; i<ag->edgeCount; i++) { AllocVar(e); e->type = getSpliceEdgeType(ag,i); e->start = vPos[starts[i]]; e->end = vPos[ends[i]]; e->v1 = starts[i]; e->v2 = ends[i]; // e->conf = ag->evidence[i].evCount; e->conf = altGraphConfidenceForEdge(ag, i); slAddHead(&eList, e); } return eList; } int spliceEdgeTypeConfCmp(const void *va, const void *vb) /* Compare to sort based on type, confidence, and then start. */ { const struct spliceEdge *a = *((struct spliceEdge **)va); const struct spliceEdge *b = *((struct spliceEdge **)vb); int diff = a->type - b->type; if(diff == 0) diff = a->start - b->start; if(diff == 0) diff = b->end - a->end; return diff; } void addSpliceNode(struct hash *spliceHash, int count, int vertex, int position, int row, int maxRows) /* Add a 1 in the correct row to indicate that there is a feature there. */ { char key[128]; int *levels = NULL; safef(key, sizeof(key), "%d-%d", count, vertex); if(row >= maxRows) errAbort("addSpliceNode() - Row %d is greater than maxRows %d\n", row, maxRows); levels = hashFindVal(spliceHash, key); if(levels == NULL) { AllocArray(levels, maxRows); levels[row] = 1; hashAdd(spliceHash, key, levels); } else levels[row] = 1; } static void drawExonAt(struct spliceEdge *se, int heightPer, int regionStart, int regionEnd, struct hvGfx *hvg, int xOff, int y, double scale, MgFont *font, Color color, Color *shades) /* Draw an exon at. */ { int x1 = round((double)((int)se->start-regionStart)*scale) + xOff; int x2 = round((double)((int)se->end-regionStart)*scale) + xOff; int w=0, textWidth=0; boolean drawLabel = FALSE; Color exonColor; char buff[256]; int conf = 0; if(se->conf > 9) conf = 9; else if(se->conf < 2) conf = 2; else conf = (int)se->conf; w = x2-x1; if (w < 1) w = 1; //exonColor = shades[conf]; exonColor = MG_BLACK; hvGfxBox(hvg, x1, y, w, heightPer/2, exonColor); if(drawLabel) { safef(buff, sizeof(buff), "%d-%d-%d", se->v1, se->v2, (int)se->conf); textWidth = mgFontStringWidth(font, buff); if(textWidth <= w) hvGfxTextCentered(hvg, x1, y, w, heightPer/2, MG_WHITE, font, buff); } } struct spliceEdge *createFakeExon(int vertex, int position, int confidence) /* Return a spliceEdge of type exon that is a single base pair for this splice site. */ { struct spliceEdge *se = NULL; AllocVar(se); se->v1 = vertex; se->v2 = vertex; se->start = position; se->end = position+1; se->conf = confidence; return se; } static boolean bitsClear(struct spaceSaver *ss, struct spliceEdge *eg, int currentPos, double scale) /* Return FALSE if any of the rows are full at this position, TRUE otherwise. */ { struct spaceRowTracker *sr = NULL; int count=0,i=0; int minSpace = max(round(((eg->end-eg->start)*scale)/5),5); if(minSpace > 15) minSpace = 15; for(sr = ss->rowList; sr != NULL; sr = sr->next) { if(eg->rowStarts[count] == 1 || eg->rowEnds[count] == 1) { int start = (currentPos-minSpace)*ss->scale; int end = (currentPos+minSpace)*ss->scale; for(i = start; i < end && i < ss->cellsInRow; i++) { if(sr->used[i] == 1) return FALSE; } } count++; } return TRUE; } static void setBits(struct spaceSaver *ss, struct spliceEdge *eg, int pos, double scale) /* Set the bits in spaceSaver ss to mark this spliceEdge being used. */ { struct spaceRowTracker *sr = NULL; int count=0,i=0; int minSpace = min(round(((eg->end-eg->start)*scale)/7.5),6); for(sr = ss->rowList; sr != NULL; sr = sr->next) { if(eg->rowStarts[count] == 1 || eg->rowEnds[count] == 1) { for(i=(pos-minSpace)*ss->scale; i<(pos+minSpace)*ss->scale; i++) sr->used[i] = 1; } count++; } } static int findBestMidPoint(int s, int e, struct spaceSaver *ss, struct spliceEdge *eg, double scale) /* Try to find a good place to put the splice eg midpoint. */ { int midWay = (s+e)/2; int currentPos = midWay; boolean found = FALSE; struct spaceNode *sn = NULL; int step = max(1,(e - s)/10); /* Look for empty pixels. */ if(bitsClear(ss, eg, currentPos,scale)) found = TRUE; while(!found && currentPos < e-step && currentPos > s+step) { int diff = currentPos - midWay; if(diff >= 0) diff += step; else diff -= step; diff = -1 * diff; currentPos = midWay + diff; if(bitsClear(ss, eg, currentPos, scale)) { found = TRUE; break; } } if(!found) currentPos = midWay; setBits(ss, eg, currentPos,scale); /* Add the splice node. */ AllocVar(sn); eg->mid = currentPos; sn->val = eg; slAddHead(&ss->nodeList, sn); return currentPos; } void altGraphXFillInJunctions(struct spliceEdge *egList, struct altGraphX *ag, int offSet, int end, int regionStart, double scale, struct spaceSaver *ss, int agIx) /* Figure out a good place to put the splice junctions. */ { struct spliceEdge *eg = NULL, *egNext = NULL; struct hash *spliceHash = newHash(8); char key[128]; int *sjStarts = NULL, *sjEnds = NULL; int x1=0,x2=0; struct spaceNode *sn = NULL; /* Record the row that this exon was drawn at. */ for (sn = ss->nodeList; sn != NULL; sn = sn->next) { struct spliceEdge *se = sn->val; int s = se->start; int e = se->end; addSpliceNode(spliceHash, agIx, se->v1, s, sn->row, ss->rowCount); addSpliceNode(spliceHash, agIx, se->v2, e, sn->row, ss->rowCount); } /* Now roll through and place the splice junctions. */ for(eg = egList; eg != NULL; eg = egNext) { x1 = round((eg->start - regionStart)*scale); x2 = round((eg->end - regionStart)*scale); egNext = eg->next; /* We are only laying out splice junctions here. */ if(eg->type != ggSJ) continue; safef(key, sizeof(key), "%d-%d", agIx, eg->v1); sjStarts = hashFindVal(spliceHash, key); safef(key, sizeof(key), "%d-%d", agIx, eg->v2); sjEnds = hashFindVal(spliceHash, key); if(sjStarts != NULL) eg->rowStarts = CloneArray(sjStarts, ss->rowCount); if(sjEnds != NULL) eg->rowEnds = CloneArray(sjEnds, ss->rowCount); eg->rowCount = ss->rowCount; eg->mid = findBestMidPoint(x1, x2, ss, eg, scale) - (scale*offSet); } freeHashAndVals(&spliceHash); } void altGraphXLayoutEdges(struct spliceEdge *egList, struct altGraphX *ag, int seqStart, int seqEnd, int seqOffset, double scale, int maxRows, int agIx, struct spaceSaver **ssList, struct hash **heightHash, int *rowCount) /* Layout a list of edges to minimize overlap. */ { struct spliceEdge *eg = NULL, *egNext = NULL; struct spaceSaver *ss = NULL; int width = round((seqEnd - seqStart)*scale); char keyBuff[1024]; int start, end; bool *vSeen = NULL; /* Have we seen a given vertex yet? */ int maxCells = 0; AllocArray(vSeen, ag->vertexCount); if(width == 0) width=1; maxCells = width/4; if(maxCells == 0) maxCells = 1; ss = spaceSaverMaxCellsNew(0, width, maxRows, maxCells); /* Layout the exons using the spaceSaver to make sure that different exons don't overlap. */ for(eg = egList; eg != NULL; eg = egNext) { egNext = eg->next; eg->itemNumber = agIx; if(eg->type != ggExon) { if(vSeen[eg->v1] && vSeen[eg->v2]) continue; /* If we don't have an exon for a splice site we have to do a little futzing. to create a "fake" exon for the splice site so it will be drawn. */ else { slAddHead(&egNext, eg); if(!vSeen[eg->v2]) { struct spliceEdge *tmp = createFakeExon(eg->v2, eg->end, eg->conf); slAddHead(&egNext, tmp); } if(!vSeen[eg->v1]) { struct spliceEdge *tmp = createFakeExon(eg->v1, eg->start-1, eg->conf); slAddHead(&egNext, tmp); } } continue; } vSeen[eg->v1] = 1; vSeen[eg->v2] = 1; start = round((double)(eg->start - seqStart)*scale); end = round((double)(eg->end - seqStart)*scale); if(end == start) end++; if (start <= width && end >= 0) { if (start < 0) start = 0; if (end > width) end = width; if (spaceSaverAdd(ss, start, end, eg) == NULL) break; } } spaceSaverFinish(ss); slAddHead(ssList, ss); (*rowCount) += ss->rowCount; /* Record the number of rows used for this item. */ safef(keyBuff, sizeof(keyBuff), "%d", agIx); hashAddInt((*heightHash), keyBuff, ss->rowCount); altGraphXFillInJunctions(egList, ag, seqOffset, width, seqStart, scale, ss, agIx); freez(&vSeen); //*rowCount = ss->rowCount; } void altGraphXLayout(struct altGraphX *agList, int seqStart, int seqEnd, double scale, int maxRows, struct spaceSaver **ssList, struct hash **heightHash, int *rowCount) /** Layout a list of altGraphX's in a space (seqEnd-seqStart) * scale wide. Return a list of one spaceSaver per altGraphX record, a hash with the row layout offset of the exons, and the number of rows required to layout. */ { struct altGraphX *ag = NULL; struct spliceEdge *egList = NULL; int agCount = 0; *heightHash = newHash(8); /* For every item */ for(ag = agList; ag != NULL; ag = ag->next) { egList = altGraphXToEdges(ag); slSort(&egList, spliceEdgeTypeConfCmp); altGraphXLayoutEdges(egList, ag, ag->tStart, ag->tEnd, seqStart - ag->tStart, scale, maxRows, agCount++, ssList, heightHash, rowCount); } slReverse(ssList); } void altGraphXDrawPack(struct altGraphX *agList, struct spaceSaver *ssList, struct hvGfx *hvg, int xOff, int yOff, int width, int heightPer, int lineHeight, int seqStart, int seqEnd, double scale, MgFont *font, Color color, Color *shades, char *drawName, void (*mapItem)(char *tableName, struct altGraphX *ag, struct hvGfx *hvg, int start, int end, int x, int y, int width, int height)) /** Draw a splicing graph for each altGraphX in the agList where the exons don't overlap as they have been laid out in the spaceSaver list. */ { struct altGraphX *ag = NULL; struct spaceSaver *ss = NULL; int count =0; int i=0,j=0, minRow=0; int y=0; for(ss = ssList, ag=agList; ss != NULL && ag != NULL; ss=ss->next, ag=ag->next) { struct spaceNode *sn = NULL; /* Draw a clickable thing if we can. */ if(mapItem != NULL) { int mapStart = round((ag->tStart - seqStart)*scale) + xOff; int mapEnd = round((ag->tEnd - seqStart)*scale) + xOff; int mapHeight = ss->rowCount * lineHeight; int mapWidth = mapEnd - mapStart; if(mapWidth < 1) mapStart = 1; mapItem(drawName, ag, hvg, ag->tStart, ag->tEnd, mapStart, yOff, mapWidth, mapHeight); } /* Draw all of the exons that have been stored in the spacesaver. */ for (sn = ss->nodeList; sn != NULL; sn = sn->next) { struct spliceEdge *se = sn->val; if(se->type == ggExon) { y = yOff + (lineHeight * sn->row) + (lineHeight/2); drawExonAt(se, heightPer, seqStart, seqEnd, hvg, xOff, y, scale, font, color, shades); } else if(se->type == ggSJ) { for(i=0; i< ss->rowCount; i++) { if(se->rowStarts != NULL && se->rowStarts[i] == 1) { for(j=0; j< ss->rowCount; j++) { if(se->rowEnds != NULL && se->rowEnds[j] == 1) { int x1 = round((se->start - seqStart)*scale) + xOff; int x2 = round((se->end - seqStart)*scale) + xOff; int midX = 0, midY =0; Color c = MG_BLACK; minRow = min(j, i); midY = yOff + (lineHeight * minRow); midX = se->mid + xOff; hvGfxLine(hvg, x1,round(yOff+(lineHeight*i)+lineHeight/2), midX, midY, c); hvGfxLine(hvg, midX, midY, x2, round(yOff+(lineHeight*j)+lineHeight/2), c); } } } } } } count++; yOff += ss->rowCount *lineHeight; } } void altGraphXReverseComplement(struct altGraphX *ag) /* Switch an altGraphX record around so it looks like the chromosomal coordinates were reverse complemented. */ { int i; int width = ag->tEnd - ag->tStart; int *tmp = NULL; for(i=0; i<ag->vertexCount; i++) { ag->vPositions[i] = (width - (ag->vPositions[i] - ag->tStart)) + ag->tStart; if(ag->vTypes[i] == ggHardEnd) ag->vTypes[i] = ggHardStart; else if(ag->vTypes[i] ==ggHardStart) ag->vTypes[i] = ggHardEnd; else if(ag->vTypes[i] == ggSoftEnd) ag->vTypes[i] = ggSoftStart; else if(ag->vTypes[i] == ggSoftStart) ag->vTypes[i] = ggSoftEnd; } if(sameString(ag->strand, "+")) snprintf(ag->strand, sizeof(ag->strand), "%s", "-"); else if(sameString(ag->strand, "-")) snprintf(ag->strand, sizeof(ag->strand), "%s", "+"); else errAbort("altGraphX::altGraphXReverseComplement() - Don't recognize strand: %s", ag->strand); tmp = ag->edgeEnds; ag->edgeEnds = ag->edgeStarts; ag->edgeStarts = tmp; } int altGraphXGetEdgeNum(struct altGraphX *ag, int v1, int v2) /** Find the edge index that corresponds to v1 and v2 */ { int eC = ag->edgeCount; int i=0; for(i=0;i<eC; i++) { if(ag->edgeStarts[i] == v1 && ag->edgeEnds[i] == v2) { return i; } } errAbort("altGraphX::getEdgeNum() - Didn't find edge with vertices %d-%d in %s.", v1, v2, ag->name); return -1; } void altGraphXEnlargeExons(struct altGraphX *ag) /* Scale the exons in size such that the largest exon and the largest intron are * the same size. The method is to extend the ends of exons. This makes the * exons appear larger, but the coordinates will no longer be meaningful with * respect to the genome coordinates. Exons with alternative left ends will be * scaled according to just one of the left ends. */ { bool **em = altGraphXCreateEdgeMatrix(ag); int i,j,k; int maxIntron=1; int *vPos = ag->vPositions; int maxExon = 1; int increment = 0; double multFact = 1.2; int vC = ag->vertexCount; int last = 0; /* Find largest intron and smallest exon. */ for(i=0; i<vC; i++) { for(j=0; j<vC; j++) { if(em[i][j]) { if(altGraphXEdgeVertexType(ag, i, j) == ggExon) maxExon = max(maxExon, abs(vPos[i] - vPos[j])); else maxIntron = max(maxIntron, abs(vPos[i] - vPos[j])); } } } multFact = (double)maxIntron/maxExon; /* Some genes already have small introns- have to avoid shrinking the exons. */ if(multFact < 1) multFact = 1; last = -1; /* Enlarge each exon by stretching one end. */ for(i=0; i<vC; i++) { for(j=0; j<vC; j++) { if(em[i][j]) { /* If it is an exon and has a different end than the last exon stretch the end. */ if((altGraphXEdgeVertexType(ag, i, j) == ggExon) && (last < vPos[i])) { int mark = vPos[j]; int size = abs(vPos[i] - vPos[j]); increment = multFact*size - size; last = vPos[j]; ag->tEnd += increment; for(k=0; k<vC; k++) { if(vPos[k] >= mark) vPos[k] += increment; } } } } } altGraphXFreeEdgeMatrix(&em, vC); } int agxFindClosestDownstreamVertex(struct altGraphX *ag, bool **em, int v) /* Return the closest vertex that connects from v. -1 if none connect. */ { int i; int minV=-1; int minDist = BIGNUM; int *vPos = ag->vPositions; for(i=v+1; i<ag->vertexCount; i++) { if(em[v][i]) { int dist = vPos[i] - vPos[v]; if(dist < minDist) { minDist = dist; minV = i; } } } return minV; } int agxFindClosestUpstreamVertex(struct altGraphX *ag, bool **em, int v) /* Return the closest vertex that connects to v. -1 if none connect. */ { int i; int minV=-1; int minDist = BIGNUM; int *vPos = ag->vPositions; for(i=0; i<v; i++) { if(em[i][v]) { int dist = vPos[v] - vPos[i]; if(dist < minDist) { minDist = dist; minV = i; } } } return minV; } static int agxRowSum(bool *em, unsigned char *vTypes, int vCount) /* Return the number of hard starts and stops in a row. */ { int count =0; int i=0; for(i=0; i<vCount; i++) { if(em[i] && (vTypes[i] == ggHardStart || vTypes[i] == ggHardEnd) ) count++; } return count; } int agxColSum(bool **em, unsigned char *vTypes, int vCount, int col) /* Return the number or hard starts and ends in a column. */ { int count =0; int i=0; for(i=0; i<vCount; i++) { if(em[i][col] && (vTypes[i] == ggHardStart || vTypes[i] == ggHardEnd) ) count++; } return count; } static int agxEdgesInArea(struct altGraphX *ag, bool **em, int v1, int v2) /* Return the number of edges in an area of the adjacency matrix. */ { int sum =0; int i=0, j=0; int vC = ag->vertexCount; for(i=0; i<=v1; i++) { for(j=v2; j<vC; j++) { if(em[i][j]) sum++; } } return sum; } boolean agxIsAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int *altBpStart, int *altBpEnd, int *firstVertex, int *lastVertex) /* Return TRUE if we have an edge pattern of: he->hs----->he \-->hs--/ Which inicates two possible starts to an exon (alt 3' starts). Use agxEdgesInArea() to investigate an area that begins in the rows with the common hard end and finishes with common hard end. 01234 (4 vertices involved in alt splicing) 0 1 ++ 2 + 3 + 4 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 4; /* Number of edges in alt spliced portion of graph. */ /* Quick check. */ if(vTypes[vs] != ggHardEnd || vTypes[ve1] != ggHardStart || vTypes[ve2] != ggHardStart) return FALSE; if(em[vs][ve1] && em[vs][ve2]) { /* Try to find common end for the two hard starts. */ for(i=0; i<ag->vertexCount; i++) { if(vTypes[i] == ggHardEnd && em[ve1][i] && em[ve2][i]) { /* Make sure that they only connect to that one hard start. */ if(agxRowSum(em[ve1],ag->vTypes,ag->vertexCount) == 1 && agxRowSum(em[ve2],ag->vTypes,ag->vertexCount) == 1 && agxEdgesInArea(ag,em,i-1,vs+1) == numAltVerts) { *lastVertex = i; *firstVertex = agxFindClosestUpstreamVertex(ag, em, vs); *altBpStart = ve1; *altBpEnd = ve2; return TRUE; } } } } return FALSE; } boolean agxIsAlt5Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int *altBpStart, int *altBpEnd, int *termExonStart, int *termExonEnd) /* Return TRUE if we have an edge pattern of: hs->he----->hs \-->he--/ Which indicates two possible ends to an exon (alt 5' intron starts). Use agxEdgesInArea() to investigate an area that begins in the rows with the common hard end and finishes with common hard end. esees 01234 (4 vertices involved in alt splicing) 0 1 ++ 2 + 3 + 4 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 4; /* Quick check. */ if(vTypes[vs] != ggHardStart || vTypes[ve1] != ggHardEnd || vTypes[ve2] != ggHardEnd) return FALSE; if(em[vs][ve1] && em[vs][ve2]) { /* Try to find common start for the two hard ends. */ for(i=0; i<ag->vertexCount; i++) { if(vTypes[i] == ggHardStart && em[ve1][i] && em[ve2][i]) { /* Make sure that they only connect to that one hard start. */ if(agxRowSum(em[ve1],ag->vTypes,ag->vertexCount) == 1 && agxRowSum(em[ve2],ag->vTypes,ag->vertexCount) == 1 && agxEdgesInArea(ag,em,i-1,vs+1) == numAltVerts) { /* Report the first and last vertexes. */ *termExonStart = i; *termExonEnd = agxFindClosestDownstreamVertex(ag, em, i); *altBpStart = ve1; *altBpEnd = ve2; return TRUE; } } } } return FALSE; } boolean agxIsCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int *altBpStartV, int *altBpEndV, int *startV, int *endV) /* Return TRUE if SIMPLE cassette exon. Looking for pattern: he--->hs---->he---->hs \----------------/ Use agxEdgesInArea() to investigate that encompasses the common hard end and common hard start. Should only be 4 edges in area defined by splicing. sesese 012345 0 1 + + 2 + 3 + 4 5 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 4; /* Quick check. */ if(vTypes[vs] != ggHardEnd || vTypes[ve1] != ggHardStart || vTypes[ve2] != ggHardStart) return FALSE; if(em[vs][ve1] && em[vs][ve2]) { /* Try to find a hard end that connects ve1 and ve2. */ for(i=0; i<ag->vertexCount; i++) { if(vTypes[i] == ggHardEnd && em[ve1][i] && em[i][ve2]) { /* Make sure that our cassette only connect to downstream. otherwise not so simple...*/ if(agxRowSum(em[i],ag->vTypes,ag->vertexCount) == 1 && agxRowSum(em[ve1],ag->vTypes,ag->vertexCount) == 1 && agxColSum(em, ag->vTypes, ag->vertexCount, ve1) == 1 && agxEdgesInArea(ag,em,ve2-1,vs+1) == numAltVerts) { *startV = agxFindClosestUpstreamVertex(ag, em, vs); *endV = agxFindClosestDownstreamVertex(ag, em, ve2); *altBpStartV = ve1; *altBpEndV = i; return TRUE; } } } } return FALSE; } boolean agxIsRetainIntron(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int *altBpStart, int *altBpEnd) /* return TRUE if retained intron. Looking for pattern: hs-->he---->hs--->he \---------------/ Use agxEdgesInArea() to investigate that encompasses the common hard end and common hard start. Should only be 4 edges in area defined by splicing. eseses 012345 0 1 + + 2 + 3 + 4 5 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 4; /* Quick check. */ if(vTypes[vs] != ggHardStart || vTypes[ve1] != ggHardEnd) return FALSE; if(em[vs][ve1] && em[vs][ve2]) { /* Try to find a hard start that connects ve1 and ve2. */ for(i=0; i<ag->vertexCount; i++) { if(vTypes[i] == ggHardStart && em[ve1][i] && em[i][ve2] && agxEdgesInArea(ag,em,ve2-1,vs+1) == numAltVerts) { *altBpStart = ve1; *altBpEnd = i; return TRUE; } } } return FALSE; } boolean agxIsAlt5PrimeSoft(struct altGraphX *ag, bool **em, int vs1, int vs2, int ve1, int ve2, int *altBpStart, int *altBpEnd, int *termExonStart, int *termExonEnd) /* Return TRUE if we have an edge pattern of: ss->he----->hs ss-->he--/ Which indicates a possible alternative transcription start site. Use agxEdgesInArea() to investigate an area that begins in the rows with differnt starts, one of which is soft, and finishes with common hard start. esees 012345 (4 vertices involved in alt splicing) 0 1 + 2 + 3 + 4 + 5 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 4; /* Ends must be intron sites */ if(vTypes[ve1] != ggHardEnd || vTypes[ve2] != ggHardEnd) return FALSE; /* One has to be soft started (transcription start). */ if(vTypes[vs1] != ggSoftStart && vTypes[vs2] != ggSoftStart) return FALSE; if(em[vs1][ve1] && em[vs2][ve2]) { /* Try to find common start for the two hard ends. */ for(i=0; i<ag->vertexCount; i++) { if(vTypes[i] == ggHardStart && em[ve1][i] && em[ve2][i]) { /* Make sure that they only connect to that one hard start. */ if(agxRowSum(em[ve1],ag->vTypes,ag->vertexCount) <= 1 && agxRowSum(em[ve2],ag->vTypes,ag->vertexCount) <= 1 && agxEdgesInArea(ag,em,i-1,vs1+1) <= numAltVerts) { /* Report the first and last vertexes. */ *termExonStart = i; *termExonEnd = agxFindClosestDownstreamVertex(ag, em, i); *altBpStart = ve1; *altBpEnd = ve2; return TRUE; } } } } return FALSE; } boolean agxIsAlt3PrimeSoft(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, int *altBpStart, int *altBpEnd, int *firstVertex, int *lastVertex) /* Return TRUE if we have an edge pattern of: he->hs----->se \------>hs-->se Which inicates two possible transcription ends. Use agxEdgesInArea() to investigate an area that begins in the rows with the common hard end and finishes with different ends at least one of which is a soft end. 012345 (5 vertices involved in alt splicing) 0 1 ++ 2 + 3 + 4 5 */ { unsigned char *vTypes = ag->vTypes; int i=0; int numAltVerts = 5; /* Number of edges in alt spliced portion of graph. */ /* Quick check. */ if(vTypes[vs] != ggHardEnd || vTypes[ve1] != ggHardStart || vTypes[ve2] != ggHardStart) return FALSE; if(em[vs][ve1] && em[vs][ve2]) { int sEnd1 = -1, sEnd2 = -1; /* Find the ends connecting to the vertex ends. */ sEnd1 = agxFindClosestDownstreamVertex(ag, em, ve1); sEnd2 = agxFindClosestDownstreamVertex(ag, em, ve2); /* Make sure they are connected to something. */ if(sEnd1 == -1 || sEnd2 == -1) return FALSE; /* Make sure one is a soft end. */ if(vTypes[sEnd1] != ggSoftEnd && vTypes[sEnd2] != ggSoftEnd) return FALSE; /* Make sure that they only connect to one edge each and that there isn't some other alt-splicing going on around them. */ if(agxRowSum(em[ve1],ag->vTypes,ag->vertexCount) <= 1 && agxRowSum(em[ve2],ag->vTypes,ag->vertexCount) <= 1 && agxEdgesInArea(ag,em,max(sEnd1,sEnd2),vs+1) <= numAltVerts) { *lastVertex = i; *firstVertex = agxFindClosestUpstreamVertex(ag, em, vs); *altBpStart = ve1; *altBpEnd = ve2; return TRUE; } } return FALSE; } static struct altGraphX *agxCopyIndividualComponents(struct altGraphX *agx, int *vComponents, int compCount) /* Take the original splicing graph and an array that describes what component each vertex belongs to. In current implementation there may be mrnaRefs, tissues and libs that aren't used as all of them are copied for each graph. */ { int i = 0; struct altGraphX *agxNew = NULL, *agxList = NULL; int compIx = 0; int vC = agx->vertexCount; int eC = agx->edgeCount; int *eStarts = agx->edgeStarts; int *eEnds = agx->edgeEnds; /* For each component. */ for(compIx = 0; compIx < compCount; compIx++) { char buff[256]; int vCNew = 0; int eCNew = 0; int *mapping = NULL; struct evidence **evNew = NULL; struct evidence *ev = NULL, *evList = NULL; AllocArray(mapping, vC); AllocArray(evNew, eC); /* Basic idea is to clone the graph and then prune out the unused vertices and edges. */ agxNew = altGraphXClone(agx); safef(buff, sizeof(buff), "%s-%d", agx->name, compCount); freez(&agxNew->name); agxNew->name = cloneString(buff); /* Set tStart and tEnd to maximum and minimum values possible. */ agxNew->tStart = agx->tEnd; agxNew->tEnd = agx->tStart; /* Copy over the vertices. */ for(i = 0; i < vC; i++) { if(vComponents[i] == compIx) { mapping[i] = vCNew; agxNew->vTypes[vCNew] = agx->vTypes[i]; agxNew->vPositions[vCNew] = agx->vPositions[i]; agxNew->tStart = min(agxNew->tStart, agxNew->vPositions[vCNew]); agxNew->tEnd = max(agxNew->tEnd, agxNew->vPositions[vCNew]); vCNew++; } else mapping[i] = -1; } /* Set up a temp array for the evidence. */ for(ev = agxNew->evidence, i=0; ev != NULL; ev = ev->next, i++) evNew[i] = ev; /* Copy over the new edges. */ for(i = 0; i < eC; i++) { if(mapping[eStarts[i]] != -1) { agxNew->edgeStarts[eCNew] = mapping[eStarts[i]]; agxNew->edgeEnds[eCNew] = mapping[eEnds[i]]; /* Add evidence to growing list. */ slAddHead(&evList, evNew[i]); eCNew++; } else { /* Don't forget to free the evidence before we lose a handle to it. */ evidenceFree(&evNew[i]); } } agxNew->vertexCount = vCNew; agxNew->edgeCount = eCNew; slReverse(&evList); agxNew->evidence = evList; slAddHead(&agxList, agxNew); for(i=0; i < eCNew; i++) agx->edgeTypes[i] = getSpliceEdgeType(agxNew, i); /* Cleanup. */ freez(&mapping); freez(&evNew); } slReverse(&agxList); return agxList; } static void agxCcDfs(int **adjList, int *adjCounts, int *vColors, int *vComponents, int vCount, int vertIx, int component) /* Depth first search to identify all of the vertices that connect to this one. */ { int nextV = 0; int adjVertCount = 0; /* Mark as seen. */ vColors[vertIx] = agGray; vComponents[vertIx] = component; /* Recurse into each vertex connected to this one that hasn't been discovered */ while(adjVertCount < adjCounts[vertIx]) { nextV = adjList[vertIx][adjVertCount++]; if(vColors[nextV] == agWhite) { agxCcDfs(adjList, adjCounts, vColors, vComponents, vCount, nextV, component); } } /* Mark as done. */ vColors[vertIx] = agBlack; } struct altGraphX *agxConnectedComponents(struct altGraphX *agx) /* Find the connected components of the graph and break them up into seperate graphs. Free the result with altGraphXFreeList(). Algorithm for connected components from: Baase & Van Gelder "Computer Algorithms Introduction to Design and Analysis" 3rd edition 2000. pp 340-341 */ { int **adjList = NULL; /* Adjacency list. */ int *adjCounts = NULL; /* Number of vertices in the adj list. */ int *vColors = NULL; /* Colors of vertices. */ int *vComponents = NULL; /* Component each vertice is in. */ int *vPos = agx->vPositions; /* Convenience variable. */ int *starts = agx->edgeStarts; int *ends = agx->edgeEnds; int i = 0, vertIx = 0, j = 0; int vC = agx->vertexCount; int eC = agx->edgeCount; int *eS = agx->edgeStarts; int *eE = agx->edgeEnds; int compCount = 0; struct altGraphX *agxList = NULL; /* Allocate the adj list memeory. */ AllocArray(adjList, vC); AllocArray(adjCounts, vC); AllocArray(vColors, vC); AllocArray(vComponents, vC); for(i = 0; i < vC; i++) AllocArray(adjList[i], vC); /* Fill in the adjList. Connect all edges both ways, want to connecte everything possible. Wow, a lot happens in those two lines of code... */ for(i = 0; i < eC; i++) { adjList[eS[i]][adjCounts[eS[i]]++] = eE[i]; adjList[eE[i]][adjCounts[eE[i]]++] = eS[i]; } /* If two exons overlap link the starts and ends so they can be in the same component. */ for(i = 0; i < eC; i++) { if(!(getSpliceEdgeType(agx, i) == ggExon)) /* only exons. */ continue; for(j = 0; j < eC; j++) { if(!(getSpliceEdgeType(agx, i) == ggExon) || i == j || starts[i] == starts[j] || ends[i] == ends[j]) continue; /* Only looking at exons. */ /* If the exons overlap connect the vertices in the adjacency lists. */ if(rangeIntersection(vPos[starts[i]],vPos[ends[i]], vPos[starts[j]], vPos[ends[j]]) > 0) { if(uniqeInArray(adjList[eS[i]], adjCounts[eS[i]], eS[j])) adjList[eS[i]][adjCounts[eS[i]]++] = eS[j]; if(uniqeInArray(adjList[eS[j]], adjCounts[eS[j]], eS[i])) adjList[eS[j]][adjCounts[eS[j]]++] = eS[i]; if(uniqeInArray(adjList[eE[i]], adjCounts[eE[i]], eE[j])) adjList[eE[i]][adjCounts[eE[i]]++] = eE[j]; if(uniqeInArray(adjList[eE[j]], adjCounts[eE[j]], eE[i])) adjList[eE[j]][adjCounts[eE[j]]++] = eE[i]; } } } for(vertIx = 0; vertIx < vC; vertIx++) if(vColors[vertIx] == agWhite) agxCcDfs(adjList, adjCounts, vColors, vComponents, vC, vertIx, compCount++); agxList = agxCopyIndividualComponents(agx, vComponents, compCount); for(i = 0; i < vC; i++) freez(&adjList[i]); freez(&adjCounts); freez(&vColors); freez(&vComponents); freez(&adjList); return agxList; }