2888f5c0526208e57ceae28ebaf4e4e0172afec3 tdreszer Wed Apr 13 12:58:33 2011 -0700 Turned on expId and dccAccession updating in mdbUpdate. Removed old expTbl function from mdbPrint diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 8e1c103..a426599 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -2653,56 +2653,58 @@ // returns NULL or the Experiment Defining Variables and values for this composite member object // If includeNone, then defined variables not found in obj will be included as {var}="None". { // In rare cases, the EDVs reside with the object and NOT in a objType=composite. struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(mdbObj); // looking locally first. if (compositeEdvs == NULL) { compositeEdvs = mdbObjFindCompositeNamedEncodeEdvs(conn,tableName,mdbObj); if (compositeEdvs == NULL) return NULL; } return mdbObjEncodeEdvsAsMdbVars(mdbObj,compositeEdvs,includeNone); } -struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,struct mdbObj **pMdbObjs, - int warn,boolean createExpIfNecessary) +struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,char *expTable, + struct mdbObj **pMdbObjs,int warn,boolean createExpIfNecessary,boolean updateAccession) // Organizes objects into experiments and validates experiment IDs. Will add/update the ids in the structures. // If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds; // warn=2, then print line for each obj with expId or warning. -// createExpIfNecessary means go ahead and add to the hgFixed.encodeExp table to get an ID +// createExpIfNecessary means add expId to encodeExp table. updateAccession too if necessary. // Returns a new set of mdbObjs that is what can (and should) be used to update the mdb via mdbObjsSetToDb(). { // Here is what "experimentify" does from "mdbPrint -encodeExp" and "mdbUpdate -encodeExp": // - Uses normal selection methods to get a set of objects (e.g. one composite worth) or all objs. (in mdbPrint and mdbUpdate) // - This API: // - Breaks up and walks through set of objects composite by composite // - Looks up EDVs (Experiment Defining Variables) for composite. // These are defined in the mdb under objType=composite expVars= // - Breaks up and walks through composite objects exp by exp as defined by EDVs // - Uses encodeExp API to determine what expId should be. // - Creates new mdbObjs list of updates needed to put expId and dccAccession into the mdb. // - From "mdbPrint", this API warns of mismatches or missing expIds // - From "mdbUpdate" (not -test) then that utility will update the mdb from this API's return structs. If -test, will reveal what would be updated. // - FIXME: Need to extend this to update dccAccession separately from expId. if (pMdbObjs == NULL || *pMdbObjs == NULL) return 0; struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbProcessedObs = NULL; struct mdbObj *mdbUpdateObjs = NULL; +if (expTable == NULL) + expTable = ENCODE_EXP_TABLE; verbose(2, "mdbObjsEncodeExperimentify() beginning for %d objects.\n",slCount(*pMdbObjs)); // Sort all objects by composite, so that we handle composite by composite mdbObjsSortOnVars(&mdbObjs, MDB_VAR_COMPOSITE); struct dyString *dyVars = dyStringNew(256); while(mdbObjs != NULL) { // Work on a composite at a time boolean compositelessObj = FALSE; char *compName = NULL; while(mdbObjs != NULL && compName == NULL) { compName = mdbObjFindValue(mdbObjs,MDB_VAR_COMPOSITE); @@ -2816,43 +2818,47 @@ // Work on one experiment at a time verbose(2, "mdbObjsEncodeExperimentify() working on EDVs: %s.\n",dyStringContents(dyVars)); struct mdbObj *mdbExpObjs = mdbObjsFilterByVars(&mdbCompositeObjs,dyStringContents(dyVars),TRUE,TRUE); // None={notFound} // --- At this point we have nibbled off an experiment worth of objects from the composite set of objects int objsInExp = slCount(mdbExpObjs); assert(objsInExp > 0); expCount++; expObjsCount += objsInExp; // Total of all experimental objects across the composite // Look up each exp in EXPERIMENTS_TABLE char experimentId[128]; int expId = -1; - struct encodeExp *exp = encodeExpGetByMdbVars(db, edvVarVals); + struct encodeExp *exp = encodeExpGetByMdbVarsFromTable(db, edvVarVals, expTable); if (exp == NULL && createExpIfNecessary) - exp = encodeExpGetOrCreateByMdbVars(db, edvVarVals); + exp = encodeExpGetOrCreateByMdbVarsFromTable(db, edvVarVals, expTable); mdbVarsFree(&edvVarVals); // No longer needed + // Make sure the accession is set if requested. + if (createExpIfNecessary && updateAccession && exp->ix != -1 && exp->accession == NULL) + encodeExpSetAccession(exp, expTable); + if (exp != NULL) expId = exp->ix; if (expId == -1) { safef(experimentId,sizeof(experimentId),"{missing}"); if (warn > 0) - printf("Experiment %s EDV: [%s] is not defined in %s.%s table.\n",experimentId,dyStringContents(dyVars), ENCODE_EXP_DATABASE, ENCODE_EXP_TABLE); + printf("Experiment %s EDV: [%s] is not defined in %s.%s table.\n",experimentId,dyStringContents(dyVars), ENCODE_EXP_DATABASE, expTable); //printf("Experiment %s EDV: [%s] is not defined in %s table. Remaining:%d and %d\n",experimentId,dyStringContents(dyVars),EXPERIMENTS_TABLE,slCount(mdbCompositeObjs),slCount(mdbObjs)); if (warn < 2) // From mdbUpdate (warn=1), just interested in testing waters. From mdbPrint (warn=2) list all objs in exp. { expMissing++; mdbProcessedObs = slCat(mdbProcessedObs,mdbExpObjs); mdbExpObjs = NULL; encodeExpFree(&exp); continue; } } else { safef(experimentId,sizeof(experimentId),"%d",expId); if (warn > 0) printf("Experiment %s has %d objects based upon %d EDVs: [%s].\n",experimentId,slCount(mdbExpObjs),valsFound,dyStringContents(dyVars)); // Set the stage @@ -2868,47 +2874,57 @@ while(mdbExpObjs != NULL) { struct mdbObj *obj = slPopHead(&mdbExpObjs); { // NOTE: This list could expand but we expect only tables and files to be objs in an experiment char *objType = mdbObjFindValue(obj,MDB_OBJ_TYPE); assert(objType != NULL && (sameString(objType,MDB_OBJ_TYPE_TABLE) || sameString(objType,MDB_OBJ_TYPE_FILE))); } boolean updateObj = FALSE; char *val = mdbObjFindValue(obj,MDB_VAR_ENCODE_EXP_ID); if (val != NULL) { foundId = TRUE; // warn==1 will give only 1 exp wide error if no individual errors. NOTE: would be nice if those with expId sorted to beginning, but can't have everything. int thisId = atoi(val); - if (thisId == expId && expId != -1) + if (expId == -1 || thisId != expId) { - errors--; // One less error - if (warn > 1) // NOTE: Could give more info for each obj as per wrangler's desires + updateObj = TRUE; + if (warn > 0) + printf(" %s %s has bad %s=%s.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID,val); + } + else { char *acc = mdbObjFindValue(obj,MDB_VAR_DCC_ACCESSION); // FIXME: Add code to update accession to encodeExp + if (exp->accession != NULL && (acc == NULL || differentString(acc,exp->accession))) + { + updateObj = TRUE; + if (warn > 1) // NOTE: Could give more info for each obj as per wrangler's desires + { if (acc == NULL) - printf(" %s %s\n",experimentId,obj->obj); - else printf(" %s %s %s set, needs %s.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID,MDB_VAR_DCC_ACCESSION); + else + printf(" %s %s %s set, has wrong %s: %s.\n",experimentId,obj->obj, + MDB_VAR_ENCODE_EXP_ID,MDB_VAR_DCC_ACCESSION,exp->accession); } } else { - updateObj = TRUE; - if (warn > 0) - printf(" %s %s has bad %s=%s.\n",experimentId,obj->obj,MDB_VAR_ENCODE_EXP_ID,val); + errors--; // One less error + if (warn > 1) // NOTE: Could give more info for each obj as per wrangler's desires + printf(" %s %s\n",experimentId,obj->obj); + } } } else { updateObj = (expId != -1); if ((foundId && warn > 0) || warn > 1) { if (updateObj) printf(" %s %s needs updating to mdb.\n",experimentId,obj->obj); else printf(" %s %s\n",experimentId,obj->obj); // missing } } // This object needs to be updated.