0713928c047bb0bbc8766e38edbc1b475e3a111b tdreszer Fri Apr 8 17:41:06 2011 -0700 Adjusted encodeExp functionality to handle compositeless experiments without choking diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c index 11ea4f4..12fe3a2 100644 --- src/hg/lib/mdb.c +++ src/hg/lib/mdb.c @@ -2584,31 +2584,31 @@ { //char buffer[128]; //regerror(err, &regEx, buffer, sizeof buffer); printf("INVALID regex '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val); invalids++; } regfree(&regEx); } else verbose(1,"ERROR in %s: Unknown validationRule rule '%s' for term %s.\n",CV_FILE_NAME,validationRule,mdbVar->var); } } return invalids; } -static struct slName *mdbCompositeFindEncodeEdvs(struct mdbObj *compObj) +static struct slName *mdbObjGetNamedEncodeEdvs(struct mdbObj *compObj) // returns NULL or the list of EDVs defined for this composite { char *edvs = mdbObjFindValue(compObj,MDB_VAR_ENCODE_EDVS); if (edvs == NULL) return NULL; edvs = cloneString(edvs); if (strchr( edvs,',') != NULL) // Tolerate delimit by commas strSwapChar(edvs,',',' '); else if (strchr(edvs,';') != NULL) // Tolerate delimit by semicolons strSwapChar(edvs,';',' '); struct slName *compositeEdvs = slNameListFromString(edvs,' '); freeMem(edvs); return compositeEdvs; @@ -2623,52 +2623,57 @@ for(;var!=NULL;var=var->next) { char *val = mdbObjFindValue(mdbObj,var->name); if (val) mdbVarAdd(&edvVars, var->name,val); else if (includeNone) { if (differentWord(var->name,ENCODE_EXP_FIELD_ORGANISM)) // Does not go into EDV's sent to encodeExp table mdbVarAdd(&edvVars, var->name,MDB_VAL_ENCODE_EDV_NONE); } } slReverse(&edvVars); return edvVars; } -struct slName *mdbObjFindCompositeEncodeEdvNames(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj) +struct slName *mdbObjFindCompositeNamedEncodeEdvs(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj) // returns NULL or the Experiment Defining Variable names for this composite { if (!mdbObjIsCompositeMember(mdbObj)) return 
NULL; // This should be a valid composite memeber struct mdbObj *compObj = mdbObjQueryCompositeObj(conn,tableName,mdbObj); if (compObj == NULL) return NULL; -struct slName *edvs = mdbCompositeFindEncodeEdvs(compObj); +struct slName *edvs = mdbObjGetNamedEncodeEdvs(compObj); mdbObjFree(&compObj); return edvs; } struct mdbVar *mdbObjFindEncodeEdvPairs(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj,boolean includeNone) // returns NULL or the Experiment Defining Variables and values for this composite member object // If includeNone, then defined variables not found in obj will be included as {var}="None". { -struct slName *compositeEdvs = mdbObjFindCompositeEncodeEdvNames(conn,tableName,mdbObj); +// In rare cases, the EDVs reside with the object and NOT in a objType=composite; otherwise fall back to the composite's EDVs. +struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(mdbObj); // looking locally first. +if (compositeEdvs == NULL) + { + compositeEdvs = mdbObjFindCompositeNamedEncodeEdvs(conn,tableName,mdbObj); if (compositeEdvs == NULL) return NULL; + } return mdbObjEncodeEdvsAsMdbVars(mdbObj,compositeEdvs,includeNone); } #define EXPERIMENTS_TABLE "hgFixed.encodeExp" struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,struct mdbObj **pMdbObjs, int warn,boolean createExpIfNecessary) // Organizes objects into experiments and validates experiment IDs. Will add/update the ids in the structures. // If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds; // warn=2, then print line for each obj with expId or warning. // createExpIfNecessary means go ahead and add to the hgFixed.encodeExp table to get an ID // Returns a new set of mdbObjs that is what can (and should) be used to update the mdb via mdbObjsSetToDb(). { // Here is what "experimentify" does from "mdbPrint -encodeExp" and "mdbUpdate -encodeExp": @@ -2678,82 +2683,107 @@ // - Looks up EDVs (Experiment Defining Variables) for composite. 
// These are defined in the mdb under objType=composite expVars= // - Breaks up and walks through composite objects exp by exp as defined by EDVs // - Uses encodeExp API to determine what expId should be. // - Creates new mdbObjs list of updates needed to put expId and dccAccession into the mdb. // - From "mdbPrint", this API warns of mismatches or missing expIds // - From "mdbUpdate" (not -test) then that utility will update the mdb from this API's return structs. If -test, will reveal what would be updated. // - FIXME: Need to extend this to update dccAccession separately from expId. if (pMdbObjs == NULL || *pMdbObjs == NULL) return 0; struct mdbObj *mdbObjs = *pMdbObjs; struct mdbObj *mdbProcessedObs = NULL; struct mdbObj *mdbUpdateObjs = NULL; +verbose(2, "mdbObjsEncodeExperimentify() beginning for %d objects.\n",slCount(*pMdbObjs)); // Sort all objects by composite, so that we handle composite by composite mdbObjsSortOnVars(&mdbObjs, MDB_VAR_COMPOSITE); struct dyString *dyVars = dyStringNew(256); while(mdbObjs != NULL) { // Work on a composite at a time + boolean compositelessObj = FALSE; char *compName = NULL; while(mdbObjs != NULL && compName == NULL) { compName = mdbObjFindValue(mdbObjs,MDB_VAR_COMPOSITE); if (compName == NULL) { - verbose(1, "Object '%s' has no %s defined.\n",mdbObjs->obj,MDB_VAR_COMPOSITE); + if (mdbObjFindValue(mdbObjs,MDB_VAR_ENCODE_EDVS) == NULL) + { + verbose(1, "Object '%s' has no %s or %s defined.\n",mdbObjs->obj,MDB_VAR_COMPOSITE,MDB_VAR_ENCODE_EDVS); mdbProcessedObs = slCat(mdbProcessedObs,slPopHead(&mdbObjs)); continue; } + verbose(2, "mdbObjsEncodeExperimentify() starting on compositeless set.\n"); + break; + } } - struct mdbObj *mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE); + struct mdbObj *mdbCompositeObjs = NULL; + if (compName != NULL) + mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE); + else + mdbCompositeObjs = slPopHead(&mdbObjs); // Rare cases there is no 
composite set. + assert(mdbCompositeObjs != NULL); // --- At this point we have nibbled off a composite worth of objects from the full set of objects // Find the composite obj if it exists - struct mdbObj *compObj = mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE); + struct mdbObj *compObj = NULL; + if (compName != NULL) + { + compObj =mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE); if (compObj == NULL) // May be NULL if mdbObjs passed in was produced by too narrow of selection criteria + { compObj = mdbObjQueryCompositeObj(conn,tableName,mdbCompositeObjs); // First obj on list will do - else - slAddHead(&mdbProcessedObs,compObj); // We can still use the pointer, but will not "process" it. NOTE: leak the queried one - if(compObj == NULL) + if(compObj == NULL) // This should be assertable { verbose(1, "Composite '%s' has not been defined.\n",compName); mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs); mdbCompositeObjs = NULL; continue; } + } + else + slAddHead(&mdbProcessedObs,compObj); // We can still use the pointer, but will not "process" it. NOTE: leak the queried one + } + else + { + compObj = mdbCompositeObjs; // Should be only one + compName = mdbCompositeObjs->obj; + compositelessObj = TRUE; + } + verbose(2, "mdbObjsEncodeExperimentify() working on %s %s%s.\n",compName,MDB_VAR_COMPOSITE,(compositelessObj?"less set":"")); - // Obtain experiment defining variables for the composite - struct slName *compositeEdvs = mdbCompositeFindEncodeEdvs(compObj); + // Obtain experiment defining variables for the composite (or compositeless obj) + struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(compObj); if (compositeEdvs == NULL) { - verbose(1, "There are no experiment defining variables established for this %s. Add them to obj %s => var:%s.\n", - MDB_VAR_COMPOSITE, compName,MDB_VAR_ENCODE_EDVS); + verbose(1, "There are no experiment defining variables established for this %s%s. 
Add them to obj %s => var:%s.\n", + MDB_VAR_COMPOSITE,(compositelessObj?"less set":""), compName,MDB_VAR_ENCODE_EDVS); mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs); mdbCompositeObjs = NULL; continue; } dyStringClear(dyVars); dyStringAppend(dyVars,slNameListToString(compositeEdvs, ' ')); if (warn > 0) - printf("Composite '%s' with %d objects has %d EDVs(%s): [%s].\n",compName,slCount(mdbCompositeObjs), - slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars)); // Set the stage + printf("Composite%s '%s' with %d objects has %d EDVs(%s): [%s].\n",(compositelessObj?"less set":""),compName, + slCount(mdbCompositeObjs),slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars)); // Set the stage // Organize composite objs by EDVs dyStringPrintf(dyVars, " %s %s ",MDB_VAR_VIEW,MDB_VAR_REPLICATE); // Allows for nicer sorted list char *edvSortOrder = cloneString(dyStringContents(dyVars)); // Walk through objs for an exp as defined by EDVs int expCount=0; // Count of experiments in composite int expMissing=0; // Count of experiments with missing expId int expObjsCount=0; // Total of all experimental object accoss the composite int expMax=0; // Largest experiment (in number of objects) int expMin=999; // Smallest experiment (in number of objects) while(mdbCompositeObjs != NULL) { // Must sort each cycle, because sort order is lost during mdbObjs FilterByVars(); mdbObjsSortOnVars(&mdbCompositeObjs, edvSortOrder); @@ -2776,30 +2806,31 @@ dyStringPrintf(dyVars,"%s=%s ",edvVar->var,edvVar->val); if (differentString(edvVar->val,MDB_VAL_ENCODE_EDV_NONE)) valsFound++; } dyStringContents(dyVars)[dyStringLen(dyVars) -1] = '\0'; // Nicer printing is all if (valsFound == 0) { verbose(1, "There are no experiment defining variables for this object '%s'.\n",mdbCompositeObjs->obj); slAddHead(&mdbProcessedObs,slPopHead(&mdbCompositeObjs)); // We're done with this one mdbVarsFree(&edvVarVals); continue; } // Work on one experiment at a time + verbose(2, 
"mdbObjsEncodeExperimentify() working on EDVs: %s.\n",dyStringContents(dyVars)); struct mdbObj *mdbExpObjs = mdbObjsFilterByVars(&mdbCompositeObjs,dyStringContents(dyVars),TRUE,TRUE); // None={notFound} // --- At this point we have nibbled off an experiment worth of objects from the composite set of objects int objsInExp = slCount(mdbExpObjs); assert(objsInExp > 0); expCount++; expObjsCount += objsInExp; // Total of all experimental objects across the composite // Look up each exp in EXPERIMENTS_TABLE char experimentId[128]; int expId = -1; struct encodeExp *exp = encodeExpGetByMdbVars(db, edvVarVals); if (exp == NULL && createExpIfNecessary) exp = encodeExpGetOrCreateByMdbVars(db, edvVarVals); @@ -2896,32 +2927,32 @@ slAddHead(&mdbProcessedObs,obj); } // Done with one experiment encodeExpFree(&exp); if (!foundId && errors > 0) { expMissing++; if (warn > 0) printf(" %s all %d objects are missing an %s.\n",experimentId,objsInExp,MDB_VAR_ENCODE_EXP_ID); } } // Done with one composite if (expCount > 0) - printf("Composite '%s' has %d recognizable experiment%s with %d missing an %s.\n objects/experiment: min:%d max:%d mean:%lf.\n", - compName,expCount,(expCount != 1?"s":""),expMissing,MDB_VAR_ENCODE_EXP_ID,expMin,expMax,((double)expObjsCount/expCount)); + printf("Composite%s '%s' has %d recognizable experiment%s with %d missing an %s.\n objects/experiment: min:%d max:%d mean:%lf.\n", + (compositelessObj?"less set":""),compName,expCount,(expCount != 1?"s":""),expMissing,MDB_VAR_ENCODE_EXP_ID,expMin,expMax,((double)expObjsCount/expCount)); if (edvSortOrder != NULL) freeMem(edvSortOrder); slNameFreeList(compositeEdvs); } // Done with all composites dyStringFree(&dyVars); *pMdbObjs = mdbProcessedObs; return mdbUpdateObjs; } boolean mdbObjIsEncode(struct mdbObj *mdb)