0713928c047bb0bbc8766e38edbc1b475e3a111b
tdreszer
  Fri Apr 8 17:41:06 2011 -0700
Adjusted encodeExp functionality to handle compositeless experiments without choking
diff --git src/hg/lib/mdb.c src/hg/lib/mdb.c
index 11ea4f4..12fe3a2 100644
--- src/hg/lib/mdb.c
+++ src/hg/lib/mdb.c
@@ -2584,31 +2584,31 @@
                 {
                 //char buffer[128];
                 //regerror(err, &regEx, buffer, sizeof buffer);
                 printf("INVALID regex '%s' match: %s -> %s = '%s'.\n",validationRule, mdbObj->obj,mdbVar->var,mdbVar->val);
                 invalids++;
                 }
             regfree(&regEx);
             }
         else
             verbose(1,"ERROR in %s: Unknown validationRule rule '%s' for term %s.\n",CV_FILE_NAME,validationRule,mdbVar->var);
         }
     }
 return invalids;
 }
 
-static struct slName *mdbCompositeFindEncodeEdvs(struct mdbObj *compObj)
+static struct slName *mdbObjGetNamedEncodeEdvs(struct mdbObj *compObj)
 // returns NULL or the list of EDVs defined for this composite
 {
 char *edvs = mdbObjFindValue(compObj,MDB_VAR_ENCODE_EDVS);
 if (edvs == NULL)
     return NULL;
 
 edvs = cloneString(edvs);
 if (strchr(     edvs,',') != NULL) // Tolerate delimit by commas
     strSwapChar(edvs,',',' ');
 else if (strchr(edvs,';') != NULL) // Tolerate delimit by semicolons
     strSwapChar(edvs,';',' ');
 
 struct slName *compositeEdvs = slNameListFromString(edvs,' ');
 freeMem(edvs);
 return compositeEdvs;
@@ -2623,52 +2623,57 @@
 for(;var!=NULL;var=var->next)
     {
     char *val = mdbObjFindValue(mdbObj,var->name);
     if (val)
         mdbVarAdd(&edvVars, var->name,val);
     else if (includeNone)
         {
         if (differentWord(var->name,ENCODE_EXP_FIELD_ORGANISM)) // Does not go into EDV's sent to encodeExp table
             mdbVarAdd(&edvVars, var->name,MDB_VAL_ENCODE_EDV_NONE);
         }
     }
 slReverse(&edvVars);
 return edvVars;
 }
 
-struct slName *mdbObjFindCompositeEncodeEdvNames(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj)
+struct slName *mdbObjFindCompositeNamedEncodeEdvs(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj)
 // returns NULL or the Experiment Defining Variable names for this composite
 {
 if (!mdbObjIsCompositeMember(mdbObj))
     return NULL; // This should be a valid composite memeber
 
 struct mdbObj *compObj = mdbObjQueryCompositeObj(conn,tableName,mdbObj);
 if (compObj == NULL)
     return NULL;
 
-struct slName *edvs = mdbCompositeFindEncodeEdvs(compObj);
+struct slName *edvs = mdbObjGetNamedEncodeEdvs(compObj);
 mdbObjFree(&compObj);
 return edvs;
 }
 
 struct mdbVar *mdbObjFindEncodeEdvPairs(struct sqlConnection *conn,char *tableName,struct mdbObj *mdbObj,boolean includeNone)
 // returns NULL or the Experiment Defining Variables and values for this composite member object
 // If includeNone, then defined variables not found in obj will be included as {var}="None".
 {
-struct slName *compositeEdvs = mdbObjFindCompositeEncodeEdvNames(conn,tableName,mdbObj);
+// In rare cases, the EDV names reside with the object itself and NOT in an objType=composite object.
+struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(mdbObj);  // look locally first; fall back to the composite below
+if (compositeEdvs == NULL)
+    {
+    compositeEdvs = mdbObjFindCompositeNamedEncodeEdvs(conn,tableName,mdbObj); // assign the outer variable (no redeclaration) so the result reaches the call below
 if (compositeEdvs == NULL)
     return NULL;
+    }
 
 return mdbObjEncodeEdvsAsMdbVars(mdbObj,compositeEdvs,includeNone);
 }
 
 #define EXPERIMENTS_TABLE "hgFixed.encodeExp"
 
 struct mdbObj *mdbObjsEncodeExperimentify(struct sqlConnection *conn,char *db,char *tableName,struct mdbObj **pMdbObjs,
                                           int warn,boolean createExpIfNecessary)
 // Organizes objects into experiments and validates experiment IDs.  Will add/update the ids in the structures.
 // If warn=1, then prints to stdout all the experiments/obs with missing or wrong expIds;
 //    warn=2, then print line for each obj with expId or warning.
 // createExpIfNecessary means go ahead and add to the hgFixed.encodeExp table to get an ID
 // Returns a new set of mdbObjs that is what can (and should) be used to update the mdb via mdbObjsSetToDb().
 {
 // Here is what "experimentify" does from "mdbPrint -encodeExp" and "mdbUpdate -encodeExp":
@@ -2678,82 +2683,107 @@
 //    - Looks up EDVs (Experiment Defining Variables) for composite.
 //        These are defined in the mdb under objType=composite expVars=
 //    - Breaks up and walks through composite objects exp by exp as defined by EDVs
 //    - Uses encodeExp API to determine what expId should be.
 //    - Creates new mdbObjs list of updates needed to put expId and dccAccession into the mdb.
 //    - From "mdbPrint", this API warns of mismatches or missing expIds
 //    - From "mdbUpdate" (not -test) then that utility will update the mdb from this API's return structs.  If -test, will reveal what would be updated.
 //    - FIXME: Need to extend this to update dccAccession separately from expId.
 
 if (pMdbObjs == NULL || *pMdbObjs == NULL)
     return 0;
 struct mdbObj *mdbObjs = *pMdbObjs;
 struct mdbObj *mdbProcessedObs = NULL;
 struct mdbObj *mdbUpdateObjs = NULL;
 
+verbose(2, "mdbObjsEncodeExperimentify() beginning for %d objects.\n",slCount(*pMdbObjs));
 // Sort all objects by composite, so that we handle composite by composite
 mdbObjsSortOnVars(&mdbObjs, MDB_VAR_COMPOSITE);
 
 struct dyString *dyVars = dyStringNew(256);
 
 while(mdbObjs != NULL)
     {
     // Work on a composite at a time
+    boolean compositelessObj = FALSE;
     char *compName = NULL;
     while(mdbObjs != NULL && compName == NULL)
         {
         compName = mdbObjFindValue(mdbObjs,MDB_VAR_COMPOSITE);
         if (compName == NULL)
             {
-            verbose(1, "Object '%s' has no %s defined.\n",mdbObjs->obj,MDB_VAR_COMPOSITE);
+            if (mdbObjFindValue(mdbObjs,MDB_VAR_ENCODE_EDVS) == NULL)
+                {
+                verbose(1, "Object '%s' has no %s or %s defined.\n",mdbObjs->obj,MDB_VAR_COMPOSITE,MDB_VAR_ENCODE_EDVS);
             mdbProcessedObs = slCat(mdbProcessedObs,slPopHead(&mdbObjs));
             continue;
             }
+            verbose(2, "mdbObjsEncodeExperimentify() starting on compositeless set.\n");
+            break;
+            }
         }
-    struct mdbObj *mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE);
+    struct mdbObj *mdbCompositeObjs = NULL;
+    if (compName != NULL)
+        mdbCompositeObjs = mdbObjsFilter(&mdbObjs, MDB_VAR_COMPOSITE, compName,TRUE);
+    else
+        mdbCompositeObjs = slPopHead(&mdbObjs); // Rare cases there is no composite set.
+    assert(mdbCompositeObjs != NULL);
     // --- At this point we have nibbled off a composite worth of objects from the full set of objects
 
     // Find the composite obj if it exists
-    struct mdbObj *compObj = mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE);
+    struct mdbObj *compObj = NULL;
+    if (compName != NULL)
+        {
+        compObj =mdbObjsFilter(&mdbCompositeObjs, MDB_OBJ_TYPE, MDB_OBJ_TYPE_COMPOSITE,TRUE);
     if (compObj == NULL) // May be NULL if mdbObjs passed in was produced by too narrow of selection criteria
+            {
         compObj = mdbObjQueryCompositeObj(conn,tableName,mdbCompositeObjs);  // First obj on list will do
-    else
-        slAddHead(&mdbProcessedObs,compObj); // We can still use the pointer, but will not "process" it.  NOTE: leak the queried one
-    if(compObj == NULL)
+            if(compObj == NULL)  // This should be assertable
         {
         verbose(1, "Composite '%s' has not been defined.\n",compName);
         mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs);
         mdbCompositeObjs = NULL;
         continue;
         }
+            }
+        else
+            slAddHead(&mdbProcessedObs,compObj); // We can still use the pointer, but will not "process" it.  NOTE: leak the queried one
+        }
+    else
+        {
+        compObj = mdbCompositeObjs;  // Should be only one
+        compName = mdbCompositeObjs->obj;
+        compositelessObj = TRUE;
+        }
+    verbose(2, "mdbObjsEncodeExperimentify() working on %s %s%s.\n",compName,MDB_VAR_COMPOSITE,(compositelessObj?"less set":""));
 
-    // Obtain experiment defining variables for the composite
-    struct slName *compositeEdvs = mdbCompositeFindEncodeEdvs(compObj);
+    // Obtain experiment defining variables for the composite (or compositeless obj)
+    struct slName *compositeEdvs = mdbObjGetNamedEncodeEdvs(compObj);
     if (compositeEdvs == NULL)
         {
-        verbose(1, "There are no experiment defining variables established for this %s.  Add them to obj %s => var:%s.\n",
-                MDB_VAR_COMPOSITE, compName,MDB_VAR_ENCODE_EDVS);
+        verbose(1, "There are no experiment defining variables established for this %s%s.  Add them to obj %s => var:%s.\n",
+                MDB_VAR_COMPOSITE,(compositelessObj?"less set":""), compName,MDB_VAR_ENCODE_EDVS);
         mdbProcessedObs = slCat(mdbProcessedObs,mdbCompositeObjs);
         mdbCompositeObjs = NULL;
         continue;
         }
     dyStringClear(dyVars);
     dyStringAppend(dyVars,slNameListToString(compositeEdvs, ' '));
 
     if (warn > 0)
-        printf("Composite '%s' with %d objects has %d EDVs(%s): [%s].\n",compName,slCount(mdbCompositeObjs),
-               slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars)); // Set the stage
+        printf("Composite%s '%s' with %d objects has %d EDVs(%s): [%s].\n",(compositelessObj?"less set":""),compName,
+               slCount(mdbCompositeObjs),slCount(compositeEdvs),MDB_VAR_ENCODE_EDVS,dyStringContents(dyVars)); // Set the stage
 
     // Organize composite objs by EDVs
     dyStringPrintf(dyVars, " %s %s ",MDB_VAR_VIEW,MDB_VAR_REPLICATE); // Allows for nicer sorted list
     char *edvSortOrder = cloneString(dyStringContents(dyVars));
 
     // Walk through objs for an exp as defined by EDVs
     int expCount=0;     // Count of experiments in composite
     int expMissing=0;   // Count of experiments with missing expId
     int expObjsCount=0; // Total of all experimental object accoss the composite
     int expMax=0;       // Largest experiment (in number of objects)
     int expMin=999;     // Smallest experiment (in number of objects)
     while(mdbCompositeObjs != NULL)
         {
         // Must sort each cycle, because sort order is lost during mdbObjs FilterByVars();
         mdbObjsSortOnVars(&mdbCompositeObjs, edvSortOrder);
@@ -2776,30 +2806,31 @@
             dyStringPrintf(dyVars,"%s=%s ",edvVar->var,edvVar->val);
             if (differentString(edvVar->val,MDB_VAL_ENCODE_EDV_NONE))
                 valsFound++;
             }
         dyStringContents(dyVars)[dyStringLen(dyVars) -1] = '\0'; // Nicer printing is all
 
         if (valsFound == 0)
             {
             verbose(1, "There are no experiment defining variables for this object '%s'.\n",mdbCompositeObjs->obj);
             slAddHead(&mdbProcessedObs,slPopHead(&mdbCompositeObjs)); // We're done with this one
             mdbVarsFree(&edvVarVals);
             continue;
             }
 
         // Work on one experiment at a time
+        verbose(2, "mdbObjsEncodeExperimentify() working on EDVs: %s.\n",dyStringContents(dyVars));
         struct mdbObj *mdbExpObjs = mdbObjsFilterByVars(&mdbCompositeObjs,dyStringContents(dyVars),TRUE,TRUE); // None={notFound}
 
         // --- At this point we have nibbled off an experiment worth of objects from the composite set of objects
 
         int objsInExp = slCount(mdbExpObjs);
         assert(objsInExp > 0);
         expCount++;
         expObjsCount += objsInExp; // Total of all experimental objects across the composite
 
         // Look up each exp in EXPERIMENTS_TABLE
         char experimentId[128];
         int expId = -1;
         struct encodeExp *exp = encodeExpGetByMdbVars(db, edvVarVals);
         if (exp == NULL && createExpIfNecessary)
             exp = encodeExpGetOrCreateByMdbVars(db, edvVarVals);
@@ -2896,32 +2927,32 @@
             slAddHead(&mdbProcessedObs,obj);
             }
         // Done with one experiment
         encodeExpFree(&exp);
 
         if (!foundId && errors > 0)
             {
             expMissing++;
             if (warn > 0)
                 printf("           %s all %d objects are missing an %s.\n",experimentId,objsInExp,MDB_VAR_ENCODE_EXP_ID);
             }
         }
     // Done with one composite
 
     if (expCount > 0)
-        printf("Composite '%s' has %d recognizable experiment%s with %d missing an %s.\n   objects/experiment: min:%d  max:%d  mean:%lf.\n",
-               compName,expCount,(expCount != 1?"s":""),expMissing,MDB_VAR_ENCODE_EXP_ID,expMin,expMax,((double)expObjsCount/expCount));
+        printf("Composite%s '%s' has %d recognizable experiment%s with %d missing an %s.\n   objects/experiment: min:%d  max:%d  mean:%lf.\n",
+               (compositelessObj?"less set":""),compName,expCount,(expCount != 1?"s":""),expMissing,MDB_VAR_ENCODE_EXP_ID,expMin,expMax,((double)expObjsCount/expCount));
 
     if (edvSortOrder != NULL)
         freeMem(edvSortOrder);
     slNameFreeList(compositeEdvs);
     }
 // Done with all composites
 
 dyStringFree(&dyVars);
 
 *pMdbObjs = mdbProcessedObs;
 
 return mdbUpdateObjs;
 }
 
 boolean mdbObjIsEncode(struct mdbObj *mdb)