62116e0476f1e663d37de8477766f5cdee572862
galt
  Fri Oct 12 18:37:08 2012 -0700
v12.16 - fixed bug in planner which had r and c reversed so that -ramUnit option now works
diff --git src/parasol/paraHub/paraHub.c src/parasol/paraHub/paraHub.c
index 548aed5..7d18ae4 100644
--- src/parasol/paraHub/paraHub.c
+++ src/parasol/paraHub/paraHub.c
@@ -173,31 +173,31 @@
 struct resultQueue *resultQueues; /* Result files. */
 int finishedJobCount = 0;		/* Number of finished jobs. */
 int crashedJobCount = 0;		/* Number of crashed jobs. */
 
 char *jobIdFileName = "parasol.jid";	/* File name where jobId file is. */
 FILE *jobIdFile = NULL;			/* Handle to jobId file. */
 
 char *hubHost;	/* Name of machine running this. */
 struct rudp *rudpOut;	/* Our rUDP socket. */
 
 
 /* Variables for new scheduler */
 
 // TODO make commandline param options to override defaults for unit sizes?
 /*  using machines list spec info for defaults */
-int cpuUnit = 1;                   /* 1 CPU */
+int cpuUnit = 1;                   /* 1 CPU */  /* someday this could be float 0.5 */
 long long ramUnit = 512 * 1024 * 1024;  /* 500 MB */
 int defaultJobCpu = 1;        /* number of cpuUnits in default job usage */  
 int defaultJobRam = 1;        /* number of ramUnits in default job usage */
 /* for the resource array dimensions */
 int maxCpuInCluster = 0;      /* node with largest number of cpu units */
 int maxRamInCluster = 0;      /* node with largest number of ram units */
 struct slRef ***perCpu = NULL;  /* an array of resources sharing the same cpu units free units count */
 boolean needsPlanning = FALSE;  /* remember if situation changed, need new plan */  
 
 
 void setupLists()
 /* Make up machine, spoke, user and job lists - all doubly linked
  * so it is fast to remove items from one list and put them
  * on another. */
 {
@@ -736,31 +736,31 @@
 	struct dlNode *jobNode = NULL;
 	for (jobNode = mach->jobs->head; !dlEnd(jobNode); jobNode = jobNode->next)
 	    {
 	    struct job *job = jobNode->val;
 	    struct batch *batch = job->batch;
 	    struct user *user = batch->user;
 	    job->oldPlan = TRUE;
 	    if (batch->planning && (batch->maxJob != -1))
 		{
 		if (pm) 
 		    {
 		    //pmClear(pm);
 		    //pmPrintf(pm, "preserving batch %s on machine %s", batch->name, mach->name);
 		    //pmSend(pm, rudpOut);
 		    }
-		allocateResourcesToMachine(mach, batch, user, &r, &c);
+		allocateResourcesToMachine(mach, batch, user, &c, &r);
 		}
 	    }
 
 	if (pm) 
 	    {
 	    //pmClear(pm);
 	    //pmPrintf(pm, "machSpec (%s) cpus:%d ramSize=%d"
 		//, mach->name, mach->machSpec->cpus, mach->machSpec->ramSize);
 	    //pmSend(pm, rudpOut);
 	    }
      
 
 	if (c < 1 || r < 1)
 	    {
 	    if (pm) 
@@ -851,31 +851,31 @@
 
 	/* allocate plan, reduce resources, calc new resources and pos.
 	 *   move machine from old array pos to new pos. (slPopHead, slAddHead)
 	 *   update its stats, and if heaps, update heaps.
 	 */
 
 
 	if (pm) 
 	    {
 	    //pmClear(pm);
 	    //pmPrintf(pm, "found hardware cpu %d ram %d in machine %s c=%d r=%d batch=%s", 
 		//batch->cpu, batch->ram, mach->name, c, r, batch->name);
 	    //pmSend(pm, rudpOut);
 	    }
 
-	allocateResourcesToMachine(mach, batch, user, &r, &c);
+	allocateResourcesToMachine(mach, batch, user, &c, &r);
 
 	if (pm) 
 	    {
 	    //pmClear(pm);
 	    //pmPrintf(pm, "remaining hardware c=%d r=%d", c, r);
 	    //pmSend(pm, rudpOut);
 	    }
      
 	if (c < 1 || r < 1)
 	    freeMem(el);  /* this node has insufficient resources remaining */
 	else
 	    slAddHead(&perCpu[c][r], el);
 
 	}
     else
@@ -1067,32 +1067,30 @@
     job = jNode->val;
     dlAddTail(hangJobs, job->hangNode);
     ++batch->runningCount;
     --batch->queuedCount;
     ++user->runningCount;
     unactivateBatchIfEmpty(batch); 
 
     /* Tell machine, job, and spoke about each other. */
     dlAddTail(machine->jobs, job->jobNode);
 
     /* just put it back on the ready list, it will get looked at again */
     dlAddTail(readyMachines, mNode);
 
     job->machine = machine;
     job->lastChecked = job->startTime = job->lastClockIn = now;
-    if (!(job->ram))  /* if no ram size specified, use the default */
-	job->ram = batch->ram * ramUnit;
     spokeSendJob(spoke, machine, job);
     return TRUE;
     }
 }
 
 void runner(int count)
 /* Try to run a couple of jobs. */
 {
 while (--count >= 0)
     if (!runNextJob())
         break;
 }
 
 struct machine *machineNew(char *name, char *tempDir, struct machSpec *m)
 /* Create a new machine structure. */
@@ -1908,35 +1906,44 @@
 
 job = jobNew(command, userName, dir, in, out, cpus, ram, results, TRUE);
 if (!job)
     {
     return 0;
     }
 batch = job->batch;
 dlAddTail(batch->jobQueue, job->node);
 ++batch->queuedCount;
 
 int oldCpu = batch->cpu;  
 int oldRam = batch->ram; 
 if (job->cpus) 
     batch->cpu = (job->cpus + 0.5) / cpuUnit;  /* rounding */
 else
+    {
+    /* if no cpus specified, use the default */
     batch->cpu = defaultJobCpu;
+    job->cpus = defaultJobCpu * cpuUnit;
+    }
 if (job->ram) 
-    batch->ram = (job->ram + (0.5*ramUnit)) / ramUnit;   /* rounding */
+    batch->ram = 1 + (job->ram - 1) / ramUnit;   /* round any remainder up to a whole ramUnit,
+        e.g. with a 1G unit, 1m to 1024m --> 1 unit but 1025m --> 2 units.  A request of 0 takes the default branch below. */
 else
+    {
+    /* if no ram size specified, use the default */
     batch->ram = defaultJobRam;
+    job->ram = defaultJobRam * ramUnit;
+    }
 
 if (oldCpu != batch->cpu || oldRam != batch->ram)
     {
     needsPlanning = TRUE; 
     }
 
 if (batch->planCount == 0)
     {
     needsPlanning = TRUE; 
     }
 user = batch->user;
 dlRemove(user->node);
 dlAddTail(queuedUsers, user->node);
 job->submitTime = time(NULL);
 return job->id;