src/hg/cgilib/joinMixer.c 4898794edd81be5285ea6e544acbedeaeb31bf78

4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/cgilib/joinMixer.c src/hg/cgilib/joinMixer.c
index 9b5316a..518d941 100644
--- src/hg/cgilib/joinMixer.c
+++ src/hg/cgilib/joinMixer.c
@@ -1,363 +1,363 @@
 /* joinMixer - implement multiple joins by sql joins and/or hashJoins, depending on which
  * should work best for each joined table. */
 
 /* Copyright (C) 2015 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "joinMixer.h"
 #include "hdb.h"
 
 static void addUniqueJoinerDt(struct joinerDtf **pTableList, struct hash *uniqHash,
                               struct joinerDtf *dt, char *db)
 /* Use a string key into the hash to build up a unique list of cloned joinerDtf's with
  * ignored (possibly NULL) fields (table-only, hence "dt" not "dtf"). */
 {
 char dbTable[PATH_LEN];
 joinerDtfToSqlTableString(dt, db, dbTable, sizeof(dbTable));
 if (! hashLookup(uniqHash, dbTable))
     {
     slAddHead(pTableList, joinerDtfClone(dt));
     hashAdd(uniqHash, dbTable, NULL);
     }
 }
 
 static struct joinerDtf *tablesInOutput(char *db, char *mainTable,
                                         struct joinerDtf *outputFieldList)
 /* Return a uniquified list of tables (NULL for field) that we need for selected output fields
  * and filters, if applicable.  Joining these tables may require additional intermediate tables. */
 {
 struct hash *uniqHash = hashNew(0);
 struct joinerDtf *tableList = NULL;
 // Always include the main table, even if its fields aren't in output.
 struct joinerDtf *mainDt = joinerDtfNew(db, mainTable, NULL);
 addUniqueJoinerDt(&tableList, uniqHash, mainDt, db);
 struct joinerDtf *dtf;
 for (dtf = outputFieldList;  dtf != NULL;  dtf = dtf->next)
     addUniqueJoinerDt(&tableList, uniqHash, dtf, db);
 hashFree(&uniqHash);
 slReverse(&tableList);
 return tableList;
 }
 
 static boolean noFancyKeywords(struct joinerField *jfA, struct joinerField *jfB)
 /* Return TRUE unless jfA or jfB has a keyword that is too fancy for sql joins. */
 {
 if (isNotEmpty(jfA->separator) && differentString(jfA->separator, ","))
     return FALSE;
 if (isNotEmpty(jfB->separator))
     return FALSE;
 if (jfA->chopBefore || jfA->chopAfter || jfB->chopBefore || jfB->chopAfter)
     return FALSE;
 return TRUE;
 }
 
 static boolean dependsOn(struct joinerPair *jpA, struct joinerPair *jpBList)
 /* Return TRUE if jpA->a's table is found among the ->b's in jpBList. */
 {
 struct joinerPair *jpB;
 for (jpB = jpBList;  jpB != NULL;  jpB = jpB->next)
     {
     if (joinerDtfSameTable(jpA->a, jpB->b))
         return TRUE;
     }
 return FALSE;
 }
 
 static void partitionJoins(char *db, struct joinerPair *routeList, uint mainTableRowCount,
                            struct joinerPair **retSqlRouteList,
                            struct joinerPair **retHashJoinRouteList)
 /* For each table pair in routeList, figure out whether a sql join is possible and
  * perhaps faster than brute force join.  If it looks like sql is faster, add to
  * retSqlRouteList, otherwise to retHashJoinRouteList. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 boolean mysqlFasterForMainTable = mainTableRowCount < 10000;
 struct joinerPair *jp, *jpNext;
 for (jp = routeList;  jp != NULL;  jp = jpNext)
     {
     jpNext = jp->next;
     boolean useSql = FALSE;
     struct joinerField *jfA = joinerSetFindField(jp->identifier, jp->a);
     struct joinerField *jfB = joinerSetFindField(jp->identifier, jp->b);
     if (noFancyKeywords(jfA, jfB))
         {
         char dbTable[PATH_LEN];
         joinerDtfToSqlTableString(jp->b, db, dbTable, sizeof(dbTable));
         uint rowCountB = sqlRowCount(conn, dbTable);
         boolean relatedMuchBigger = mainTableRowCount + 10000000 < rowCountB;
         if (mysqlFasterForMainTable || relatedMuchBigger)
             useSql = TRUE;
         }
     if (useSql && !dependsOn(jp, *retHashJoinRouteList))
         slAddHead(retSqlRouteList, jp);
     else
         slAddHead(retHashJoinRouteList, jp);
     }
 slReverse(retSqlRouteList);
 slReverse(retHashJoinRouteList);
 hFreeConn(&conn);
 }
 
 static struct hash *tablesInRouteList(char *db, char *mainTable, struct joinerPair *routeList)
 /* Return a hash of names of tables in routeList to NULL values. */
 {
 struct hash *hash = hashNew(0);
 // Always add mainTable
 char dbTable[PATH_LEN];
 hashAdd(hash, mainTable, NULL);
 // Add all pairs' ->b's
 struct joinerPair *jp;
 for (jp = routeList;  jp != NULL;  jp = jp->next)
     {
     joinerDtfToSqlTableString(jp->b, db, dbTable, sizeof(dbTable));
     hashAdd(hash, dbTable, NULL);
     }
 return hash;
 }
 
 static void addField(struct hash *fieldToIx, struct joinerDtf *dtf, char *db, uint *pIx)
 /* If stringified dtf is not already in hash, add it with value=*pIx and increment *pIx. */
 {
 char dtField[PATH_LEN];
 joinerDtfToSqlFieldString(dtf, db, dtField, sizeof(dtField));
 if (!hashLookup(fieldToIx, dtField))
     {
     hashAddInt(fieldToIx, dtField, *pIx);
     *pIx = *pIx + 1;
     }
 }
 
 static uint mustFindIx(struct hash *fieldToIx, struct joinerDtf *dtf, char *db, char *errMsg)
 /* Look up nonnegative int value of stringified dtf; die if not found, otherwise return it. */
 {
 char dtField[PATH_LEN];
 joinerDtfToSqlFieldString(dtf, db, dtField, sizeof(dtField));
 int bigRowKeyIx = hashIntValDefault(fieldToIx, dtField, -1);
 if (bigRowKeyIx < 0)
     errAbort("%s '%s'", errMsg, dtField);
 return (uint)bigRowKeyIx;
 }
 
 struct hjTableInfo
 // Info accumulator for constructing a new hashJoin
 {
     struct hjTableInfo *next;
     char *dbTable;
     struct joinerPair *jp;
     struct joinerDtf *hashValCols;
 };
 
 static struct hjTableInfo *hjTableInfoNew(char *dbTable, struct joinerPair *jp,
                                           struct joinerDtf *hashValCol)
 // Return a new hjTableInfo for dbTable; if jp is non-NULL, jp->b is the hash key field.
 {
 struct hjTableInfo *self;
 AllocVar(self);
 self->dbTable = cloneString(dbTable);
 self->jp = jp;
 self->hashValCols = joinerDtfClone(hashValCol);
 return self;
 }
 
 static struct hjTableInfo *hjTableInfoFind(struct hjTableInfo *list, char *dbTable)
 // Just use slPair find because the first two fields are next and char *name.
 {
 return (struct hjTableInfo *)slPairFind((struct slPair *)list, dbTable);
 }
 
 static void hjTableInfoAddKey(struct hjTableInfo **pList, struct joinerPair *jp, char *db)
 // jp->b is the hash key field for its table; look up jp->b's table in pList, add if necessary,
 // setting the table's jp.
 {
 char dbTable[PATH_LEN];
 joinerDtfToSqlTableString(jp->b, db, dbTable, sizeof(dbTable));
 struct hjTableInfo *infoForTable = hjTableInfoFind(*pList, dbTable);
 if (infoForTable == NULL)
     slAddHead(pList, hjTableInfoNew(dbTable, jp, NULL));
 else if (infoForTable->jp != NULL)
     errAbort("Multiple keys for %s: %s and %s", dbTable, infoForTable->jp->b->field, jp->b->field);
 else
     infoForTable->jp = jp;
 }
 
 static void hjTableInfoAddVal(struct hjTableInfo **pList, struct joinerDtf *dtf, char *db)
 // Find dtf in pList -- it should be there already if hjRouteList is traversed in order --
 // and add dtf to its hash value column list if not already there.
 {
 char dbTable[PATH_LEN];
 joinerDtfToSqlTableString(dtf, db, dbTable, sizeof(dbTable));
 struct hjTableInfo *infoForTable = hjTableInfoFind(*pList, dbTable);
 if (infoForTable == NULL)
     {
     errAbort("hjTableInfoAddVal: can't find table '%s' in list", dbTable);
     }
 else if (! joinerDtfFind(infoForTable->hashValCols, dtf))
     slAddTail(&infoForTable->hashValCols, joinerDtfClone(dtf));
 }
 
 static void hjTableInfoFree(struct hjTableInfo **pHjti)
 // Free up *pHjti & contents if not NULL.
 {
 if (! pHjti || ! *pHjti)
     return;
 struct hjTableInfo *hjti = *pHjti;
 freeMem(hjti->dbTable);
 joinerDtfFreeList(&hjti->hashValCols);
 freez(pHjti);
 }
 
 static void hjTableInfoFreeList(struct hjTableInfo **pHjtiList)
 // Free up every member of *pHjtiList if not NULL.
 {
 if (! pHjtiList || ! *pHjtiList)
     return;
 struct hjTableInfo *hjti = *pHjtiList;
 while (hjti != NULL)
     {
     struct hjTableInfo *hjtiNext = hjti->next;
     hjTableInfoFree(&hjti);
     hjti = hjtiNext;
     }
 *pHjtiList = NULL;
 }
 
 struct joinMixer *joinMixerNew(struct joiner *joiner, char *db, char *mainTable,
                                struct joinerDtf *outputFieldList,
                                uint mainTableRowCount, boolean naForMissing)
 /* If outputFieldList contains fields from more than one table, use joiner to figure
  * out the route of table joins to relate all fields; for each table, predict whether
  * it would be more efficient to join by sql or by hashJoin, taking into account
  * anticipated row counts.  Return info sufficient for building a sql query, a list
  * of hashJoins, bigRow size, and indexes in bigRow for each output.
  * If naForMissing is TRUE then the hashJoiner result columns will contain "n/a" when
  * there is no match in the hash. */
 {
 struct joinMixer *jmOut;
 AllocVar(jmOut);
 // Figure out what tables we need to query, and whether each one should be joined
 // by sql or hash:
 struct joinerDtf *tablesNeeded = tablesInOutput(db, mainTable, outputFieldList);
 struct joinerPair *routeList = joinerFindRouteThroughAll(joiner, tablesNeeded);
 struct joinerPair *hjRouteList = NULL;
 partitionJoins(db, routeList, mainTableRowCount, &jmOut->sqlRouteList, &hjRouteList);
 // routeList was clobbered, make sure we don't try to use it again:
 routeList = NULL;
 char dbTable[PATH_LEN];
 // Split output fields into those that come from sql and those that come from hashJoins:
 // Assign indices in external row to each output from mysql.
 struct hash *fieldToIx = hashNew(0);
 uint bigRowIx = 0;
 struct hash *sqlTables = tablesInRouteList(db, mainTable, jmOut->sqlRouteList);
 struct joinerDtf *hjFieldList = NULL;
 struct joinerDtf *dtf;
 for (dtf = outputFieldList;  dtf != NULL;  dtf = dtf->next)
     {
     joinerDtfToSqlTableString(dtf, db, dbTable, sizeof(dbTable));
     if (hashLookup(sqlTables, dbTable) || sameString(dbTable, mainTable))
         {
         slAddHead(&jmOut->sqlFieldList, joinerDtfClone(dtf));
         addField(fieldToIx, dtf, db, &bigRowIx);
         }
     else
         {
         slAddHead(&hjFieldList, joinerDtfClone(dtf));
         // Don't add to bigRow/fieldToIx because we might need to tack on some sql fields
         // for hashJoin keys.
         }
     }
 slReverse(&jmOut->sqlFieldList);
 slReverse(&hjFieldList);
 
 // Add hashJoin jp->a keys that come from sqlTables to sqlFieldList and bigRow if not already
 // in sqlFieldList
 struct joinerPair *jp;
 for (jp = hjRouteList;  jp != NULL;  jp = jp->next)
     {
     joinerDtfToSqlTableString(jp->a, db, dbTable, sizeof(dbTable));
     if (hashLookup(sqlTables, dbTable) && !joinerDtfFind(jmOut->sqlFieldList, jp->a))
         {
         slAddTail(&jmOut->sqlFieldList, joinerDtfClone(jp->a));
         addField(fieldToIx, jp->a, db, &bigRowIx);
         }
     }
 
 // Now that sqlFieldList is complete, add hashJoin output fields to bigRow.
 for (dtf = hjFieldList;  dtf != NULL;  dtf = dtf->next)
     addField(fieldToIx, dtf, db, &bigRowIx);
 
 // Add hashJoin key info (jp->b) to hjTableCols.  If any hashJoin key fields take values
 // produced by other hashJoins (jp->a), but those columns don't appear in output, add them
 // to bigRow.
 struct hjTableInfo *hjTableCols = NULL;
 for (jp = hjRouteList;  jp != NULL;  jp = jp->next)
     {
     hjTableInfoAddKey(&hjTableCols, jp, db);
     joinerDtfToSqlTableString(jp->a, db, dbTable, sizeof(dbTable));
     if (! hashLookup(sqlTables, dbTable))
         hjTableInfoAddVal(&hjTableCols, jp->a, db);
     addField(fieldToIx, jp->a, db, &bigRowIx);
     }
 
 // Done assigning bigRow indices; set bigRowSize.
 jmOut->bigRowSize = bigRowIx;
 
 // Add hash output fields to hjTableCols
 for (dtf = hjFieldList;  dtf != NULL;  dtf = dtf->next)
     hjTableInfoAddVal(&hjTableCols, dtf, db);
 
 // Build up jmOut->hashJoins
 slReverse(&hjTableCols);
 struct hjTableInfo *hjti;
 int i;
 for (i = 0, hjti = hjTableCols;  hjti != NULL;  hjti = hjti->next, i++)
     {
     if (hjti->jp == NULL)
         errAbort("hjTableInfo for %s has NULL jp", hjti->dbTable);
     if (hjti->hashValCols == NULL)
         errAbort("hjTableInfo for %s has NULL hashValCols", hjti->dbTable);
     uint bigRowKeyIx = mustFindIx(fieldToIx, hjti->jp->a, db,
                                   "joinMixerNew: Can't find index of hashJoin index");
     struct joinerField *jfA = joinerSetFindField(hjti->jp->identifier, hjti->jp->a);
     struct joinerField *jfB = joinerSetFindField(hjti->jp->identifier, hjti->jp->b);
     uint valCount = slCount(hjti->hashValCols);
     uint bigRowColIxs[valCount];
     struct joinerDtf *col;
     int colIx;
     for (colIx = 0, col = hjti->hashValCols;  col != NULL;  col = col->next, colIx++)
         bigRowColIxs[colIx] = mustFindIx(fieldToIx, col, db,
                                          "joinMixerNew: Missing bigRow ix for");
     slAddHead(&jmOut->hashJoins,
               hashJoinNew(hjti->jp->b, bigRowKeyIx,
                           hjti->hashValCols, bigRowColIxs,
                           jfA, jfB, naForMissing));
     }
 slReverse(&jmOut->hashJoins);
 
 // Fill in each output field's index into bigRow
 AllocArray(jmOut->outIxs, slCount(outputFieldList));
 uint outRowIx;
 for (outRowIx = 0, dtf = outputFieldList;  dtf != NULL;  dtf = dtf->next, outRowIx++)
     jmOut->outIxs[outRowIx] = mustFindIx(fieldToIx, dtf, db,
                                         "joinMixerNew: no bigRowIx for output field");
 
 joinerPairFreeList(&hjRouteList);
 joinerDtfFreeList(&hjFieldList);
 hjTableInfoFreeList(&hjTableCols);
 hashFree(&fieldToIx);
 hashFree(&sqlTables);
 joinerDtfFreeList(&tablesNeeded);
 return jmOut;
 }
 
 void joinMixerFree(struct joinMixer **pJm)
 /* Free joinMixer's holdings unless already NULL. */
 {
 if (!pJm || !*pJm)
     return;
 struct joinMixer *jm = *pJm;
 joinerPairFreeList(&jm->sqlRouteList);
 joinerDtfFreeList(&jm->sqlFieldList);
 hashJoinFreeList(&jm->hashJoins);
 freeMem(jm->outIxs);
 freez(pJm);
 }