4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/utils/trashLoad/trashLoad.c src/hg/utils/trashLoad/trashLoad.c index 952a54b..b259576 100644 --- src/hg/utils/trashLoad/trashLoad.c +++ src/hg/utils/trashLoad/trashLoad.c @@ -1,367 +1,367 @@ /* trashLoad - generate trash file activity load test. */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "sqlNum.h" #include "options.h" #include "trashDir.h" #include "jksql.h" #include "hdb.h" #include "obscure.h" static char *testDir = "loadTest"; /* directory to write files into */ static char *mysql = ""; /* database to load tables */ static boolean mysqlTesting = FALSE; /* TRUE when mysql database given */ static char *bedFile = ""; /* path to a bed file for mysql testing */ static boolean filesToo = FALSE; /* also write files during mysql testing */ static boolean onServer = FALSE; /* for mysql speedup */ static boolean MyISAM = FALSE; /* type of ENGINE for MySQL table creation */ static char devShmDir[PATH_LEN]; /* temporary directory for bed files */ static char **bedFileNames; /* array of file names in devShmDir/ */ static long long tableRowsLoaded = 0; /* total number of table rows loaded */ void usage() /* Explain usage and exit. */ { errAbort( "trashLoad - generate trash file activity load test\n" "usage:\n" " trashLoad [options] \n" " constructs a directory ./%s to write files into\n" "options:\n" " numFiles - generate this number of files, positive integer\n" " averageSize - files of this average size, positive integer\n" " -testDir= - specify a different directory than ./'%s'\n" " note, this will always be relative: ./\n" " it can not be an explicit path.\n" " -mysql= - specify a database to test mysql table loading\n" " - turns off file write testing, also needs:\n" " -bedFile - a bed file to use for mysql load testing, the more lines\n" " the better.\n" " -filesToo - also write files during mysql testing\n" " -onServer This will speed things up when running with a directory that\n" " the mysql server can access. It will be /dev/shm/bedFiles/\n" " set mysqld my.cnf: secure-file-priv = /dev/shm/bedFilesn\n" " -MyISAM Use MySQL ENGINE MyISAM instead of the default InnoDB", testDir, testDir ); } /* Command line validation table. */ static struct optionSpec options[] = { {"testDir", OPTION_STRING}, {"mysql", OPTION_STRING}, {"bedFile", OPTION_STRING}, {"filesToo", OPTION_BOOLEAN}, {"onServer", OPTION_BOOLEAN}, {"MyISAM", OPTION_BOOLEAN}, {NULL, 0}, }; static void seedRand() /* seed the rand function with bytes from /dev/random */ { unsigned seed = 0; FILE *fd = mustOpen("/dev/random", "r"); fread((unsigned char *)&seed, sizeof(unsigned), 1, fd); carefulClose(&fd); verbose(2, "#seed: 0x%0x = %d\n", seed, seed); srand(seed); } static void createTable(struct sqlConnection *conn, char *tableName) /* use SQL definition to construct a bed 12 track and create the track */ { int maxChromName = 32; char *tableFormat = "CREATE TABLE %s (\n" " bin int unsigned not null, # Bin for range index\n" " chrom varchar(255) not null, # Reference sequence chromosome or scaffold\n" " chromStart int unsigned not null, # Start position in chromosome\n" " chromEnd int unsigned not null, # End position in chromosome\n" " name varchar(255) not null, # Name of item - up to 16 chars\n" " score int not null, # 0-1000. Higher numbers are darker.\n" " strand char(1) not null, # + or - for strand\n" " thickStart int unsigned not null, # Start of thick part\n" " thickEnd int unsigned not null, # End position of thick part\n" " reserved int unsigned not null, # RGB 8 bits each as in bed\n" " blockCount int unsigned not null,\n" " blockSizes longblob not null,\n" " chromStarts longblob not null,\n" " #Indices\n" " INDEX(chrom(%d),bin)\n" ")%s"; struct dyString *createSql = dyStringNew(0); if (MyISAM) sqlDyStringPrintf(createSql, tableFormat, tableName, maxChromName, " ENGINE=MyISAM DEFAULT CHARSET=latin1"); else sqlDyStringPrintf(createSql, tableFormat, tableName, maxChromName, ""); sqlUpdate(conn, createSql->string); dyStringFree(&createSql); } /* static void createTable(struct sqlConnection *conn, char *tableName) */ static void loadFileToTable(struct sqlConnection *conn, char *tableName, char *tabFile) { int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); verbose(3, "# loading '%s' into '%s.%s'\n", tabFile, mysql, tableName); sqlLoadTabFile(conn, tabFile, tableName, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); } /* generate normal distrubion, from information found at: * http://stackoverflow.com/questions/2325472/generate-random-numbers-following-a-normal-distribution-in-c-c * this formula seemed to generate a bi-normal distribution with peaks * at -0.5 and 0.5, so, flipping all the negative values turned it into * a single normal distribution with a mean near 0.5 * the tail toward zero is a bit truncated since it doesn't go below zero */ #define RANDU ((double) rand()/RAND_MAX) #define RANDN2(mu, sigma) (mu + (rand()%2 ? -1.0 : 1.0)*sigma*pow(-log(0.99999*RANDU), 0.5)) #define RANDN RANDN2(0, 1.0) static size_t normalDistribution(long long mean, long long min, long long max) /* return a random number from a normal distribution at the specified mean * within the min,max limits. The limits distort it a bit, true. */ { static int depth = 0; double randn = RANDN2(0, 1.0); if (randn < 0.0) { randn = -randn; } double meanValue = 2.0 * randn * (double)mean; size_t returnValue = (size_t)(round(meanValue + 0.5)); ++depth; if (returnValue < min) returnValue = min; if (returnValue > max) { if (depth > 40) /* do not recurse indefinately */ { returnValue = max; depth = 1; } else return (normalDistribution(mean, min, max)); /* recursive ! */ } --depth; return (returnValue); } static void readLines(char *fileName, int *retCount, char ***retLines) /* as found in gensub2.c, read all lines of file into an array */ { struct slName *el, *list = readAllLines(fileName); int i=0, count = slCount(list); char **lines; AllocArray(lines, count); for (el = list; el != NULL; el = el->next) lines[i++] = trimSpaces(el->name); *retCount = count; *retLines = lines; } static void cleanUp() /* removing temporary bed files */ { struct fileInfo *file, *fileList = listDirX(devShmDir, "*", FALSE); int filesRemoved = 0; for (file = fileList; file != NULL; file = file->next) { char pathName[PATH_LEN]; safef(pathName, sizeof(pathName), "%s/%s", devShmDir, file->name); unlink(pathName); ++filesRemoved; } if (! onServer) rmdir(devShmDir); verbose(3, "# removed %d files from '%s'\n", filesRemoved, devShmDir); } static void writeBedFile(char *fileName, int lines, char **bedArray, int sizeOfArray) /* write number of 'lines' to devShmDir/fileName from bedArray which * has a size of sizeOfArray */ { char pathName[PATH_LEN]; safef(pathName, sizeof(pathName), "%s/%s", devShmDir, fileName); verbose(3, "# writing %d lines to '%s'\n", lines, pathName); tableRowsLoaded += lines; FILE *fh = mustOpen(pathName, "w"); int i = 0; for ( i = 0; i < lines; ++i) { size_t index = normalDistribution(sizeOfArray/2, 0, sizeOfArray-1); fprintf(fh, "%s\n", bedArray[index]); } carefulClose(&fh); } static void prepareBedPartitions(char *bedFile, long long numFiles, long long averageSize) /* Read all lines of file into an array. * partition into various sizes of bed files into a /dev/shm/bedFiles.pid/ * or when onServer, simply /dev/shm/bedFiles/ * directory. */ { int lineCount; char **bedArray; readLines(bedFile, &lineCount, &bedArray); verbose(1, "# bedFile '%s' has %d lines\n", bedFile, lineCount); pid_t pid = getpid(); if (onServer) safef(devShmDir, sizeof(devShmDir), "/dev/shm/bedFiles"); else safef(devShmDir, sizeof(devShmDir), "/dev/shm/bedFiles.%d", (int) pid); makeDirs(devShmDir); bedFileNames = (char **)needMem(sizeof(char *) * numFiles); char maxFileName[PATH_LEN]; safef(maxFileName, sizeof(maxFileName), "%d", lineCount); int nameLength = strlen(maxFileName); char nameFormat[PATH_LEN]; safef(nameFormat, sizeof(nameFormat), "%%0%dd", nameLength); verbose(1, "# constructing %lld bed files, name length: %d, name format: '%s'\n", numFiles, nameLength, nameFormat); int i; for ( i = 0; i < numFiles; ++i) { char fileName[PATH_LEN]; size_t linesToWrite = normalDistribution(averageSize, 1, lineCount); safef(fileName, sizeof(fileName), nameFormat, (int) linesToWrite); writeBedFile(fileName, (int)linesToWrite, bedArray, lineCount); char pathName[PATH_LEN]; safef(pathName, sizeof(pathName), "%s/%s", devShmDir, fileName); bedFileNames[i] = cloneString(pathName); } } static void trashLoad(long long numFiles, long long averageSize) /* trashLoad - generate trash file activity load test. */ { int i; long long bytesWritten = 0; long long maxSize = averageSize * 5; verbose(1, "# trash load test begin, numFiles: %lld, averageSize: %lld, maximum size: %lld\n", numFiles, averageSize, maxSize); struct tempName tn; /* testing normalDistribution, print out a million numbers, send into * textHistorgram to view */ // int i; // for ( i = 0; i < 1000000; ++i) // printf("%lld\n", (long long)normalDistribution(averageSize, 1, maxSize)); struct sqlConnection *conn = NULL; if (mysqlTesting) conn = hAllocConn(mysql); /* generate a single large buffer to write from, and initialize with values */ char *buf = needMem(maxSize+1); int j; for ( j = 0; j < maxSize+1; ++j ) buf[j] = (char)(0xff & j); long beginLoadTest = clock1000(); for ( i = 0; i < numFiles; ++i) { char nameBuffer[1024]; safef(nameBuffer, sizeof(nameBuffer), "lt_%d", i); trashDirFile(&tn, testDir, nameBuffer, ".txt"); char *fileName = tn.forCgi; if (conn) { char prefix[16]; safef(prefix, sizeof(prefix), "t%d", i); char *dbTableName = sqlTempTableName(conn, prefix); createTable(conn, dbTableName); loadFileToTable(conn, dbTableName, bedFileNames[i]); } if (!conn || filesToo) { FILE *fh = mustOpen(fileName, "w"); size_t writeSize = normalDistribution(averageSize, 1, maxSize); bytesWritten += writeSize; size_t itemsWritten = fwrite(buf, writeSize, 1, fh); /* fread() and fwrite() return the number of items successfully read or written (i.e., not the number of characters). If an error occurs, or the end-of-file is reached, the return value is a short item count (or zero). */ if ( 1 != itemsWritten ) { errAbort("# ERROR write error, bytes requested: %ld != bytes written: %ld\n", (long)writeSize, (long)itemsWritten); } fclose(fh); } } long et = clock1000() - beginLoadTest; if (! mysqlTesting || filesToo) { double bytesPerSecond = (double)bytesWritten/(double)((double)et/1000.0); verbose(1, "# %lld total bytes written in %lld files\n", bytesWritten, numFiles); verbose(1, "# trash load test total run time: %ld millis, %.0f bytes per second\n", et, bytesPerSecond); verbose(1, "# %lld\t%lld\t%ld\t%.0f\n", numFiles, bytesWritten, et, bytesPerSecond); verbose(1, "# files\tbytes\tmillis\tbytes/sec\n"); } if (mysqlTesting) { double tablesPerSecond = (double)numFiles/(double)((double)et/1000.0); double rowsPerSecond = (double)tableRowsLoaded/(double)((double)et/1000.0); verbose(1, "# total table rows loaded into mysql: %lld in %lld tables, %0f rows per second\n", tableRowsLoaded, numFiles, rowsPerSecond); verbose(1, "# mysql load test total run time: %ld millis, %0f tables per second\n", et, tablesPerSecond); } hFreeConn(&conn); /* will close only if it was allocated */ } /* static void trashLoad(long long numFiles, long long averageSize) */ int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); filesToo = optionExists("filesToo"); onServer = optionExists("onServer"); MyISAM = optionExists("MyISAM"); testDir = optionVal("testDir", testDir); mysql = optionVal("mysql", mysql); long long numFiles = sqlLongLong(argv[1]); long long averageSize = sqlLongLong(argv[2]); if (numFiles < 1) errAbort("ERROR: numFiles must be a positive integer, given: %lld\n", numFiles); if (averageSize < 1) errAbort("ERROR: averageSize must be a positive integer, given: %lld\n", averageSize); seedRand(); if (strlen(mysql) > 0) { bedFile = optionVal("bedFile", bedFile); if (strlen(bedFile) < 1) errAbort("ERROR: must have a -bedFile=file.bed to do mysql testing"); verbose(1, "# testing mysql with bedFile: '%s'\n", bedFile); prepareBedPartitions(bedFile, numFiles, averageSize); mysqlTesting = TRUE; /* TRUE when mysql database given */ } verbose(1, "# testLoad options: filesToo: %s, onServer: %s, MyISAM: %s\n", filesToo ? "TRUE" : "FALSE", onServer ? "TRUE" : "FALSE", MyISAM ? "TRUE" : "FALSE"); verbose(1, "# testLoad options: mysql: '%s', testDir: '%s'\n", mysql, testDir); verbose(1, "# testLoad options: number of files/tables: %lld, average size (bytes/rows): %lld\n", numFiles, averageSize); /* argv[1] is fileCount, argv[2] is average size */ trashLoad(numFiles, averageSize); if (mysqlTesting) cleanUp(); return 0; }