4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/bioImage/bioImageLoad/bioImageLoad.c src/hg/bioImage/bioImageLoad/bioImageLoad.c index c70615a..c2418a8 100644 --- src/hg/bioImage/bioImageLoad/bioImageLoad.c +++ src/hg/bioImage/bioImageLoad/bioImageLoad.c @@ -1,353 +1,353 @@ /* bioImageLoad - Load data into bioImage database. */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "obscure.h" #include "ra.h" #include "jksql.h" #include "dystring.h" /* Variables you can override from command line. */ char *database = "bioImage"; boolean replace = FALSE; void usage() /* Explain usage and exit. */ { errAbort( "bioImageLoad - Load data into bioImage database\n" "usage:\n" " bioImageLoad setInfo.ra itemInfo.tab\n" "Please see bioImageLoad.doc for description of the .ra and .tab files\n" "Options:\n" " -database=%s - Specifically set database\n" " -replace - Replace image rather than complaining if it exists\n" , database ); } static struct optionSpec options[] = { {"database", OPTION_STRING,}, {"replace", OPTION_BOOLEAN,}, {NULL, 0}, }; struct hash *hashRowOffsets(char *line) /* Given a space-delimited line, create a hash keyed by the words in * line with values the position of the word (0 based) in line */ { struct hash *hash = hashNew(0); char *word; int wordIx = 0; while ((word = nextWord(&line)) != 0) { hashAdd(hash, word, intToPt(wordIx)); wordIx += 1; } return hash; } char *getVal(char *fieldName, struct hash *raHash, struct hash *rowHash, char **row, char *defaultVal) /* Return value in row if possible, else in ra, else in default. If no value and no default * return an error. */ { char *val = NULL; struct hashEl *hel = hashLookup(rowHash, fieldName); if (hel != NULL) { int rowIx = ptToInt(hel->val); val = row[rowIx]; } else { val = hashFindVal(raHash, fieldName); if (val == NULL) { if (defaultVal != NULL) val = defaultVal; else errAbort("Can't find value for field %s", fieldName); } } return val; } static char *requiredItemFields[] = {"fileName", "submitId"}; static char *requiredSetFields[] = {"contributor"}; static char *requiredFields[] = {"fullDir", "screenDir", "thumbDir", "taxon", "isEmbryo", "age", "bodyPart", "sliceType", "imageType", }; //static char *optionalFields[] = {"sectionSet", "sectionIx", "gene", "locusLink", "refSeq", "genbank", }; char *hashValOrDefault(struct hash *hash, char *key, char *defaultVal) /* Lookup key in hash and return value, or return default if it doesn't exist. */ { char *val = hashFindVal(hash, key); if (val == NULL) val = defaultVal; return val; } int findExactSubmissionId(struct sqlConnection *conn, char *contributors, char *publication, char *pubUrl, char *setUrl, char *itemUrl) /* Find ID of submissionSet that matches all parameters. Return 0 if none found. */ { char query[1024]; sqlSafef(query, sizeof(query), "select id from submissionSet " "where contributors = \"%s\" " "and publication = \"%s\" " "and pubUrl = '%s' and setUrl = '%s' and itemUrl = '%s'" , contributors, publication, pubUrl, setUrl, itemUrl); return sqlQuickNum(conn, query); } int findOrAddIdTable(struct sqlConnection *conn, char *table, char *field, char *value) /* Get ID associated with field.value in table. */ { char query[256]; int id; sqlSafef(query, sizeof(query), "select id from %s where %s = \"%s\"", table, field, value); id = sqlQuickNum(conn, query); if (id == 0) { sqlSafef(query, sizeof(query), "insert into %s values(default, \"%s\")", table, value); sqlUpdate(conn, query); id = sqlLastAutoId(conn); } return id; } int createSubmissionId(struct sqlConnection *conn, char *contributors, char *publication, char *pubUrl, char *setUrl, char *itemUrl) /* Add submission and contributors to database and return submission ID */ { struct slName *slNameListFromString(char *s, char delimiter); struct slName *contribList = NULL, *contrib; int submissionSetId; char query[1024]; sqlSafef(query, sizeof(query), "insert into submissionSet " "values(default, \"%s\", \"%s\", '%s', '%s', '%s')", contributors, publication, pubUrl, setUrl, itemUrl); sqlUpdate(conn, query); submissionSetId = sqlLastAutoId(conn); contribList = slNameListFromComma(contributors); for (contrib = contribList; contrib != NULL; contrib = contrib->next) { int contribId = findOrAddIdTable(conn, "contributor", "name", skipLeadingSpaces(contrib->name)); sqlSafef(query, sizeof(query), "insert into submissionContributor values(%d, %d)", submissionSetId, contribId); sqlUpdate(conn, query); } slFreeList(&contribList); return submissionSetId; } int saveSubmissionSet(struct sqlConnection *conn, struct hash *raHash) /* Create submissionSet, submissionContributor, and contributor records. */ { char *contributor = hashMustFindVal(raHash, "contributor"); char *publication = hashValOrDefault(raHash, "publication", ""); char *pubUrl = hashValOrDefault(raHash, "pubUrl", ""); char *setUrl = hashValOrDefault(raHash, "setUrl", ""); char *itemUrl = hashValOrDefault(raHash, "itemUrl", ""); int submissionId = findExactSubmissionId(conn, contributor, publication, pubUrl, setUrl, itemUrl); if (submissionId != 0) return submissionId; else return createSubmissionId(conn, contributor, publication, pubUrl, setUrl, itemUrl); } int cachedId(struct sqlConnection *conn, char *tableName, char *fieldName, struct hash *cache, char *raFieldName, struct hash *raHash, struct hash *rowHash, char **row) /* Get value for named field, and see if it exists in table. If so * return associated id, otherwise create new table entry and return * that id. */ { char *value = getVal(raFieldName, raHash, rowHash, row, ""); if (value[0] == 0) return 0; return findOrAddIdTable(conn, tableName, fieldName, value); } void bioImageLoad(char *setRaFile, char *itemTabFile) /* bioImageLoad - Load data into bioImage database. */ { struct hash *raHash = raReadSingle(setRaFile); struct hash *rowHash; struct lineFile *lf = lineFileOpen(itemTabFile, TRUE); char *line, *words[256]; struct sqlConnection *conn = sqlConnect(database); int rowSize; int submissionSetId; struct hash *fullDirHash = newHash(0); struct hash *screenDirHash = newHash(0); struct hash *thumbDirHash = newHash(0); struct hash *treatmentHash = newHash(0); struct hash *bodyPartHash = newHash(0); struct hash *sliceTypeHash = newHash(0); struct hash *imageTypeHash = newHash(0); struct hash *sectionSetHash = newHash(0); struct dyString *dy = dyStringNew(0); /* Read first line of tab file, and from it get all the field names. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s appears to be empty", lf->fileName); if (line[0] != '#') errAbort("First line of %s needs to start with #, and then contain field names", lf->fileName); rowHash = hashRowOffsets(line+1); rowSize = rowHash->elCount; if (rowSize >= ArraySize(words)) errAbort("Too many fields in %s", lf->fileName); /* Check that have all required fields */ { char *fieldName; int i; for (i=0; i<ArraySize(requiredSetFields); ++i) { fieldName = requiredSetFields[i]; if (!hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s", fieldName, setRaFile); } for (i=0; i<ArraySize(requiredItemFields); ++i) { fieldName = requiredItemFields[i]; if (!hashLookup(rowHash, fieldName)) errAbort("Field %s is not in %s", fieldName, itemTabFile); } for (i=0; i<ArraySize(requiredFields); ++i) { fieldName = requiredFields[i]; if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile); } } /* Create/find submission record. */ submissionSetId = saveSubmissionSet(conn, raHash); /* Process rest of tab file. */ while (lineFileNextRowTab(lf, words, rowSize)) { int fullDir = cachedId(conn, "location", "name", fullDirHash, "fullDir", raHash, rowHash, words); int screenDir = cachedId(conn, "location", "name", screenDirHash, "screenDir", raHash, rowHash, words); int thumbDir = cachedId(conn, "location", "name", thumbDirHash, "thumbDir", raHash, rowHash, words); int bodyPart = cachedId(conn, "bodyPart", "name", bodyPartHash, "bodyPart", raHash, rowHash, words); int sliceType = cachedId(conn, "sliceType", "name", sliceTypeHash, "sliceType", raHash, rowHash, words); int imageType = cachedId(conn, "imageType", "name", imageTypeHash, "imageType", raHash, rowHash, words); int treatment = cachedId(conn, "treatment", "conditions", treatmentHash, "treatment", raHash, rowHash, words); char *fileName = getVal("fileName", raHash, rowHash, words, NULL); char *submitId = getVal("submitId", raHash, rowHash, words, NULL); char *taxon = getVal("taxon", raHash, rowHash, words, NULL); char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL); char *age = getVal("age", raHash, rowHash, words, NULL); char *sectionSet = getVal("sectionSet", raHash, rowHash, words, ""); char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0"); char *gene = getVal("gene", raHash, rowHash, words, ""); char *locusLink = getVal("locusLink", raHash, rowHash, words, ""); char *refSeq = getVal("refSeq", raHash, rowHash, words, ""); char *genbank = getVal("genbank", raHash, rowHash, words, ""); char *priority = getVal("priority", raHash, rowHash, words, "200"); int sectionId = 0; int oldId; // char *xzy = getVal("xzy", raHash, rowHash, words, xzy); if (sectionSet[0] != 0 && !sameString(sectionSet, "0")) { struct hashEl *hel = hashLookup(sectionSetHash, sectionSet); if (hel != NULL) sectionId = ptToInt(hel->val); else { sqlUpdate(conn, NOSQLINJ "insert into sectionSet values(default)"); sectionId = sqlLastAutoId(conn); hashAdd(sectionSetHash, sectionSet, intToPt(sectionId)); } } dyStringClear(dy); sqlDyStringPrintf(dy, "select id from image "); sqlDyStringPrintf(dy, "where fileName = '%s' ", fileName); sqlDyStringPrintf(dy, "and fullLocation = %d", fullDir); oldId = sqlQuickNum(conn, dy->string); if (oldId != 0) { if (replace) { dyStringClear(dy); sqlDyStringPrintf(dy, "delete from image where id = %d", oldId); sqlUpdate(conn, dy->string); } else errAbort("%s is already in database line %d of %s", fileName, lf->lineIx, lf->fileName); } dyStringClear(dy); sqlDyStringPrintf(dy, "insert into image set\n"); sqlDyStringPrintf(dy, " id = default,\n"); sqlDyStringPrintf(dy, " fileName = '%s',\n", fileName); sqlDyStringPrintf(dy, " fullLocation = %d,\n", fullDir); sqlDyStringPrintf(dy, " screenLocation = %d,\n", screenDir); sqlDyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir); sqlDyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId); sqlDyStringPrintf(dy, " sectionSet = %d,\n", sectionId); sqlDyStringPrintf(dy, " sectionIx = '%s',\n", sectionIx); sqlDyStringPrintf(dy, " submitId = '%s',\n", submitId); sqlDyStringPrintf(dy, " gene = '%s',\n", gene); sqlDyStringPrintf(dy, " locusLink = '%s',\n", locusLink); sqlDyStringPrintf(dy, " refSeq = '%s',\n", refSeq); sqlDyStringPrintf(dy, " genbank = '%s',\n", genbank); sqlDyStringPrintf(dy, " priority = '%s',\n", priority); sqlDyStringPrintf(dy, " taxon = '%s',\n", taxon); sqlDyStringPrintf(dy, " isEmbryo = '%s',\n", isEmbryo); sqlDyStringPrintf(dy, " age = '%s',\n", age); sqlDyStringPrintf(dy, " bodyPart = %d,\n", bodyPart); sqlDyStringPrintf(dy, " sliceType = %d,\n", sliceType); sqlDyStringPrintf(dy, " imageType = %d,\n", imageType); sqlDyStringPrintf(dy, " treatment = %d\n", treatment); sqlUpdate(conn, dy->string); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); database = optionVal("database", database); replace = optionExists("replace"); bioImageLoad(argv[1], argv[2]); return 0; }