386b5d76ae4c1625e3da1421d4f2a73628ef6048 kent Wed Feb 21 14:06:22 2024 -0800 Making cdwSubmit tag names against a schema file in the cdwSettings rather than hard-coded (and currently ifdefed out) tables. diff --git src/hg/cirm/cdw/lib/cdwValid.c src/hg/cirm/cdw/lib/cdwValid.c index ecd50f5..0eb54a5 100644 --- src/hg/cirm/cdw/lib/cdwValid.c +++ src/hg/cirm/cdw/lib/cdwValid.c @@ -1,651 +1,653 @@ /* Things to do with CIRM validation. */ /* Copyright (C) 2013 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hash.h" #include "hex.h" #include "linefile.h" #include "cdwValid.h" char *cdwCalcValidationKey(char *md5Hex, long long fileSize) /* calculate validation key to discourage faking of validation. Do freeMem on *result when done. */ { if (strlen(md5Hex) != 32) errAbort("Invalid md5Hex string: %s\n", md5Hex); long long sum = 0; sum += fileSize; while (*md5Hex) { unsigned char n = hexToByte(md5Hex); sum += n; md5Hex += 2; } int vNum = sum % 10000; char buf[256]; safef(buf, sizeof buf, "V%d", vNum); return cloneString(buf); } static char *fileNameOnly(char *fullName) /* Return just the fileName part of the path */ { char *fileName = strrchr(fullName, '/'); if (!fileName) fileName = fullName; return fileName; } static void requireStartEndLines(char *fileName, char *startLine, char *endLine) /* Make sure first real line in file is startLine, and last is endLine. Tolerate * empty lines and white space. */ { char *reportFileName = fileNameOnly(fileName); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; /* Get first real line and make sure it is not empty and matches start line. */ if (!lineFileNextReal(lf, &line)) errAbort("%s is empty", reportFileName); line = trimSpaces(line); if (!sameString(line, startLine)) errAbort("%s doesn't start with %s as expected", reportFileName, startLine); boolean lastSame = FALSE; for (;;) { if (!lineFileNextReal(lf, &line)) break; line = trimSpaces(line); lastSame = sameString(line, endLine); } if (!lastSame) errAbort("%s doesn't end with %s as expected", reportFileName, endLine); lineFileClose(&lf); } void cdwValidateRcc(char *path) /* Validate a nanostring rcc file. */ { requireStartEndLines(path, "
", ""); } static boolean fileStartsWithOneOfPair(char *fileName, char *one, char *two) /* Return TRUE if file starts with either one of two strings. */ { /* Figure out size of one and two strings. */ int oneLen = strlen(one); int twoLen = strlen(two); int maxLen = max(oneLen, twoLen); assert(maxLen > 0); int bufLen = maxLen+1; char buf[bufLen]; /* Open up file and try to read enough data */ FILE *f = fopen(fileName, "r"); if (f == NULL) return FALSE; int sizeRead = fread(buf, 1, maxLen, f); carefulClose(&f); /* Return TRUE if we match one or two, otherwise FALSE. */ if (oneLen >= sizeRead && memcmp(buf, one, oneLen) == 0) return TRUE; else if (twoLen >= sizeRead && memcmp(buf, two, twoLen) == 0) return TRUE; return FALSE; } static boolean fileStartsWith(char *path, char *string) /* Make sure file starts with string */ { return fileStartsWithOneOfPair(path, string, string); } void cdwValidateIdat(char *path) /* Validate illumina idat file. */ { if (!fileStartsWithOneOfPair(path, "IDAT", "DITA")) errAbort("%s is not a valid .idat file, it does not start with IDAT or DITA", fileNameOnly(path)); } void cdwValidatePdf(char *path) /* Make sure PDF really is PDF */ { if (!fileStartsWith(path, "%PDF")) errAbort("%s in not a valid .pdf file, it does not start with %%PDF", fileNameOnly(path)); } void cdwValidateCram(char *path) /* Validate cram file. */ { if (!fileStartsWith(path, "CRAM")) errAbort("%s is not a valid .cram file, it does not start with CRAM", fileNameOnly(path)); } void cdwValidateJpg(char *path) /* Check jpg file is really jpg */ { if (!fileStartsWithOneOfPair(path, "\xff\xd8\xff\xe0", "\xff\xd8\xff\xe1")) errAbort("%s is not a valid .jpeg file", fileNameOnly(path)); } void cdwValidatePng(char *path) /* Check png file is really png */ { // Signature from http://www.libpng.org/pub/png/spec/1.2/PNG-Structure.html static unsigned char pngSig[] = {137, 80, 78, 71, 13, 10, 26, 10, 0}; if (!fileStartsWith(path, (char*)pngSig)) errAbort("%s is not a valid .png file", fileNameOnly(path)); } void cdwValidateBamIndex(char *path) /* Check .bam.bai really is index. */ { if (!fileStartsWith(path, "BAI")) errAbort("%s is not a valid .bam.bai file", fileNameOnly(path)); } void cdwValidateTabixIndex(char *path) /* Check that a tabix index file (used for VCF files among other things) starts with right characters */ { if (!fileStartsWith(path, "TIDX")) errAbort("%s is not a valid TABIX index file", fileNameOnly(path)); } boolean cdwIsGzipped(char *path) /* Return TRUE if file at path starts with GZIP signature */ { FILE *f = mustOpen(path, "r"); int first = fgetc(f); int second = fgetc(f); carefulClose(&f); return first == 0x1F && second == 0x8B; } static char *edwSupportedEnrichedIn[] = {"unknown", "exon", "intron", "promoter", "coding", "utr", "utr3", "utr5", "open"}; static int edwSupportedEnrichedInCount = ArraySize(edwSupportedEnrichedIn); boolean cdwCheckEnrichedIn(char *enriched) /* return TRUE if value is allowed */ { return (stringArrayIx(enriched, edwSupportedEnrichedIn, edwSupportedEnrichedInCount) >= 0); } struct cdwBedType cdwBedTypeTable[] = { {"bedLogR", 9, 1}, {"bedRnaElements", 6, 3}, {"bedRrbs", 9, 2}, {"bedMethyl", 9, 2}, {"narrowPeak", 6, 4}, {"broadPeak", 6, 3}, }; int cdwBedTypeCount = ArraySize(cdwBedTypeTable); struct cdwBedType *cdwBedTypeMayFind(char *name) /* Return cdwBedType of given name, just return NULL if not found. */ { int i; for (i=0; i