4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/cgilib/variant.c src/hg/cgilib/variant.c index b6f9f71..0c81a0e 100644 --- src/hg/cgilib/variant.c +++ src/hg/cgilib/variant.c @@ -1,212 +1,212 @@ /* variant.c -- routines to convert other variant formats to a generic * variant structure */ /* Copyright (C) 2014 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "annoRow.h" #include "variant.h" struct allele *alleleClip(struct allele *allele, int sx, int ex, struct lm *lm) /* Return new allele pointing to new variant, both clipped to region defined by [sx,ex). */ { struct variant *oldVariant = allele->variant; int start = oldVariant->chromStart; int end = oldVariant->chromEnd; int delFront = 0; int delRear = 0; if (start < sx) { delFront = min(sx - start, allele->length); start = sx; } if (end > ex) { delRear = min(end - ex, allele->length - delFront); end = ex; } struct variant *newVariant; lmAllocVar(lm, newVariant); newVariant->chrom = lmCloneString(lm, oldVariant->chrom); newVariant->chromStart = start; newVariant->chromEnd = end; newVariant->numAlleles = 1; struct allele *newAllele; lmAllocVar(lm, newAllele); newVariant->alleles = newAllele; newAllele->variant = newVariant; newAllele->length = allele->length - delRear - delFront; assert(newAllele->length >= 0); newAllele->sequence = lmCloneString(lm, &allele->sequence[delFront]); newAllele->sequence[newAllele->length] = 0; // cut off delRear part return newAllele; } static boolean isDash(char *string) /* Return TRUE if the only char in string is '-' * (possibly repeated like the darn pgVenter alleles). */ { char *p; for (p = string; p != NULL && *p != '\0'; p++) if (*p != '-') return FALSE; return TRUE; } struct variant *variantNew(char *chrom, unsigned start, unsigned end, unsigned numAlleles, char *slashSepAlleles, char *refAllele, struct lm *lm) /* Create a variant from basic information that is easy to extract from most other variant * formats: coords, allele count, string of slash-separated alleles and reference allele. */ { struct variant *variant; // We have a new variant! lmAllocVar(lm, variant); variant->chrom = lmCloneString(lm, chrom); variant->chromStart = start; variant->chromEnd = end; variant->numAlleles = numAlleles; // get the alleles. char *nextAlleleString = lmCloneString(lm, slashSepAlleles); int alleleNumber = 0; for( ; alleleNumber < numAlleles; alleleNumber++) { if (nextAlleleString == NULL) errAbort("number of alleles in /-separated string doesn't match numAlleles"); char *thisAlleleString = nextAlleleString; // advance pointer to next variant string // probably there's some kent routine to do this behind the curtain nextAlleleString = strchr(thisAlleleString, '/'); if (nextAlleleString) // null out '/' and move to next char { *nextAlleleString = 0; nextAlleleString++; } boolean isRefAllele = (sameWord(thisAlleleString, refAllele) || (isEmpty(refAllele) && sameString(thisAlleleString, "-")) || sameString(thisAlleleString, "<X>") || // samtools mpileup no variation sameString(thisAlleleString, "<*>")); // gVCF no variation int alleleStringLength = strlen(thisAlleleString); if (isDash(thisAlleleString)) { alleleStringLength = 0; thisAlleleString[0] = '\0'; } // we have a new allele! struct allele *allele; lmAllocVar(lm, allele); slAddHead(&variant->alleles, allele); allele->variant = variant; allele->length = alleleStringLength; allele->sequence = lmCloneString(lm, thisAlleleString); allele->isReference = isRefAllele; } slReverse(&variant->alleles); return variant; } struct variant *variantFromPgSnpAnnoRow(struct annoRow *row, char *refAllele, boolean hasBin, struct lm *lm) /* Translate pgSnp annoRow into variant (allocated by lm). */ { struct pgSnp pgSnp; char **words = row->data; char *wordsWithFakeBin[PGSNP_NUM_COLS]; if (! hasBin) { // pgSnp file input doesn't have a bin column, but the pgSnp code expects one -- // so make a fake bin column to ignore. wordsWithFakeBin[0] = "1"; int i; for (i = 1; i < PGSNP_NUM_COLS; i++) wordsWithFakeBin[i] = words[i-1]; words = wordsWithFakeBin; } pgSnpStaticLoad(words, &pgSnp); return variantNew(pgSnp.chrom, pgSnp.chromStart, pgSnp.chromEnd, pgSnp.alleleCount, pgSnp.name, refAllele, lm); } struct variant *variantFromVcfAnnoRow(struct annoRow *row, char *refAllele, struct lm *lm, struct dyString *dyScratch) /* Translate vcf array of words into variant (allocated by lm, overwriting dyScratch * as temporary scratch string). */ { char **words = row->data; char *alStr = vcfGetSlashSepAllelesFromWords(words, dyScratch); // The reference allele is the first allele in alStr -- and it may be trimmed on both ends with // respect to the raw VCF ref allele in words[3], so copy vcfRefAllele back out of alStr. // That ensures that variantNew will get the reference allele that matches the slash-separated // allele string. int refLen = strlen(alStr); char *p = strchr(alStr, '/'); if (p) refLen = p - alStr; char vcfRefAllele[refLen + 1]; safencpy(vcfRefAllele, sizeof(vcfRefAllele), alStr, refLen); unsigned alCount = countChars(alStr, '/') + 1; return variantNew(row->chrom, row->start, row->end, alCount, alStr, vcfRefAllele, lm); } static char *findRefAllele(struct variant *variant) /* Find the reference allele (preferably not symbolic); return NULL if variant doesn't have one. * Don't free result. */ { char *refAllele = NULL; struct allele *allele; for (allele = variant->alleles; allele != NULL; allele = allele->next) { if (allele->isReference) { refAllele = allele->sequence; if (isAllNt(refAllele, strlen(refAllele))) break; } } return refAllele; } struct variant *splitAndTrimVariants(struct variant *variantIn, struct lm *lm) /* Split variantIn into a list of single-allele variants with redundant ref/alt bases trimmed. */ { struct variant *variantList = NULL; struct variant *variant; for (variant = variantIn; variant != NULL; variant = variant->next) { char *refAllele = findRefAllele(variant); if (refAllele == NULL || !isAllNt(refAllele, strlen(refAllele))) slAddHead(&variantList, variant); else { struct allele *allele; for (allele = variant->alleles; allele != NULL; allele = allele->next) { if (! allele->isReference) { int refLen = strlen(refAllele), altLen = strlen(allele->sequence); char ref[refLen+1], alt[altLen+1]; safecpy(ref, sizeof(ref), refAllele); safecpy(alt, sizeof(alt), allele->sequence); uint start = variant->chromStart, end = variant->chromEnd; trimRefAlt(ref, alt, &start, &end, &refLen, &altLen); slAddHead(&variantList, variantNew(variant->chrom, start, end, 1, alt, ref, lm)); } } } } return variantList; }