38bc9e01ce6e7b22f0533c6624a64a17fd6c7bff angie Fri Sep 27 17:20:54 2013 -0700 Major restructuring of gpFx.c, to accommodate large deletions that can knock out entire exons. Now, instead of iterating through exons and building a new genePred along with a sequence that has one exon's modification, we project the variant start and end onto cDNA and CDS coords up front. Then there is only one modification to make per allele, and in fact we only care about the actual modification when there's a CDS change. A new SO term, exon_loss, has been incorporated into gpFx and hgVai's/annoGratorGpVar's filtering. fixes #11771 diff --git src/hg/lib/variant.c src/hg/lib/variant.c index 5014abd..f49537a 100644 --- src/hg/lib/variant.c +++ src/hg/lib/variant.c @@ -44,76 +44,79 @@ return newAllele; } static boolean isDash(char *string) /* Return TRUE if the only char in string is '-' * (possibly repeated like the darn pgVenter alleles). */ { char *p; for (p = string; p != NULL && *p != '\0'; p++) if (*p != '-') return FALSE; return TRUE; } struct variant *variantNew(char *chrom, unsigned start, unsigned end, unsigned numAlleles, - char *slashSepAlleles, struct lm *lm) + char *slashSepAlleles, char *refAllele, struct lm *lm) /* Create a variant from basic information that is easy to extract from most other variant - * formats: coords, allele count, and string of slash-separated alleles. */ + * formats: coords, allele count, string of slash-separated alleles and reference allele. */ { struct variant *variant; // We have a new variant! lmAllocVar(lm, variant); variant->chrom = lmCloneString(lm, chrom); variant->chromStart = start; variant->chromEnd = end; variant->numAlleles = numAlleles; // get the alleles. 
char *nextAlleleString = lmCloneString(lm, slashSepAlleles); int alleleNumber = 0; for( ; alleleNumber < numAlleles; alleleNumber++) { if (nextAlleleString == NULL) - errAbort("number of alleles in pgSnp doesn't match number in name"); + errAbort("number of alleles in /-separated string doesn't match numAlleles"); char *thisAlleleString = nextAlleleString; // advance pointer to next variant string // probably there's some kent routine to do this behind the curtain nextAlleleString = strchr(thisAlleleString, '/'); if (nextAlleleString) // null out '/' and move to next char { *nextAlleleString = 0; nextAlleleString++; } + boolean isRefAllele = (sameWord(thisAlleleString, refAllele) || + (isEmpty(refAllele) && sameString(thisAlleleString, "-"))); int alleleStringLength = strlen(thisAlleleString); if (isDash(thisAlleleString)) { alleleStringLength = 0; thisAlleleString[0] = '\0'; } // we have a new allele! struct allele *allele; AllocVar(allele); slAddHead(&variant->alleles, allele); allele->variant = variant; allele->length = alleleStringLength; toLowerN(thisAlleleString, alleleStringLength); allele->sequence = lmCloneString(lm, thisAlleleString); + allele->isReference = isRefAllele; } slReverse(&variant->alleles); return variant; } -struct variant *variantFromPgSnp(struct pgSnp *pgSnp, struct lm *lm) +struct variant *variantFromPgSnp(struct pgSnp *pgSnp, char *refAllele, struct lm *lm) /* convert pgSnp record to variant record */ { return variantNew(pgSnp->chrom, pgSnp->chromStart, pgSnp->chromEnd, pgSnp->alleleCount, - pgSnp->name, lm); + pgSnp->name, refAllele, lm); }