5ee0289962ecea549b230e5835b1b949684d611b braney Wed Apr 18 18:49:20 2012 -0700 ongoing work #6152. non-synonymous change in gene on the negative strand diff --git src/hg/lib/gpFx.c src/hg/lib/gpFx.c index c935389..268481a 100644 --- src/hg/lib/gpFx.c +++ src/hg/lib/gpFx.c @@ -1,66 +1,81 @@ #include "common.h" #include "genePred.h" #include "gpFx.h" char *gpFxModifySequence(struct allele *allele, struct genePred *pred, int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence) +/* modify a transcript to what it'd be if the alternate allele were present */ { //if ((pred->optFields & genePredExonFramesFld) == 0) // genePredAddExonFrames(pred); // change transcript at variant point int exonOffset = allele->variant->chromStart - transcriptPsl->tStarts[exonNum]; int transcriptOffset = transcriptPsl->qStarts[exonNum] + exonOffset; if (allele->length != allele->variant->chromEnd - allele->variant->chromStart) errAbort("only support alleles the same length as the reference"); char *retSequence = cloneString(transcriptSequence->dna); -memcpy(&transcriptSequence->dna[transcriptOffset], allele->sequence, +if (*pred->strand == '-') + transcriptOffset = transcriptSequence->size - (transcriptOffset + 1); + +// make the change in the sequence +memcpy(&retSequence[transcriptOffset], allele->sequence, allele->length); return retSequence; } char *getCodingSequence(struct genePred *pred, char *transcriptSequence) +/* extract the CDS from a transcript */ { int ii; +if (*pred->strand == '-') + reverseComplement(transcriptSequence, strlen(transcriptSequence)); + // trim off the 5' char *ptr = transcriptSequence; for(ii=0; ii < pred->exonCount; ii++) { int exonSize = pred->exonEnds[ii] - pred->exonStarts[ii]; if (pred->cdsStart > pred->exonStarts[ii]) break; ptr += exonSize; } int exonOffset = pred->cdsStart - pred->exonStarts[ii]; ptr += exonOffset; char *newString = cloneString(ptr); // trim off 3' newString[genePredCdsSize(pred)] = 0; +if (*pred->strand == '-') + { + reverseComplement(transcriptSequence, strlen(transcriptSequence)); + reverseComplement(newString, strlen(newString)); + } + return newString; } -int firstChange(char *string1, char *string2) +static int firstChange(char *string1, char *string2) /* return the position of the first difference between the two sequences */ { int count = 0; while (*string1++ == *string2++) count++; return count; } struct gpFx *gpFxInCodingExon(struct allele *allele, struct genePred *pred, int exonNum, struct psl *transcriptPsl, struct dnaSeq *transcriptSequence) /* generate an effect from a different allele in a coding exon */ { struct gpFx *effectsList = NULL; @@ -84,31 +99,33 @@ aaSeq *newaa = translateSeq(newCodingDna, 0, FALSE); //transcript ID, exon number(s), cDNA position, CDS position, peptide position, alternate amino acids, alternate codons" 9 if (sameString(newaa->dna, oldaa->dna)) { // synonymous change } else { // non-synonymous change struct gpFx *effects; AllocVar(effects); effects->so.soNumber = non_synonymous_variant; effects->so.sub.codingChange.transcript = cloneString(pred->name); + if (*pred->strand == '-') effects->so.sub.codingChange.exonNumber = exonNum; + effects->so.sub.codingChange.exonNumber = pred->exonCount - exonNum; effects->so.sub.codingChange.cDnaPosition = firstChange( newSequence, transcriptSequence->dna); effects->so.sub.codingChange.cdsPosition = firstChange( newCodingSequence, oldCodingSequence); effects->so.sub.codingChange.pepPosition = firstChange( newaa->dna, oldaa->dna); /* char *aaChanges; char *codonChanges; */ slAddHead(&effectsList, effects); } return effectsList; }