4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/lib/pslGenoShow.c src/lib/pslGenoShow.c index 507a9e2..6c2d73d 100644 --- src/lib/pslGenoShow.c +++ src/lib/pslGenoShow.c @@ -1,341 +1,341 @@ /* Show aligned exons between a pre-located gene (a stamper gene) in the genome *and its homologues (stamp elements) in the genome. *The aligned exon sequences are shown in blue as regular blat alignment. * The unaligned exon sequence are shown in red. Intron sequences are shown in black. * It is modified from pslShow.c */ /* Copyright (C) 2011 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "dnaseq.h" #include "htmshell.h" #include "psl.h" #include "cda.h" #include "seqOut.h" static void pslShowAlignmentStranded2(struct psl *psl, boolean isProt, char *qName, bioSeq *qSeq, int qStart, int qEnd, char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f) /* Show stamper gene and stamp elements alignment using genomic sequence. * The aligned exons' sequence of stamper gene are shown in colors as usual, but the * the unaligned exon's sequence of stamper gene are shown in red color. */ { boolean tIsRc = (psl->strand[1] == '-'); boolean qIsRc = (psl->strand[0] == '-'); int mulFactor = (isProt ? 3 : 1); DNA *dna = NULL; /* Mixed case version of genomic DNA. */ int qSize = qSeq->size; char *qLetters = cloneString(qSeq->dna); int qbafStart, tbafStart; int qcfmStart, tcfmStart; int lineWidth = isProt ? 60 : 50; tbafStart = tStart; tcfmStart = tStart; qbafStart = qStart; qcfmStart = qStart; /* Deal with minus strand. */ if (tIsRc) { int temp; reverseComplement(tSeq->dna, tSeq->size); tbafStart = tEnd; tcfmStart = tEnd; temp = psl->tSize - tEnd; tEnd = psl->tSize - tStart; tStart = temp; } if (qIsRc) { int temp, j; reverseComplement(qSeq->dna, qSeq->size); reverseComplement(qLetters, qSeq->size); qcfmStart = qEnd; qbafStart = qEnd; temp = psl->qSize - qEnd; qEnd = psl->qSize - qStart; qStart = temp; for(j = 0; j < exnCnt; j++) { temp = psl->qSize - exnStarts[j]; exnStarts[j] = psl->qSize - exnEnds[j]; exnEnds[j] = temp; } reverseInts(exnEnds, exnCnt); reverseInts(exnStarts, exnCnt); } dna = cloneString(tSeq->dna); if (qName == NULL) qName = psl->qName; if (tName == NULL) tName = psl->tName; fputs("Matching bases are colored blue and capitalized. " "Light blue bases mark the boundaries of gaps in either aligned sequence. " "Red bases are unaligned exons' bases of the query gene. \n", f); fprintf(f, "<H4><A NAME=cDNA></A>%s%s</H4>\n", qName, (qIsRc ? " (reverse complemented)" : "")); fprintf(f, "<PRE><TT>"); tolowers(qLetters); /* Display query sequence. */ { struct cfm *cfm; char *colorFlags = needMem(qSeq->size); int i = 0, j = 0, exnIdx = 0; int preStop = 0; for (i=0; i<psl->blockCount; ++i) { int qs = psl->qStarts[i] - qStart; int ts = psl->tStarts[i] - tStart; int sz = psl->blockSizes[i]-1; int end = 0; bool omitExon = FALSE; while(exnIdx < exnCnt && psl->qStarts[i] > exnEnds[exnIdx]) { if(omitExon) { for( j = exnStarts[exnIdx] - qStart; j < exnEnds[exnIdx]-qStart; j++) { colorFlags[j] = socRed; } } exnIdx++; preStop = exnStarts[exnIdx] - qStart; omitExon = TRUE; } /*mark the boundary bases */ colorFlags[qs] = socBrightBlue; qLetters[qs] = toupper(qLetters[qs]); colorFlags[qs+sz] = socBrightBlue; qLetters[qs+sz] = toupper(qLetters[qs+sz]); /* determine block end */ if( i < psl->blockCount -1) end = psl->qStarts[i+1] < exnEnds[exnIdx] ? psl->qStarts[i+1] - qStart : exnEnds[exnIdx] - qStart; else end = qs + sz; for (j=preStop; j < end; j++) { if(j == 82) fprintf(stderr, "right here\n"); if (j > qs && j < qs+sz) { if (qSeq->dna[j] == tSeq->dna[ts+j-qs]) { colorFlags[j] = socBlue; qLetters[j] = toupper(qLetters[j]); } } else if(colorFlags[j] != socBrightBlue && colorFlags[j] != socBlue) colorFlags[j] = socRed; } preStop = end; } cfm = cfmNew(10, lineWidth, TRUE, qIsRc, f, qcfmStart); for (i=0; i<qSize; ++i) cfmOut(cfm, qLetters[i], seqOutColorLookup[(int)colorFlags[i]]); cfmFree(&cfm); freez(&colorFlags); htmHorizontalLine(f); } fprintf(f, "</TT></PRE>\n"); fprintf(f, "<H4><A NAME=genomic></A>%s %s:</H4>\n", tName, (tIsRc ? "(reverse strand)" : "")); fprintf(f, "<PRE><TT>"); /* Display DNA sequence. */ { struct cfm *cfm; char *colorFlags = needMem(tSeq->size); int i,j; int curBlock = 0; for (i=0; i<psl->blockCount; ++i) { int qs = psl->qStarts[i] - qStart; int ts = psl->tStarts[i] - tStart; int sz = psl->blockSizes[i]; if (isProt) { for (j=0; j<sz; ++j) { AA aa = qSeq->dna[qs+j]; int codonStart = ts + 3*j; DNA *codon = &tSeq->dna[codonStart]; AA trans = lookupCodon(codon); if (trans != 'X' && trans == aa) { colorFlags[codonStart] = socBlue; colorFlags[codonStart+1] = socBlue; colorFlags[codonStart+2] = socBlue; toUpperN(dna+codonStart, 3); } } } else { for (j=0; j<sz; ++j) { if (qSeq->dna[qs+j] == tSeq->dna[ts+j]) { colorFlags[ts+j] = socBlue; dna[ts+j] = toupper(dna[ts+j]); } } } colorFlags[ts] = socBrightBlue; colorFlags[ts+sz*mulFactor-1] = socBrightBlue; } cfm = cfmNew(10, lineWidth, TRUE, tIsRc, f, tcfmStart); for (i=0; i<tSeq->size; ++i) { /* Put down "anchor" on first match position in haystack * so user can hop here with a click on the needle. */ if (curBlock < psl->blockCount && psl->tStarts[curBlock] == (i + tStart) ) { fprintf(f, "<A NAME=%d></A>", ++curBlock); /* Watch out for (rare) out-of-order tStarts! */ while (curBlock < psl->blockCount && psl->tStarts[curBlock] <= tStart + i) curBlock++; } cfmOut(cfm, dna[i], seqOutColorLookup[(int)colorFlags[i]]); } cfmFree(&cfm); freez(&colorFlags); htmHorizontalLine(f); } /* Display side by side. */ fprintf(f, "</TT></PRE>\n"); fprintf(f, "<H4><A NAME=ali></A>Side by Side Alignment*</H4>\n"); fprintf(f, "<PRE><TT>"); { struct baf baf; int i,j; bafInit(&baf, qSeq->dna, qbafStart, qIsRc, tSeq->dna, tbafStart, tIsRc, f, lineWidth, isProt); if (isProt) { for (i=0; i<psl->blockCount; ++i) { int qs = psl->qStarts[i] - qStart; int ts = psl->tStarts[i] - tStart; int sz = psl->blockSizes[i]; bafSetPos(&baf, qs, ts); bafStartLine(&baf); for (j=0; j<sz; ++j) { AA aa = qSeq->dna[qs+j]; int codonStart = ts + 3*j; DNA *codon = &tSeq->dna[codonStart]; bafOut(&baf, ' ', codon[0]); bafOut(&baf, aa, codon[1]); bafOut(&baf, ' ', codon[2]); } bafFlushLine(&baf); } fprintf( f, "<I>*when aa is different, BLOSUM positives are in green, BLOSUM negatives in red</I>\n"); } else { int lastQe = psl->qStarts[0] - qStart; int lastTe = psl->tStarts[0] - tStart; int maxSkip = 8; bafSetPos(&baf, lastQe, lastTe); bafStartLine(&baf); for (i=0; i<psl->blockCount; ++i) { int qs = psl->qStarts[i] - qStart; int ts = psl->tStarts[i] - tStart; int sz = psl->blockSizes[i]; boolean doBreak = TRUE; int qSkip = qs - lastQe; int tSkip = ts - lastTe; if (qSkip >= 0 && qSkip <= maxSkip && tSkip == 0) { for (j=0; j<qSkip; ++j) bafOut(&baf, qSeq->dna[lastQe+j], '-'); doBreak = FALSE; } else if (tSkip > 0 && tSkip <= maxSkip && qSkip == 0) { for (j=0; j<tSkip; ++j) bafOut(&baf, '-', tSeq->dna[lastTe+j]); doBreak = FALSE; } if (doBreak) { bafFlushLine(&baf); bafSetPos(&baf, qs, ts); bafStartLine(&baf); } for (j=0; j<sz; ++j) bafOut(&baf, qSeq->dna[qs+j], tSeq->dna[ts+j]); lastQe = qs + sz; lastTe = ts + sz; } bafFlushLine(&baf); fprintf( f, "<I>*Aligned Blocks with gaps <= %d bases are merged for this display</I>\n", maxSkip); } } fprintf(f, "</TT></PRE>"); if (qIsRc) reverseComplement(qSeq->dna, qSeq->size); if (tIsRc) reverseComplement(tSeq->dna, tSeq->size); freeMem(dna); freeMem(qLetters); } int pslGenoShowAlignment(struct psl *psl, boolean isProt, char *qName, bioSeq *qSeq, int qStart, int qEnd, char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f) /* Show aligned exons between a pre-located gene (a stamper gene)and its homologues (stamp elements) * in the genome. The aligned exon sequences are shown in blue as regular blat alignment. * The unaligned exon sequence are shown in red. Intron sequences are shown in black */ { /* At this step we just do a little shuffling of the strands for * untranslated DNA alignments. */ char origStrand[2]; boolean needsSwap = (psl->strand[0] == '-' && psl->strand[1] == 0); if (needsSwap) { memcpy(origStrand, psl->strand, 2); pslRc(psl); } pslShowAlignmentStranded2(psl, isProt, qName, qSeq, qStart, qEnd, tName, tSeq, tStart, tEnd,exnStarts, exnEnds, exnCnt, f); if (needsSwap) { pslRc(psl); memcpy(psl->strand, origStrand, 2); } return psl->blockCount; }