4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/lib/indelShift.c src/hg/lib/indelShift.c
index 873b4ae..5159285 100644
--- src/hg/lib/indelShift.c
+++ src/hg/lib/indelShift.c
@@ -1,217 +1,217 @@
 /* indelShift.c - find the leftmost or rightmost possible location of an insertion/deletion
  * variant with ambiguous placement, e.g. 'AAA' -> 'AAAA' -- where is the extra A inserted? */
 
 /* Copyright (C) 2017 The Regents of the University of California
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "dnaseq.h"
 #include "indelShift.h"
 
 boolean indelShiftIsApplicable(int refLen, int altLen)
 /* Return true if it makes sense to try to shift this variant, i.e. if it's pure ins or del. */
 {
 return ((refLen == 0 && altLen > 0) ||  // insertion into reference
         (refLen > 0 && altLen == 0));   // deletion from reference
 }
 
 void indelShiftRangeForVariant(uint varStart, int refLen, int altLen,
                                  uint *retStart, uint *retEnd)
 /* Given a variant start position and the lengths of reference and alternate alleles,
  * set retStart and retEnd to a range around varStart suitable for shifting around.
  * Caller must truncate retEnd to sequence size if necessary. */
 {
 int padding = max(128, 2*refLen+altLen);
 *retStart = (varStart > padding) ? varStart - padding : 0;
 *retEnd = varStart + refLen + altLen + padding;
 }
 
 static void expandWindowIfNecessary(struct seqWindow *seqWin, uint varStart, int refLen, int altLen)
 /* Make sure there's enough padding to start a sliding-window search around variant. */
 {
 uint maxStart, minEnd;
 indelShiftRangeForVariant(varStart, refLen, altLen, &maxStart, &minEnd);
 seqWin->fetch(seqWin, seqWin->seqName, maxStart, minEnd);
 }
 
 static int fetchLeft(struct seqWindow *seqWin, int basesShifted)
 /* We need to fetch more sequence to the left; update seqWin and p* params
  * with the distance that we have shifted so far, and fetch a larger block of sequence.
  * Return the new offset of the variant start within seqWin. */
 {
 // Double the size of our padding on the left (but don't underflow seq)
 int padding = basesShifted * 2;
 if (padding > seqWin->start)
     padding = seqWin->start;
 if (padding > 0)
     {
     uint newStart = seqWin->start - padding;
     seqWin->fetch(seqWin, seqWin->seqName, newStart, seqWin->end);
     }
 return padding;
 }
 
 static uint slideLeftOne(struct seqWindow *seqWin, uint start, int *pBasesShifted)
 /* Advance one base to the left; if we hit the beginning of the sequence window, fetch more. */
 {
 uint newStart = start - 1;
 *pBasesShifted += 1;
 if (newStart == 0)
     newStart = fetchLeft(seqWin, *pBasesShifted);
 return newStart;
 }
 
 INLINE boolean notMax(int basesShifted, int maxShift)
 /* Return TRUE if we have not yet hit the limit or if there is no limit. */
 {
 return (maxShift <= INDEL_SHIFT_NO_MAX) || (basesShifted < maxShift);
 }
 
 static int slideLeft(struct seqWindow *seqWin, uint varStart, int refLen, char *alt, int maxShift)
 /* Slide left within seqWin as far as we can; if we still haven't found a mismatch then
  * fetch more sequence and keep going.  Return the number of bases shifted.
  * This can modify all params (except maxShift). */
 {
 int basesShifted = 0;
 int altLen = strlen(alt);
 // Relative coords within seqWin:
 uint start = varStart - seqWin->start;
 boolean done = FALSE;
 // If insertion, first compare inserted seq to genomic seq to the left of insertion point.
 int ix;
 for (ix = altLen-1;  !done && ix >= 0 && start > 0 && notMax(basesShifted, maxShift);  ix--)
     {
     if (seqWin->seq[start-1] == alt[ix])
         start = slideLeftOne(seqWin, start, &basesShifted);
     else
         done = TRUE;
     }
 // If we're not done, keep trying to shift left.
 while (!done && start > 0 && notMax(basesShifted, maxShift))
     {
     if (seqWin->seq[start-1] == seqWin->seq[start-1+refLen+altLen])
         start = slideLeftOne(seqWin, start, &basesShifted);
     else
         done = TRUE;
     }
 return basesShifted;
 }
 
 static void fetchRight(struct seqWindow *seqWin, int extraPadding, int basesShifted)
 /* We need to fetch more sequence to the right; update seqWin and p* params
  * with the distance that we have shifted so far, and fetch a larger block of sequence.
  * Return the number of bases by which we expanded to the right. */
 {
 // Double the size of our padding on the right
 int padding = basesShifted * 2 + extraPadding;
 uint newEnd = seqWin->end + padding;
 seqWin->fetch(seqWin, seqWin->seqName, seqWin->start, newEnd);
 }
 
 static uint slideRightOne(struct seqWindow *seqWin, uint start, int refLen, int altLen,
                           int *pBasesShifted)
 /* Advance one base to the right; if we hit the end of the sequence window, fetch more. */
 {
 uint newStart = start + 1;
 *pBasesShifted += 1;
 if (newStart >= seqWin->end - seqWin->start - refLen - altLen)
     fetchRight(seqWin, refLen+altLen, *pBasesShifted);
 return newStart;
 }
 
 static int slideRight(struct seqWindow *seqWin, uint varStart, int refLen, char *alt, int maxShift)
 /* Slide right within seqWin as far as we can; if we still haven't found a mismatch then
  * fetch more sequence and keep going.  Return the number of bases shifted.
  * This can modify all params (except maxShift). */
 {
 int basesShifted = 0;
 int altLen = strlen(alt);
 // Relative coords within seqWin:
 uint start = varStart - seqWin->start;
 boolean done = FALSE;
 // If insertion, first compare inserted seq to genomic seq to the right of insertion point.
 int ix;
 for (ix = 0; !done && ix < altLen && start < seqWin->end - seqWin->start - refLen &&
              notMax(basesShifted, maxShift); ix++)
     {
     if (seqWin->seq[start] == alt[ix])
         start = slideRightOne(seqWin, start, refLen, altLen, &basesShifted);
     else
         done = TRUE;
     }
 // If we're not done, keep trying to shift right.
 while (!done && start < seqWin->end - seqWin->start - refLen && notMax(basesShifted, maxShift))
     {
     if (seqWin->seq[start - altLen] == seqWin->seq[start + refLen])
         start = slideRightOne(seqWin, start, refLen, altLen, &basesShifted);
     else
         done = TRUE;
     }
 return basesShifted;
 }
 
 int indelShift(struct seqWindow *seqWin, uint *pVarStart, uint *pVarEnd, char *alt,
                int maxShift, enum indelShiftDirection direction)
 /* Shift variant as far left or right as possible in seqName, expanding seqWindow as necessary,
  * updating pVarStart and pVarEnd, and possibly rotating the bases in alt (if non-empty).
  * If maxShift > INDEL_SHIFT_NO_MAX, shift at most maxShift bases.
  * Return the number of bases by which we shifted, usually 0.
  * NOTES:
  * - This can modify all params (except direction).  Recompute anything derived from them
  *   if result is not 0.
  * - seqWin must have the correct seqName for variant and ideally will already contain a
  *   sufficiently large region around variant.
  * - This works only on pure insertions or deletions; if original ref and alt have identical
  *   bases at the beginning and/or end, trim those before passing into this function. */
 {
 int basesShifted = 0;
 int refLen = *pVarEnd - *pVarStart;
 if (refLen < 0)
     errAbort("indelShift: variant end (%u) is less than variant start (%u)",
              *pVarEnd, *pVarStart);
 int altLen = strlen(alt);
 if (indelShiftIsApplicable(refLen, altLen))
     {
     // It's a pure insertion or deletion; attempt to slide the variant left
     expandWindowIfNecessary(seqWin, *pVarStart, refLen, altLen);
     if (direction == isdLeft)
         basesShifted = slideLeft(seqWin, *pVarStart, refLen, alt, maxShift);
     else if (direction == isdRight)
         basesShifted = slideRight(seqWin, *pVarStart, refLen, alt, maxShift);
     else
         errAbort("indelShift: invalid direction (%d), should be isdLeft or isdRight", direction);
     if (basesShifted)
         {
         // Update coords and alt sequence (if non-empty)
         int coordOffset = (direction == isdRight) ? basesShifted : -basesShifted;
         *pVarStart += coordOffset;
         *pVarEnd += coordOffset;
         if (altLen > 0)
             {
             if (basesShifted >= altLen)
                 // All of alt is a shifted portion of the reference.
                 seqWindowCopy(seqWin, *pVarStart - (direction == isdLeft ? 0 : altLen), altLen,
                               alt, altLen+1);
             else if (direction == isdRight)
                 {
                 // The beginning of alt is a left-shifted portion of the original.  The remainder
                 // is from reference seq.  Do the shifting first, then the copying from ref.
                 int shiftedLen = altLen - basesShifted;
                 memmove(alt, alt+basesShifted, shiftedLen);
                 seqWindowCopy(seqWin, *pVarStart - basesShifted, basesShifted,
                               alt+shiftedLen, basesShifted+1);
                 }
             else
                 {
                 // The beginning of alt is from reference seq. The remainder is a right-shifted
                 // portion of the original.  Do the shifting first, then the copying from ref.
                 memmove(alt+basesShifted, alt, (altLen - basesShifted));
                 // Note: can't use seqWindowCopy because it zero-terminates.
                 char *refAlStart = seqWin->seq + (*pVarStart - seqWin->start);
                 memcpy(alt, refAlStart, basesShifted);
                 }
             }
         }
     }
 return basesShifted;
 }