533112afe2a2005e80cdb1f82904ea65032d4302
braney
  Sat Oct 2 11:37:34 2021 -0700
split hg/lib into two separate libraries, one only used by the cgis

diff --git src/hg/lib/variant.c src/hg/lib/variant.c
deleted file mode 100644
index b6f9f71..0000000
--- src/hg/lib/variant.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/* variant.c -- routines to convert other variant formats to a generic
- *              variant structure */
-
-/* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
-
-#include "common.h"
-#include "annoRow.h"
-#include "variant.h"
-
-struct allele  *alleleClip(struct allele *allele, int sx, int ex, struct lm *lm)
-/* Return new allele pointing to new variant, both clipped to region defined by [sx,ex). */
-{
-struct variant *oldVariant = allele->variant;
-int start = oldVariant->chromStart;
-int end = oldVariant->chromEnd;
-int delFront = 0;
-int delRear = 0;
-
-if (start < sx)
-    {
-    delFront = min(sx - start, allele->length);
-    start = sx;
-    }
-
-if (end > ex)
-    {
-    delRear = min(end - ex, allele->length - delFront);
-    end = ex;
-    }
-
-struct variant *newVariant;
-lmAllocVar(lm, newVariant);
-newVariant->chrom = lmCloneString(lm, oldVariant->chrom);
-newVariant->chromStart = start;
-newVariant->chromEnd = end;
-newVariant->numAlleles = 1;
-
-struct allele *newAllele;
-lmAllocVar(lm, newAllele);
-newVariant->alleles = newAllele;
-newAllele->variant = newVariant;
-newAllele->length = allele->length - delRear - delFront;
-assert(newAllele->length >= 0);
-newAllele->sequence = lmCloneString(lm, &allele->sequence[delFront]);
-newAllele->sequence[newAllele->length] = 0;   // cut off delRear part
-
-return newAllele;
-}
-
-static boolean isDash(char *string)
-/* Return TRUE if the only char in string is '-'
- * (possibly repeated like the darn pgVenter alleles). */
-{
-char *p;
-for (p = string;  p != NULL && *p != '\0';  p++)
-    if (*p != '-')
-	return FALSE;
-return TRUE;
-}
-
-struct variant *variantNew(char *chrom, unsigned start, unsigned end, unsigned numAlleles,
-			   char *slashSepAlleles, char *refAllele, struct lm *lm)
-/* Create a variant from basic information that is easy to extract from most other variant
- * formats: coords, allele count, string of slash-separated alleles and reference allele. */
-{
-struct variant *variant;
-
-// We have a new variant!
-lmAllocVar(lm, variant);
-variant->chrom = lmCloneString(lm, chrom);
-variant->chromStart = start;
-variant->chromEnd = end;
-variant->numAlleles = numAlleles;
-
-// get the alleles.
-char *nextAlleleString = lmCloneString(lm, slashSepAlleles);
-int alleleNumber = 0;
-for( ; alleleNumber < numAlleles; alleleNumber++)
-    {
-    if (nextAlleleString == NULL)
-	errAbort("number of alleles in /-separated string doesn't match numAlleles");
-    
-    char *thisAlleleString = nextAlleleString;
-
-    // advance pointer to next variant string
-    // probably there's some kent routine to do this behind the curtain
-    nextAlleleString = strchr(thisAlleleString, '/');
-    if (nextAlleleString)	 // null out '/' and move to next char
-	{
-	*nextAlleleString = 0;
-	nextAlleleString++;
-	}
-
-    boolean isRefAllele = (sameWord(thisAlleleString, refAllele) ||
-			   (isEmpty(refAllele) && sameString(thisAlleleString, "-")) ||
-			   sameString(thisAlleleString, "<X>") || // samtools mpileup no variation
-			   sameString(thisAlleleString, "<*>"));  // gVCF no variation
-    int alleleStringLength = strlen(thisAlleleString);
-    if (isDash(thisAlleleString))
-	{
-	alleleStringLength = 0;
-	thisAlleleString[0] = '\0';
-	}
-
-    // we have a new allele!
-    struct allele *allele;
-    lmAllocVar(lm, allele);
-    slAddHead(&variant->alleles, allele);
-    allele->variant = variant;
-    allele->length = alleleStringLength; 
-    allele->sequence = lmCloneString(lm, thisAlleleString);
-    allele->isReference = isRefAllele;
-    }
-
-slReverse(&variant->alleles);
-
-return variant;
-}
-
-struct variant *variantFromPgSnpAnnoRow(struct annoRow *row, char *refAllele, boolean hasBin,
-                                        struct lm *lm)
-/* Translate pgSnp annoRow into variant (allocated by lm). */
-{
-struct pgSnp pgSnp;
-char **words = row->data;
-char *wordsWithFakeBin[PGSNP_NUM_COLS];
-if (! hasBin)
-    {
-    // pgSnp file input doesn't have a bin column, but the pgSnp code expects one --
-    // so make a fake bin column to ignore.
-    wordsWithFakeBin[0] = "1";
-    int i;
-    for (i = 1;  i < PGSNP_NUM_COLS;  i++)
-        wordsWithFakeBin[i] = words[i-1];
-    words = wordsWithFakeBin;
-    }
-pgSnpStaticLoad(words, &pgSnp);
-return variantNew(pgSnp.chrom, pgSnp.chromStart, pgSnp.chromEnd, pgSnp.alleleCount,
-		  pgSnp.name, refAllele, lm);
-}
-
-struct variant *variantFromVcfAnnoRow(struct annoRow *row, char *refAllele, struct lm *lm,
-				      struct dyString *dyScratch)
-/* Translate vcf array of words into variant (allocated by lm, overwriting dyScratch
- * as temporary scratch string). */
-{
-char **words = row->data;
-char *alStr = vcfGetSlashSepAllelesFromWords(words, dyScratch);
-// The reference allele is the first allele in alStr -- and it may be trimmed on both ends with
-// respect to the raw VCF ref allele in words[3], so copy vcfRefAllele back out of alStr.
-// That ensures that variantNew will get the reference allele that matches the slash-separated
-// allele string.
-int refLen = strlen(alStr);
-char *p = strchr(alStr, '/');
-if (p)
-    refLen = p - alStr;
-char vcfRefAllele[refLen + 1];
-safencpy(vcfRefAllele, sizeof(vcfRefAllele), alStr, refLen);
-unsigned alCount = countChars(alStr, '/') + 1;
-return variantNew(row->chrom, row->start, row->end, alCount, alStr, vcfRefAllele, lm);
-}
-
-static char *findRefAllele(struct variant *variant)
-/* Find the reference allele (preferably not symbolic); return NULL if variant doesn't have one.
- * Don't free result. */
-{
-char *refAllele = NULL;
-struct allele *allele;
-for (allele = variant->alleles;  allele != NULL;  allele = allele->next)
-    {
-    if (allele->isReference)
-        {
-        refAllele = allele->sequence;
-        if (isAllNt(refAllele, strlen(refAllele)))
-            break;
-        }
-    }
-return refAllele;
-}
-
-struct variant *splitAndTrimVariants(struct variant *variantIn, struct lm *lm)
-/* Split variantIn into a list of single-allele variants with redundant ref/alt bases trimmed. */
-{
-struct variant *variantList = NULL;
-struct variant *variant;
-for (variant = variantIn; variant != NULL;  variant = variant->next)
-    {
-    char *refAllele = findRefAllele(variant);
-    if (refAllele == NULL || !isAllNt(refAllele, strlen(refAllele)))
-        slAddHead(&variantList, variant);
-    else
-        {
-        struct allele *allele;
-        for (allele = variant->alleles;  allele != NULL;  allele = allele->next)
-            {
-            if (! allele->isReference)
-                {
-                int refLen = strlen(refAllele), altLen = strlen(allele->sequence);
-                char ref[refLen+1], alt[altLen+1];
-                safecpy(ref, sizeof(ref), refAllele);
-                safecpy(alt, sizeof(alt), allele->sequence);
-                uint start = variant->chromStart, end = variant->chromEnd;
-                trimRefAlt(ref, alt, &start, &end, &refLen, &altLen);
-                slAddHead(&variantList, variantNew(variant->chrom, start, end, 1, alt, ref, lm));
-                }
-            }
-        }
-    }
-return variantList;
-}
-