0345dae65cc819f447fb0414829b2f6d32473c32
markd
  Sun Jun 2 13:24:16 2024 -0700
added number of start/end hard and soft based to BAM hgc page

diff --git src/hg/hgc/bamClick.c src/hg/hgc/bamClick.c
index 2b9cb39..2d3c9b8 100644
--- src/hg/hgc/bamClick.c
+++ src/hg/hgc/bamClick.c
@@ -13,54 +13,98 @@
 #include "udc.h"
 #include "chromAlias.h"
 
 
 #include "hgBam.h"
 #include "hgConfig.h"
 
 struct bamTrackData
     {
     int itemStart;
     char *itemName;
     struct hash *pairHash;
     boolean foundIt;
     };
 
+#include <htslib/sam.h>
+
+static void getClippedBases(const bam1_t *bam, int clippedBases[4]) 
+/* Function to get the number of soft and hard clipped bases at the start and end of a read,
+ the array will contain [startHardCnt, startSoftCnt, endSoftCnt, endHardCnt] */
+{
+uint32_t *cigar = bam_get_cigar(bam);
+int nCigar = bam->core.n_cigar;
+for (int i = 0; i < 4; i++)
+    clippedBases[i] = 0;
+
+int iCigar = 0;
+
+// start
+if ((iCigar < nCigar) && (bam_cigar_op(cigar[iCigar]) == BAM_CHARD_CLIP))
+    {
+    clippedBases[0] += bam_cigar_oplen(cigar[iCigar]);
+    iCigar++;
+    }
+if ((iCigar < nCigar) && (bam_cigar_op(cigar[iCigar]) == BAM_CSOFT_CLIP))
+    {
+    clippedBases[1] += bam_cigar_oplen(cigar[iCigar]);
+    }
+
+// end
+iCigar = nCigar - 1;
+if ((iCigar >= 0) && (bam_cigar_op(cigar[iCigar]) == BAM_CHARD_CLIP))
+    {
+    clippedBases[3] += bam_cigar_oplen(cigar[iCigar]);
+    iCigar--;
+    }
+if ((iCigar >= 0) && (bam_cigar_op(cigar[iCigar]) == BAM_CSOFT_CLIP))
+    {
+    clippedBases[2] += bam_cigar_oplen(cigar[iCigar]);
+    }
+}
+
 /* Maybe make this an option someday -- for now, I find it too confusing to deal with
  * CIGAR that is anchored to positive strand while showing rc'd sequence.  I think
  * to do it right, we would need to reverse the CIGAR string for display. */
 static boolean useStrand = FALSE;
 static boolean skipQualityScore = FALSE;
+
 static void singleBamDetails(const bam1_t *bam)
 /* Print out the properties of this alignment. */
 {
 const bam1_core_t *core = &bam->core;
 char *itemName = bam1_qname(bam);
 int tLength = bamGetTargetLength(bam);
 int tStart = core->pos, tEnd = tStart+tLength;
 boolean isRc = useStrand && bamIsRc(bam);
 printPosOnChrom(seqName, tStart, tEnd, NULL, FALSE, itemName);
 if (!skipQualityScore)
     printf("<B>Alignment Quality: </B>%d<BR>\n", core->qual);
 if (core->n_cigar > 50)
     printf("<B>CIGAR string: </B> Cannot show long CIGAR string, more than 50 operations. Contact us if you need to see the full CIGAR string here.<BR>\n");
 else
     {
     printf("<B>CIGAR string: </B><tt>%s</tt>", bamGetCigar(bam));
     //bamShowCigarEnglish(bam);
     printf("<BR>\n");
     }
+
+int clippedBases[4];
+getClippedBases(bam, clippedBases);
+printf("<B>Start clipping:</B> hard: %d  soft: %d</BR>\n", clippedBases[0], clippedBases[1]);
+printf("<B>End clipping:</B> hard: %d  soft: %d</BR> \n", clippedBases[3], clippedBases[2]);
+
 printf("<B>Tags:</B>");
 bamShowTags(bam);
 puts("<BR>");
 printf("<B>Flags: </B><tt>0x%02x:</tt><BR>\n &nbsp;&nbsp;", core->flag);
 bamShowFlagsEnglish(bam);
 puts("<BR>");
 if (bamIsRc(bam))
     printf("<em>Note: although the read was mapped to the reverse strand of the genome, "
 	   "the sequence and CIGAR in BAM are relative to the forward strand.</em><BR>\n");
 puts("<BR>");
 struct dnaSeq *genoSeq = hChromSeq(database, seqName, tStart, tEnd);
 char *qSeq = bamGetQuerySequence(bam, FALSE);
 if (core->l_qseq > 5000)
     printf("<B>Alignment not shown, very long sequence</B><BR>\n");
 else