dccdf29485000ddce07f65a431d52a51ffd158cf braney Wed Nov 16 18:41:22 2016 -0800 add support for protein psls in bigPsl diff --git src/hg/lib/bigPsl.c src/hg/lib/bigPsl.c index 5af5d32..90624e6 100644 --- src/hg/lib/bigPsl.c +++ src/hg/lib/bigPsl.c @@ -269,78 +269,92 @@ if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->oCDS); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->chromSize); fputc(sep,f); fprintf(f, "%u", el->match); fputc(sep,f); fprintf(f, "%u", el->misMatch); fputc(sep,f); fprintf(f, "%u", el->repMatch); fputc(sep,f); fprintf(f, "%u", el->nCount); +fputc(sep,f); +fprintf(f, "%u", el->seqType); fputc(lastSep,f); } /* -------------------------------- End autoSql Generated Code -------------------------------- */ -struct psl *pslFromBigPsl( char *chrom, struct bigBedInterval *bb, char **seq, char **cds) +struct psl *pslFromBigPsl( char *chrom, struct bigBedInterval *bb, int seqTypeField, char **seq, char **cds) /* build a psl from a bigPsl */ { char *extra = cloneString(bb->rest); -int numCols = 12 + 12 - 3; +int numCols = 12 + 13 - 3; char *row[numCols]; int wordCount = chopByChar(extra, '\t', row, numCols); -if (wordCount != numCols) - errAbort("pslFromBigPsl: expected %d columns in `rest' field, found %d columns", numCols, wordCount); +if (wordCount < numCols - 1) + errAbort("pslFromBigPsl: expected at least %d columns in `rest' field, found %d columns", numCols, wordCount); + +int seqType = 0; +if (wordCount == numCols) + seqType = sqlUnsigned(row[21]); +if (seq != NULL) + *seq = NULL; +if (cds != NULL) + *cds = NULL; struct psl *psl; int ii; int sizeOne; +boolean isProt = (seqType == PSL_SEQTYPE_PROTEIN); AllocVar(psl); psl->qName = cloneString(row[0]); psl->strand[0] = *row[2]; -if ((cds != NULL) && row[15] != NULL) +if ((cds != NULL) && !isEmpty(row[15])) *cds = cloneString(row[15]); -if ((seq != NULL) && row[14] != NULL) +if ((seq != NULL) && !isEmpty(row[14])) *seq = cloneString(row[14]); psl->tSize = sqlUnsigned(row[16]); psl->match = sqlUnsigned(row[17]); psl->misMatch = sqlUnsigned(row[18]); psl->repMatch = sqlUnsigned(row[19]); psl->nCount = sqlUnsigned(row[20]); psl->tName = chrom; psl->tStart = bb->start; psl->tEnd = bb->end; psl->blockCount = sqlSigned(row[6]); sqlUnsignedDynamicArray(row[7], &psl->blockSizes, &sizeOne); assert(sizeOne == psl->blockCount); sqlUnsignedDynamicArray(row[8], &psl->tStarts, &sizeOne); assert(sizeOne == psl->blockCount); psl->qStart = sqlSigned(row[9]); psl->qEnd = sqlSigned(row[10]); psl->strand[1] = *row[11]; psl->strand[2] = 0; psl->qSize = sqlSigned(row[12]); sqlUnsignedDynamicArray(row[13], &psl->qStarts, &sizeOne); assert(sizeOne == psl->blockCount); for(ii=0; ii < psl->blockCount; ii++) { psl->tStarts[ii] += psl->tStart; } // because reference blocks are always on the positive strand in beds, we need to revComp them // if the alignment is meant to be on the reference's negative strand +if (isProt) + for(ii=0; ii < psl->blockCount; ii++) + psl->blockSizes[ii] /= 3; if (psl->strand[1] == '-') { psl->strand[1] = '+'; pslRc(psl); } pslComputeInsertCounts(psl); return psl; }