06d7be056190c14b85e71bc12523f18ea6815b5e markd Mon Dec 7 00:50:29 2020 -0800 BLAT mmap index support merge with master diff --git src/hg/near/hgNear/getSeq.c src/hg/near/hgNear/getSeq.c index 130556f..d962e3c 100644 --- src/hg/near/hgNear/getSeq.c +++ src/hg/near/hgNear/getSeq.c @@ -1,320 +1,320 @@ /* getSeq - pages to get protein and nucleic acid sequence. */ /* Copyright (C) 2013 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "obscure.h" #include "jksql.h" #include "cart.h" #include "dnautil.h" #include "hdb.h" #include "cheapcgi.h" #include "hgSeq.h" #include "hgNear.h" #include "genePred.h" #include "bed.h" static void printNameAndDescription(struct sqlConnection *conn, struct genePos *gp, struct column *nameCol, struct column *descCol) /* Look up name and description and print. */ { char *name = NULL; char *description = NULL; if (nameCol != NULL) name = nameCol->cellVal(nameCol, gp, conn); if (descCol != NULL) description = descCol->cellVal(descCol, gp, conn); if (name != NULL) hPrintf(" %s", name); if (description != NULL) hPrintf(" - %s", description); freeMem(name); freeMem(description); } static void getSeqFromBlob(struct sqlConnection *conn, struct column *colList, struct genePos *geneList, char *tableId) /* Get sequence from blob field in table and print it as fasta. */ { struct sqlResult *sr; char **row; char query[256]; struct genePos *gp; -struct sqlConnection *conn2 = hAllocConn(database); +struct sqlConnection *conn2 = hAllocConn(sqlGetDatabase(conn)); char *tableName = genomeSetting(tableId); struct column *descCol = findNamedColumn("description"); struct column *nameCol = findNamedColumn("name"); hPrintf("
");
 /* Walk the gene list; for each gene look up its sequence by name and, when
  * a row comes back, print it as one FASTA record.  Only the first row
  * returned for a gene is used. */
 for (gp = geneList; gp != NULL; gp = gp->next)
     {
     char *geneId = gp->name;
     sqlSafef(query, sizeof(query),
     	"select seq from %s where name = '%s'", tableName, geneId);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
 	{
 	char *residues = row[0];
 	/* Header line: >id followed by optional name and description,
 	 * which are looked up on the second connection. */
 	hPrintf(">%s", geneId);
 	printNameAndDescription(conn2, gp, nameCol, descCol);
 	hPrintf("\n");
 	/* Sequence body, wrapped at 60 letters per line. */
 	writeSeqWithBreaks(stdout, residues, strlen(residues), 60);
 	}
     sqlFreeResult(&sr);
     }
 hPrintf("
"); hFreeConn(&conn2); } static void getProtein( struct sqlConnection *conn, struct column *colList, struct genePos *geneList) /* Print out proteins. */ { getSeqFromBlob(conn, colList, geneList, "pepTable"); } void getGeneMrna(struct sqlConnection *conn, struct column *colList, struct genePos *geneList, char *tableId) /* Get mRNA sequence for gene from gene prediction. */ { struct sqlResult *sr; char **row; char query[256]; struct genePos *gp; struct sqlConnection *conn2 = hAllocConn(database); struct column *descCol = findNamedColumn("description"); struct column *nameCol = findNamedColumn("name"); char *table = genomeSetting(tableId); boolean hasBin = hOffsetPastBin(database, NULL, table); hPrintf("
");
 /* For every gene in the list, load each gene prediction that matches its
  * name and position, and print the sequence obtained for it via
  * bedFromGenePred/hSeqForBed as a FASTA record. */
 for (gp = geneList; gp != NULL; gp = gp->next)
     {
     char *geneId = gp->name;

     sqlSafef(query, sizeof(query),
     	"select * from %s where name='%s'"
 	" and chrom='%s' and txStart=%d and txEnd=%d",
     	table, geneId, gp->chrom, gp->start, gp->end);
     sr = sqlGetResult(conn, query);
     for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
         {
 	/* row+hasBin steps past the leading column reported by hOffsetPastBin. */
 	struct genePred *pred = genePredLoad(row+hasBin);
 	struct bed *predBed = bedFromGenePred(pred);
 	struct dnaSeq *mrna = hSeqForBed(database, predBed);

 	hPrintf(">%s (predicted mRNA)", geneId);
 	printNameAndDescription(conn2, gp, nameCol, descCol);
 	hPrintf("\n");
 	/* Sequence body, wrapped at 50 letters per line. */
 	writeSeqWithBreaks(stdout, mrna->dna, mrna->size, 50);

 	dnaSeqFree(&mrna);
 	bedFree(&predBed);
 	genePredFree(&pred);
 	}
     sqlFreeResult(&sr);
     }
 hPrintf("
"); hFreeConn(&conn2); } static void getMrna(struct sqlConnection *conn, struct column *colList, struct genePos *geneList) /* Print out proteins. */ { if (genomeOptionalSetting("mrnaTable") != NULL) getSeqFromBlob(conn, colList, geneList, "mrnaTable"); else getGeneMrna(conn, colList, geneList, "geneTable"); } static boolean hasUtr5(struct genePred *gp) /* Return TRUE if it looks like gene has a 5' UTR. */ { if (gp->strand[0] == '-') return gp->txEnd != gp->cdsEnd; else return gp->txStart != gp->cdsStart; } static struct dnaSeq *genePromoSeq(struct genePred *gp, int upSize, int downSize) /* Get promoter sequence for gene. */ { struct dnaSeq *seq; assert(upSize >= 0 && downSize >= 0); if (gp->strand[0] == '-') { int start = gp->txEnd - downSize; int end = gp->txEnd + upSize; seq = hChromSeq(database, gp->chrom, start, end); reverseComplement(seq->dna, seq->size); } else { int start = gp->txStart - upSize; int end = gp->txStart + downSize; seq = hChromSeq(database, gp->chrom, start, end); } return seq; } static void getPromoter(struct sqlConnection *conn, struct column *colList, struct genePos *geneList) /* Print out promoters. */ { struct sqlResult *sr; char **row; char query[256]; struct genePos *gp; char *table = genomeSetting("geneTable"); struct sqlConnection *conn2 = hAllocConn(database); int upSize = cartInt(cart, proUpSizeVarName); int downSize = cartInt(cart, proDownSizeVarName); boolean fiveOnly = cartBoolean(cart, proIncludeFiveOnly); struct column *descCol = findNamedColumn("description"); struct column *nameCol = findNamedColumn("name"); boolean hasBin = hOffsetPastBin(database, NULL, table); hPrintf("
");
 /* For each gene in the list, load every gene prediction matching its name
  * and position and print the promoter region of each as a FASTA record. */
 for (gp = geneList; gp != NULL; gp = gp->next)
     {
     char *id = gp->name;
     sqlSafef(query, sizeof(query),
     	"select * from %s where name='%s'"
 	" and chrom='%s' and txStart=%d and txEnd=%d",
     	table, id, gp->chrom, gp->start, gp->end);
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
         {
 	/* row+hasBin steps past the leading column reported by hOffsetPastBin. */
 	struct genePred *gene = genePredLoad(row+hasBin);
 	/* With fiveOnly set, genes for which hasUtr5() is false are skipped. */
 	if (!fiveOnly || hasUtr5(gene))
 	    {
 	    /* upSize/downSize bases around the transcription start
 	     * (strand handling is done inside genePromoSeq). */
 	    struct dnaSeq *seq = genePromoSeq(gene, upSize, downSize);
 	    hPrintf(">%s (promoter %d %d)", id, upSize, downSize);
 	    printNameAndDescription(conn2, gp, nameCol, descCol);
 	    hPrintf("\n");
 	    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
 	    dnaSeqFree(&seq);
 	    }
 	genePredFree(&gene);
 	}
     /* Release the result before the next query; without this one result
      * handle is leaked per gene. */
     sqlFreeResult(&sr);
     }
 hPrintf("
"); hFreeConn(&conn2); } static void getGenomic(struct sqlConnection *conn, struct column *colList, struct genePos *geneList) /* Put up dialog to get genomic sequence. */ { struct hTableInfo *hti = hFindTableInfo(database, NULL, genomeSetting("geneTable")); makeTitle("Get Genomic Sequence Near Gene", NULL); hPrintf("
\n"); cartSaveSession(cart); hgSeqOptionsHtiCart(hti, cart); hPrintf("
\n"); cgiMakeButton(getGenomicSeqVarName, "get sequence"); hPrintf("
"); } void doGetGenomicSeq(struct sqlConnection *conn, struct column *colList, struct genePos *geneList) /* Retrieve genomic sequence sequence according to options. */ { char *table = genomeSetting("geneTable"); -struct hTableInfo *hti = hFindTableInfo(database, NULL, table); +struct hTableInfo *hti = hFindTableInfo(sqlGetDatabase(conn), NULL, table); struct genePos *gp; char query[256]; struct sqlResult *sr; char **row; boolean hasBin = hOffsetPastBin(database, NULL, table); makeTitle("Genomic Sequence", NULL); hPrintf("
");
 /* For each gene in the list, load every gene prediction with that name and
  * hand it, converted to a bed, to hgSeqBed for output. */
 for (gp = geneList; gp != NULL; gp = gp->next)
     {
     char *id = gp->name;
     sqlSafef(query, sizeof(query), "select * from %s where name='%s'",
     	table, id);
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
         {
 	/* row+hasBin steps past the leading column reported by hOffsetPastBin. */
 	struct genePred *gene = genePredLoad(row+hasBin);
 	struct bed *bed = bedFromGenePred(gene);
 	hgSeqBed(database, hti, bed);
 	bedFree(&bed);
 	genePredFree(&gene);
 	}
     /* Release the result before the next query; without this one result
      * handle is leaked per gene. */
     sqlFreeResult(&sr);
     }
 hPrintf("
"); } void doGetSeq(struct sqlConnection *conn, struct column *colList, struct genePos *geneList, char *how) /* Put up the get sequence page. */ { if (sameString(how, "protein")) { makeTitle("Protein Sequence", NULL); getProtein(conn, colList, geneList); } else if (sameString(how, "mRNA")) { makeTitle("mRNA Sequence", NULL); getMrna(conn, colList, geneList); } else if (sameString(how, "promoter")) { makeTitle("Promoter Sequence", NULL); getPromoter(conn, colList, geneList); } else if (sameString(how, "genomic")) { getGenomic(conn, colList, geneList); } else errAbort("Unrecognized %s value %s", getSeqHowVarName, how); } static void howRadioButton(char *how) /* Put up a getSeqHow radio button. */ { char *howName = getSeqHowVarName; char *oldVal = cartUsualString(cart, howName, "protein"); cgiMakeRadioButton(howName, how, sameString(how, oldVal)); } void doGetSeqPage(struct sqlConnection *conn, struct column *colList) /* Put up the get sequence page asking how to get sequence. */ { makeTitle("Get Sequence", NULL); hPrintf("
\n"); cartSaveSession(cart); hPrintf("Select sequence type:
\n"); howRadioButton("protein"); hPrintf("Protein
\n"); howRadioButton("mRNA"); hPrintf("mRNA
\n"); howRadioButton("promoter"); hPrintf("Promoter including "); cgiMakeIntVar(proUpSizeVarName, cartUsualInt(cart, proUpSizeVarName, 1000), 4); hPrintf(" bases upstream and "); cgiMakeIntVar(proDownSizeVarName, cartUsualInt(cart, proDownSizeVarName, 50), 3); hPrintf(" downstream.
\n"); hPrintf("   "); cgiMakeCheckBox(proIncludeFiveOnly, cartUsualBoolean(cart, proIncludeFiveOnly, TRUE)); hPrintf("Include only those with annotated 5' UTRs
"); howRadioButton("genomic"); hPrintf("Genomic
\n"); cgiMakeButton(getSeqVarName, "get sequence"); hPrintf("
\n"); }