4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/gsid/gsidAaMsa/gsidAaMsa.c src/hg/gsid/gsidAaMsa/gsidAaMsa.c index 38797d6..9ea560e 100644 --- src/hg/gsid/gsidAaMsa/gsidAaMsa.c +++ src/hg/gsid/gsidAaMsa/gsidAaMsa.c @@ -1,240 +1,240 @@ /* gsidAaMsa - create conservation wiggle track for GSID protein MSA */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "hdb.h" #include "options.h" #include "localmem.h" #include "dystring.h" #include "portable.h" #include "obscure.h" #define MAXSEQ 5000 #define MAXSEQLEN 5000 #define MAXBASE 21 char seq[MAXSEQ][MAXSEQLEN]; char seqId[MAXSEQ][40]; int cnt[MAXSEQLEN][MAXBASE]; char consensusSeq[MAXSEQLEN]; char consensusSeq2[MAXSEQLEN]; char *baseGenomeSeq; int baseSeqLen; char aaCode1[MAXBASE][2] = { "A", "V", "L", "I", "P", "F", "W", "M", "G", "S", "T", "C", "Y", "N", "Q", "D", "E", "K", "R", "H", "-" }; char aaCode2[MAXBASE][2] = { "a", "v", "l", "i", "p", "f", "w", "m", "g", "s", "t", "c", "y", "n", "q", "d", "e", "k", "r", "h", "-" }; char refBase[MAXBASE]; char refBase2[MAXBASE]; char aliSeq[MAXSEQLEN]; float identity[MAXSEQLEN]; void usage() /* Explain usage and exit. */ { errAbort( "gsidAaMsa - create conservation wiggle track for GSID protein MSA \n" "usage:\n" " gsidAaMsa database table baseAcc startPos wigOutput concensusOutput\n" "\n" "example: gsidAaMsa hiv1 vax04AliSeq HXB2 6348 vax04Msa.wig vax04Consnsus.fa\n" ); } void gsidAaMsa(char *database, char *table, char *baseAcc, int startPos, char *outWigFn, char *outConsFn) { struct sqlConnection *conn2; char query2[256]; struct sqlResult *sr2; char **row2; FILE *outf, *outf2; char base; int ii; int i, j, jj, k; int seqCnt = 0; int max, kmax, kmax2; for (i=0; i < MAXBASE; i++) { refBase[i] = *aaCode1[i]; refBase2[i] = *aaCode2[i]; } conn2= hAllocConn(database); outf = mustOpen(outWigFn, "w"); sqlSafef(query2, sizeof query2, "select seq from %s.%s where id='%s'", database, table, baseAcc); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); baseGenomeSeq = cloneString(row2[0]); baseSeqLen=strlen(baseGenomeSeq); sqlFreeResult(&sr2); sqlSafef(query2, sizeof query2, "select * from %s.%s", database, table); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); ii=0; while (row2 != NULL) { strcpy(seqId[ii], row2[0]); strcpy(seq[ii], row2[1]); ii++; row2 = sqlNextRow(sr2); } seqCnt = ii; sqlFreeResult(&sr2); hFreeConn(&conn2); /* print header */ fprintf(outf, "browser position chr1:1-9000\n"); fprintf(outf, "track type=wiggle_0\n"); jj=0; for (j=0; j max) { max = cnt[j][k]; /* keep track of the 2nd hightest */ kmax2 = kmax; kmax = k; } } consensusSeq[j] = refBase[kmax]; if (refBase[kmax] == '-') { consensusSeq2[j] = refBase2[kmax2]; } else { consensusSeq2[j] = refBase[kmax]; } aliSeq[j] = refBase[kmax]; identity[j] = (float)max/(float)seqCnt; if (baseGenomeSeq[j] != '-') { /* protein takes up 3 bases for each AA */ fprintf(outf, "chr1 %d %d %f\n",startPos+jj*3-1, startPos+jj*3+2, identity[j]); jj++; } } fclose(outf); consensusSeq[baseSeqLen] = '\0'; consensusSeq2[baseSeqLen] = '\0'; outf2 = mustOpen(outConsFn, "w"); fprintf(outf2, ">%s Protein MSA Consensus Sequence\n", table); fprintf(outf2, "%s\n", consensusSeq2); fclose(outf2); } int main(int argc, char *argv[]) /* Process command line. */ { int ia; char *database, *table; char *baseAcc; int startPos; char *outFn, *outConsFn; if (argc != 7) usage(); ia=1; database = argv[ia]; ia++; table = argv[ia]; ia++; baseAcc = argv[ia]; ia++; startPos = atoi(argv[ia]); ia++; outFn = argv[ia]; ia++; outConsFn = argv[ia]; gsidAaMsa(database, table, baseAcc, startPos, outFn, outConsFn); return 0; }