4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/protein/getInterProFa/getInterProFa.c src/hg/protein/getInterProFa/getInterProFa.c index c4817a0..a8116d0 100644 --- src/hg/protein/getInterProFa/getInterProFa.c +++ src/hg/protein/getInterProFa/getInterProFa.c @@ -1,121 +1,121 @@ /* getInterProFa - get AA sequence of InterPro domains for a specific organism */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hCommon.h" #include "hdb.h" void usage() /* Explain usage and exit. */ { errAbort( "getInterProFa - get AA sequence of InterPro domains for a specific organism\n" "usage:\n" " getInterProFa tableName outFileName\n" " tableName is the InterPro xref table name for the specific organism\n" " outFileName is the output file name\n" "example: getInterProFa interProXrefHiv1 interProHiv1.fa\n"); } int main(int argc, char *argv[]) { struct sqlConnection *conn2, *conn3, *conn4; char query2[256], query3[256], query4[256]; struct sqlResult *sr2, *sr3, *sr4; char **row2, **row3, **row4; char *aaSeq; char *accession; char *desc; FILE *outFile; char *outFileName; char *tableName; char *interProId; int maxLen, len; char *maxAcc = NULL; char *start, *end; char *maxStart=NULL, *maxEnd=NULL, *maxDesc = NULL; if (argc != 3) usage(); tableName = argv[1]; outFileName = argv[2]; outFile = mustOpen(outFileName, "w"); conn2 = hAllocConn(); conn3 = hAllocConn(); conn4 = hAllocConn(); /* loop over all InterPro entry for the specific InterPro xref table for this organism */ sqlSafef(query2, sizeof query2, "select distinct interProId from proteome.%s", tableName); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { interProId = row2[0]; /* get all start/end positions of this InterPro domain */ sqlSafef(query3, sizeof query3, "select accession, start, end, description from proteome.%s where interProId='%s'", tableName, interProId); sr3 = sqlMustGetResult(conn3, query3); row3 = sqlNextRow(sr3); maxLen = 0; while (row3 != NULL) { accession = row3[0]; start = row3[1]; end = row3[2]; desc = row3[3]; len = atoi(end) - atoi(start) + 1; /* remember the max len, so far */ if (len > maxLen) { maxLen = len; maxAcc = cloneString(accession); maxStart = cloneString(start); maxEnd = cloneString(end); maxDesc = cloneString(desc); } row3 = sqlNextRow(sr3); } sqlFreeResult(&sr3); /* fetch the corresponding AA sequence of the domain having the max length */ sqlSafef(query4, sizeof query4, "select substring(val, %s, %d) from uniProt.protein where acc='%s'", maxStart, maxLen, maxAcc); sr4 = sqlMustGetResult(conn4, query4); row4 = sqlNextRow(sr4); if (row4 == NULL) { fprintf(stderr, "%s %s missing, exiting ...\n", maxAcc, interProId); exit(1); } else { aaSeq = row4[0]; if (maxLen >= 18) { fprintf(outFile, ">%s %s\n", interProId, maxDesc); fprintf(outFile, "%s\n", aaSeq);fflush(stdout); } } sqlFreeResult(&sr4); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); hFreeConn(&conn2); hFreeConn(&conn3); hFreeConn(&conn4); fclose(outFile); return(0); }