4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/protein/kgXref/kgXref.c src/hg/protein/kgXref/kgXref.c index e9164c0..b5ade82 100644 --- src/hg/protein/kgXref/kgXref.c +++ src/hg/protein/kgXref/kgXref.c @@ -1,175 +1,175 @@ /* kgXref - create Known Gene cross reference table kgXref.tab file */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hCommon.h" #include "hdb.h" void usage() /* Explain usage and exit. */ { errAbort( "kgXref - create Known Gene cross reference table kgXref.tab file." "usage:\n" " kgXref <db> <proteinsYYMMDD> <ro_db>\n" " <db> is known Genes database under construction\n" " <proteinsYYMMDD> is protein database name \n" " <ro_db> is target organism database\n" "example: kgXref kgDB proteins040115 hg16\n"); } int main(int argc, char *argv[]) { struct sqlConnection *conn, *conn2, *conn3; char query2[256]; struct sqlResult *sr2; char **row2; char cond_str[256]; char *kgID; char *seqType; /* sequence type m=mRNA g=genomic u=undefined */ FILE *o1; char *database; char *proteinDB; char *ro_DB; char *refSeqName; char *hugoID; char *protAcc; /* protein Accession number from NCBI */ char *answer; int leg; /* marker for debugging */ char *spID, *spDisplayID, *geneSymbol, *refseqID, *desc; if (argc != 4) usage(); database = cloneString(argv[1]); proteinDB = cloneString(argv[2]); ro_DB = cloneString(argv[3]); conn = hAllocConn(database); conn2= hAllocConn(database); conn3= hAllocConn(database); o1 = mustOpen("j.dat", "w"); sqlSafef(query2, sizeof query2, "select name, proteinID from %s.knownGene;", database); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); while (row2 != NULL) { kgID = row2[0]; spDisplayID = row2[1]; refseqID = strdup(""); geneSymbol = strdup(""); desc = strdup(""); protAcc = strdup(""); sqlSafefFrag(cond_str, sizeof cond_str, "displayID='%s'", spDisplayID); spID = sqlGetField(proteinDB, "spXref3", "accession", cond_str); /* use description for the protein as default, replace it with HUGO desc if available. */ sqlSafefFrag(cond_str, sizeof cond_str, "displayID='%s'", spDisplayID); desc = sqlGetField(proteinDB, "spXref3", "description", cond_str); sqlSafefFrag(cond_str, sizeof cond_str, "name='%s' and seqType='g'", kgID); seqType = sqlGetField(database, "knownGeneLink", "seqType", cond_str); if (seqType != NULL) { leg = 1; /* special processing for RefSeq DNA based genes */ sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", kgID); refSeqName = sqlGetField(ro_DB, "refLink", "name", cond_str); if (refSeqName != NULL) { geneSymbol = cloneString(refSeqName); refseqID = kgID; sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", kgID); desc = sqlGetField(ro_DB, "refLink", "product", cond_str); sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc='%s'", refseqID); answer = sqlGetField(ro_DB, "refLink", "protAcc", cond_str); if (answer != NULL) { protAcc = strdup(answer); } } } else { sqlSafefFrag(cond_str, sizeof cond_str, "displayID = '%s'", spDisplayID); hugoID = sqlGetField(proteinDB, "spXref3", "hugoSymbol", cond_str); if (!((hugoID == NULL) || (*hugoID == '\0')) ) { leg = 21; geneSymbol = cloneString(hugoID); sqlSafefFrag(cond_str, sizeof cond_str, "displayID = '%s'", spDisplayID); desc = sqlGetField(proteinDB, "spXref3", "hugoDesc", cond_str); } sqlSafefFrag(cond_str, sizeof cond_str, "mrna = '%s'", kgID); answer = sqlGetField(database, "mrnaRefseq", "refseq", cond_str); if (answer != NULL) { leg = 22; refseqID = strdup(answer); sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc='%s'", refseqID); answer = sqlGetField(ro_DB, "refLink", "protAcc", cond_str); if (answer != NULL) { protAcc = strdup(answer); } } if (strlen(geneSymbol) == 0) { leg = 23; if (strlen(refseqID) != 0) { sqlSafefFrag(cond_str, sizeof cond_str, "mrnaAcc = '%s'", refseqID); answer = sqlGetField(ro_DB, "refLink", "name", cond_str); if (answer != NULL) { leg = 24; geneSymbol = strdup(answer); } } } } /* fix missing fields */ if (strlen(refseqID) == 0) { /* printf("%3d %s reseqID is empty.\n", leg, kgID);*/ } if (strlen(geneSymbol) == 0) { /* printf("%3d %s geneSymbol is empty.\n", leg, kgID);fflush(stdout);*/ geneSymbol = strdup(kgID); } if (strlen(desc) == 0) { /* printf("%3d %s desc is empty.\n", leg, kgID);fflush(stdout);*/ desc = strdup("N/A"); } fprintf(o1, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", kgID, kgID, spID, spDisplayID, geneSymbol, refseqID, protAcc, desc); row2 = sqlNextRow(sr2); } fclose(o1); hFreeConn(&conn); hFreeConn(&conn2); hFreeConn(&conn3); mustSystem("cat j.dat|sort|uniq >kgXref.tab"); mustSystem("rm j.dat"); return(0); }