4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/regulate/regClusterAttachMetadataToTableOfTables/regClusterAttachMetadataToTableOfTables.c src/hg/regulate/regClusterAttachMetadataToTableOfTables/regClusterAttachMetadataToTableOfTables.c
index 78bb7ee..067fec4 100644
--- src/hg/regulate/regClusterAttachMetadataToTableOfTables/regClusterAttachMetadataToTableOfTables.c
+++ src/hg/regulate/regClusterAttachMetadataToTableOfTables/regClusterAttachMetadataToTableOfTables.c
@@ -1,210 +1,210 @@
 /* regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc) for tables - using metaDb first, and if no metaDb object then trying to parse it out of file name. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "cv.h"
 
 
 void usage()
 /* Print the command line synopsis and abort. */
 {
 /* The text contains no format directives, so it is passed as a "%s" argument. */
 static char *usageText =
   "regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc)\n"
   "for tables - using metaDb first, and if no metaDb object then trying to parse it out of\n"
   "file name\n"
   "usage:\n"
   "   regClusterAttachMetadataToTableOfTables database partial.table output.table\n"
   "options:\n"
   "   -antibodyTarget - substitute target (from cv.ra) for antibody\n";
 
 errAbort("%s", usageText);
 }
 
 /* Set in main() from the -antibodyTarget command line flag.  When TRUE,
  * getMetaFromMetaDb() replaces the antibody name with the target recorded
  * for it in the controlled vocabulary. */
 boolean antibodyTarget = FALSE;
 
 /* Command line options recognized by this program; handed to optionInit() in main().
  * The table ends with a NULL-name entry. */
 static struct optionSpec options[] = {
    {"antibodyTarget", OPTION_BOOLEAN},
    {NULL, 0}
 };
 
 boolean getMetaFromMetaDb(struct sqlConnection *conn,
 	char *obj, char **retCell, char **retAntibody, char **retTreatment, char **retLab)
 /* Look up obj in the metaDb table and return its 'cell', 'antibody', 'treatment' and 'lab'
  * values through the ret* pointers.  Each value is whatever sqlQuickString() gives back for
  * the corresponding query, so a variable with no metaDb row comes back as NULL.
  * When the global antibodyTarget is set and an antibody was found, the antibody is replaced
  * by the CV_TARGET entry of its controlled vocabulary term, if that term is known.
  * Returns TRUE if at least one of cell, antibody or treatment was found.  Lab is returned
  * but does not count towards the result. */
 {
 char query[256];
 
 sqlSafef(query, sizeof(query), "select val from metaDb where obj='%s' and var='cell'", obj);
 *retCell = sqlQuickString(conn, query);
 
 sqlSafef(query, sizeof(query), "select val from metaDb where obj='%s' and var='antibody'", obj);
 *retAntibody = sqlQuickString(conn, query);
 
 /* Skip the cv lookup when metaDb had no antibody: there is nothing to translate, and a
  * NULL term must not be handed to cvOneTermHash().  This is the normal case for objects
  * that are missing from metaDb and get parsed from their name by the caller instead. */
 if (antibodyTarget && *retAntibody != NULL)
     {
     struct hash *cvTerm = (struct hash *)cvOneTermHash(CV_TERM_ANTIBODY, *retAntibody);
     if (cvTerm != NULL)
         {
         /* Fall back to the antibody name itself when the term has no target entry. */
         *retAntibody = hashOptionalVal(cvTerm, CV_TARGET, *retAntibody);
         }
     }
 
 sqlSafef(query, sizeof(query), "select val from metaDb where obj='%s' and var='treatment'", obj);
 *retTreatment = sqlQuickString(conn, query);
 
 sqlSafef(query, sizeof(query), "select val from metaDb where obj='%s' and var='lab'", obj);
 *retLab = sqlQuickString(conn, query);
 
 return *retCell != NULL || *retAntibody != NULL || *retTreatment != NULL;
 }
 
 char *firstUpper(char *s)
 /* Return pointer to first upper case letter in string, or NULL if the string
  * contains no upper case letter. */
 {
 for ( ; *s != 0; ++s)
     {
     /* The ctype functions require a value representable as unsigned char.  Plain char
      * may be signed, so a byte with the high bit set has to be converted first. */
     if (isupper((unsigned char)*s))
         return s;
     }
 return NULL;
 }
 
 static char *haibOfficialName(char *table[][2], int tableSize, char *id)
 /* Return the official name paired with id in table, or NULL if id is not listed.
  * Each table row is {id as spelled in the table name, official name}. */
 {
 int i;
 for (i = 0; i < tableSize; ++i)
     {
     if (sameString(id, table[i][0]))
         return table[i][1];
     }
 return NULL;
 }
 
 void getMetaFromObjName(char *obj, char **retCell, char **retAntibody, char **retTreatment,
 	char **retLab)
 /* Parse cell, antibody, treatment and lab out of a table name of the form
  * wgEncodeHaibTfbs<Cell><Antibody><Rest>, relying on CamelCasing to find the boundaries.
  * Aborts if obj does not start with the prefix, if the boundaries can't be found, or if
  * the cell or antibody id is not one of the known ones.
  * The strings returned are constants and must not be freed. */
 {
 static char *cellNames[][2] =
     {
     {"Gm12878", "GM12878"},
     {"Gm12891", "GM12891"},
     {"Gm12892", "GM12892"},
     {"H1hesc", "H1-hESC"},
     {"Hepg2", "HepG2"},
     {"K562", "K562"},
     {"A549", "A549"},
     };
 static char *antibodyNames[][2] =
     {
     {"Bcl3", "BCL3"},
     {"Ebf", "EBF"},
     {"Egr1", "Egr-1"},
     {"Irf4", "IRF4_(M-17)"},
     {"Oct2", "Oct-2"},	 // This is just a guess, Antibody not even on wiki yet
     {"Pou2f2", "POU2F2"}, // Again a guess. Ironically Oct-2 and POU2F2 are same gene
     {"Sin3ak20", "Sin3Ak-20"},
     {"Taf1", "TAF1"},
     {"Yy1", "YY1_(C-20)"},
     {"Pol24h8", "Pol2-4H8"},
     {"Pol2", "Pol2"},
     {"Nrsf", "NRSF"},
     {"P300", "p300"},
     {"Atf3", "ATF3"},
     {"Gabp", "GABP"},
     {"Max", "Max"},
     };
 
 /* Skip past first bits. */
 char *pattern = "wgEncodeHaibTfbs";
 int patLen = strlen(pattern);
 if (!startsWith(pattern, obj))
    errAbort("getMetaFromObjName can't handle %s since it doesn't begin with %s", obj, pattern);
 
 /* Rely on CamelCasing to pick out cell/antibody/treatment.  The cell name starts right
  * after the prefix, the antibody at the next capital letter, and whatever follows the
  * antibody at the capital after that.  Each search starts one character in so that the
  * leading capital of the current piece is not matched again. */
 char *cellStart = obj + patLen;
 if (*cellStart == 0)
     errAbort("getMetaFromObjName can't find cell in %s", obj);  /* nothing after prefix */
 char *abStart = firstUpper(cellStart+1);
 if (abStart == NULL)
     errAbort("getMetaFromObjName can't find antibody in %s", obj);
 char *treatmentStart = firstUpper(abStart+1);
 if (treatmentStart == NULL)
     errAbort("getMetaFromObjName can't find end of antibody in %s", obj);
 
 /* Look up cell */
 char *cellId = cloneStringZ(cellStart, abStart - cellStart);
 char *cell = haibOfficialName(cellNames, ArraySize(cellNames), cellId);
 if (cell == NULL)
     errAbort("Unrecognized cellId %s in %s", cellId, obj);
 
 /* Look up antibody */
 char *abId = cloneStringZ(abStart, treatmentStart - abStart);
 char *antibody = haibOfficialName(antibodyNames, ArraySize(antibodyNames), abId);
 if (antibody == NULL)
     errAbort("Unrecognized abId %s in %s", abId, obj);
 
 /* Clean up, set return variables, and go home. */
 freez(&cellId);
 freez(&abId);
 *retCell = cell;
 *retAntibody = antibody;
 /* Looks like all of the HAIB missing metadata have no treatment. */
 *retTreatment = "None";
 /* If we are wgEncodeHaib prefix then we must be HudsonAlpha */
 *retLab = "HudsonAlpha";
 }
 
 void regClusterAttachMetadataToTableOfTables(char *database, char *partialTable, char *outputTable)
 /* regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc) for 
  * tables - using metaDb first, and if no metaDb object then trying to parse it out of file name. */
 {
 struct sqlConnection *conn = sqlConnect(database);
 struct lineFile *lf = lineFileOpen(partialTable, TRUE);
 FILE *f = mustOpen(outputTable, "w");
 char *row[7];
 
 while (lineFileRow(lf, row))
     {
     /* Get object ID and attempt to find basic experimental variables from metadata. */
     char *obj = row[6];
     char *cell = NULL, *antibody=NULL, *treatment=NULL, *lab = NULL;
     if (!getMetaFromMetaDb(conn, obj, &cell, &antibody, &treatment, &lab))
         {
         verbose(3, "Can't get metadata for %s from metaDb, parsing filename\n", obj); 
         getMetaFromObjName(obj, &cell, &antibody, &treatment, &lab);
         }
 
     /* Write out first fields unchanged, and append our new fields. */
     int i;
     for (i=0; i<7; ++i)
     	fprintf(f, "%s\t", row[i]);
     fprintf(f, "%s\t", naForNull(cell));
     fprintf(f, "%s\t", naForNull(antibody));
     if (treatment == NULL)
         treatment = "None";
     fprintf(f, "%s\t", treatment);
     fprintf(f, "%s\n", naForNull(lab));
     }
 
 /* Clean up and go home. */
 carefulClose(&f);
 lineFileClose(&lf);
 sqlDisconnect(&conn);
 }
 
 int main(int argc, char *argv[])
 /* Parse options, check for exactly three positional arguments, and run. */
 {
 enum { expectedArgc = 4 };	/* program name + database + partial.table + output.table */
 
 optionInit(&argc, argv, options);
 if (argc != expectedArgc)
     usage();
 
 antibodyTarget = optionExists("antibodyTarget");
 
 char *database = argv[1];
 char *partialTable = argv[2];
 char *outputTable = argv[3];
 regClusterAttachMetadataToTableOfTables(database, partialTable, outputTable);
 return 0;
 }