4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/lib/spMapper.c src/hg/protein/lib/spMapper.c
index 2593483..1a9c591 100644
--- src/hg/protein/lib/spMapper.c
+++ src/hg/protein/lib/spMapper.c
@@ -1,278 +1,278 @@
 /* spMapper - map swissprot accessions to KnownGene ids.  Intended to use in
  * building gene sorter tables.  Can optionally map uniref entry ids. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "spMapper.h"
 #include "localmem.h"
 #include "hash.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "spKgMap.h"
 #include "unirefTbl.h"
 #include "linefile.h"
 #include "hgRelate.h"
 #include "verbose.h"
 #include "obscure.h"
 
 
 /* values for noMapTbl */
 char noUnirefMapping = 'R';           /* indicates no uniref mapping */
 char noKGMapping = 'K';               /* indicates no kg mapping */
 
 static struct kgEntry *kgEntryAdd(struct spMapper *sm, struct hashEl* kgHel, struct kgPair *kgPair)
 /* Add a new kgEntry to a hash chain */
 {
 struct kgEntry *kgEntry;
 lmAllocVar(sm->kgPairMap->lm, kgEntry);
 kgEntry->id = kgHel->name;
 kgEntry->kgPair = kgPair;
 slAddHead((struct kgEntry**)&kgHel->val, kgEntry);
 return kgEntry;
 }
 
 static void kgPairAdd(struct spMapper *sm, struct hashEl* kg1Hel,
                       struct hashEl* kg2Hel, float score)
 /* add a new kgPair object to the hash */
 {
 struct kgPair *kgPair;
 
 lmAllocVar(sm->kgPairMap->lm, kgPair);
 kgPair->score = score;
 slAddHead(&sm->kgPairList, kgPair);
 
 /* link both kgId objects in struct */
 kgPair->kg1Entry = kgEntryAdd(sm, kg1Hel, kgPair);
 kgPair->kg2Entry = kgEntryAdd(sm, kg2Hel, kgPair);
 }
 
 static struct kgPair *kgPairFind(struct hashEl* queryHel, struct hashEl* targetHel)
 /* find a kgPair object for a pair of kg ids, or NULL if not in table */
 {
 struct kgEntry *entry;
 
 /* search for an entry for the pair.  Ok to compare string ptrs here, since
  * they are in same local memory.  Don't know order in structure, so must
  * check both ways.
  */
 for (entry = queryHel->val; entry != NULL; entry = entry->next)
     {
     if (((entry->kgPair->kg1Entry->id == queryHel->name)
          && (entry->kgPair->kg2Entry->id == targetHel->name))
         || ((entry->kgPair->kg1Entry->id == targetHel->name)
             && (entry->kgPair->kg2Entry->id == queryHel->name)))
         return entry->kgPair;
     }
 return NULL;
 }
 
 static void kgPairSave(struct spMapper *sm, char *qSpId, char *tSpId,
                        char *queryId, char *targetId, float score)
 /* save a bi-directional e-value for a query/target pair */
 {
 struct hashEl *queryHel = hashStore(sm->kgPairMap, queryId);
 struct hashEl *targetHel = hashStore(sm->kgPairMap, targetId);
 struct kgPair *kgPair = kgPairFind(queryHel, targetHel);
 if (kgPair != NULL)
     {
     if (((sm->scoreDir < 0) && (score < kgPair->score))
         || ((sm->scoreDir > 0) && (score > kgPair->score)))
         {
         kgPair->score = score; /* better score */
         verbose(3, "updating: %s <> %s -> %s <> %s: %0.3f\n", qSpId, tSpId,
                 queryId, targetId, score);
         }
     }
 else
     {
     kgPairAdd(sm, queryHel, targetHel, score);
     verbose(3, "mapping: %s <> %s -> %s <> %s: %0.3f\n", qSpId, tSpId,
             queryId, targetId, score);
     }
 }
 
 static void savePairs(struct spMapper *sm, char *qSpId, char *tSpId,
                       struct slName *qIds, struct slName *tIds, float score)
 /* add all combinations of ids. */
 {
 struct slName *qId, *tId;
 
 /* transform to common structure */
 for (qId = qIds; qId != NULL; qId = qId->next)
     {
     for (tId = tIds; tId != NULL; tId = tId->next)
         kgPairSave(sm, qSpId, tSpId, qId->name, tId->name, score);
     }
 }
 
 struct spMapper *spMapperNew(struct sqlConnection *conn, int scoreDir,
                              char *unirefFile, char *orgFilter)
 /* construct a new spMapper object. scoreDir is 1 if higher score is better,
  * -1 if lower score is better If unirefFile is not null, this is a tab file
  * reformatting of the uniref xml file with member data extracted (see
  * uniref.h).  If orgFilter is not null, then only uniref members from this
  * organism are loaded.
  */
 {
 struct spMapper *sm;
 AllocVar(sm);
 sm->scoreDir = scoreDir;
 sm->spKgMap = spKgMapNew(conn);
 if (unirefFile != NULL)
     sm->unirefTbl = unirefTblNew(unirefFile, orgFilter);
 sm->kgPairMap = hashNew(23);
 sm->noMapTbl = hashNew(22);
 return sm;
 }
 
 void spMapperFree(struct spMapper **smPtr)
 /* free smMapper object */
 {
 struct spMapper *sm = *smPtr;
 if (sm != NULL)
     {
     spKgMapFree(&sm->spKgMap);
     unirefTblFree(&sm->unirefTbl);
     hashFree(&sm->noMapTbl);
     freez(smPtr);
     }
 }
 
 static struct slName *mapSpId(struct spMapper *sm, char *spId)
 /* get the list of ids associated with a swissprot.  If the swssprot acc has
  * an isoform extension, look up with and without the extension, */
 {
 struct slName *ids = spKgMapGet(sm->spKgMap, spId);
 if (ids == NULL)
     {
     char *extPtr = strchr(spId, '-');
     if (extPtr != NULL)
         {
         *extPtr = '\0';
         ids = spKgMapGet(sm->spKgMap, spId);
         *extPtr = '-';
         }
     }
 return ids;
 }
 
 static void handleNoMapping(struct spMapper *sm, char *id, char reason)
 /* record id that can't be mapped */
 {
 struct hashEl *hel = hashStore(sm->noMapTbl, id);
 if (hel->val == NULL)
     {
     if (reason == noUnirefMapping)
         sm->noUnirefMapCnt++;
     else if (reason == noKGMapping)
         sm->noSpIdMapCnt++;
     }
 hel->val = intToPt(reason);
 }
 
 static void mapUniprot(struct spMapper *sm, char *qSpId, char *tSpId, float score)
 /* map uniprot/swissprot ids without uniref mappings */
 {
 struct slName *qIds = mapSpId(sm, qSpId);
 struct slName *tIds = mapSpId(sm, tSpId);
 
 if ((qIds == NULL) || (tIds == NULL))
     {
     if (qIds == NULL)
         handleNoMapping(sm, qSpId, noKGMapping);
     if (tIds == NULL)
         handleNoMapping(sm, tSpId, noKGMapping);
     sm->qtNoSpIdMapCnt++;
     }
 else 
     {
     savePairs(sm, qSpId, tSpId, qIds, tIds, score);
     sm->qtMapCnt++;
     }
 }
 
 static void mapUnirefEntries(struct spMapper *sm,
                              struct uniref *qUniref, struct uniref *tUniref,
                              char *qSpId, char *tSpId, float score)
 /* add all combinations of uniref entries */
 {
 struct uniref *qUniprot, *tUniprot;
 boolean qKgFound = FALSE, tKgFound = FALSE;
 
 /* build list of kg pairs associate with these two uniref entries, avoiding
  * duplicated pairs. */
 for (qUniprot = qUniref; qUniprot != NULL; qUniprot = qUniprot->next)
     {
     for (tUniprot = tUniref; tUniprot != NULL; tUniprot = tUniprot->next)
         {
         struct slName *qIds = mapSpId(sm, qUniprot->upAcc);
         struct slName *tIds = mapSpId(sm, tUniprot->upAcc);
         if (qIds != NULL)
             qKgFound = TRUE;
         if (tIds != NULL)
             tKgFound = TRUE;
         if ((qIds != NULL) && (tIds != NULL))
             savePairs(sm, qSpId, tSpId, qIds, tIds, score);
         }
     }
 if (!(qKgFound && tKgFound))
     {
     /* couldn't map to known genes */
     if (!qKgFound)
         handleNoMapping(sm, qSpId, noKGMapping);
     if (!tKgFound)
         handleNoMapping(sm, tSpId, noKGMapping);
     sm->qtNoSpIdMapCnt++;
     }
 else
     {
     sm->qtMapCnt++;
     }
 }
 
 static void mapUniref(struct spMapper *sm, char *qSpId, char *tSpId, float score)
 /* load a rankProt with uniref mappings */
 {
 struct uniref *qUniref = unirefTblGetEntryById(sm->unirefTbl, qSpId);
 struct uniref *tUniref = unirefTblGetEntryById(sm->unirefTbl, tSpId);
 
 if ((qUniref == NULL) || (tUniref == NULL))
     {
     /* couldn't map to uniref */
     if (qUniref == NULL)
         handleNoMapping(sm, qSpId, noUnirefMapping);
     if (tUniref == NULL)
         handleNoMapping(sm, tSpId, noUnirefMapping);
     sm->qtNoUnirefMapCnt++;
     }
 else
     {
     mapUnirefEntries(sm, qUniref, tUniref, qSpId, tSpId, score);
     }
 }
 
 void spMapperMapPair(struct spMapper *sm, char *qSpId, char *tSpId, float score)
 /* map a pair of swissprot/uniprot ids to one or more pairs of known gene
  * ids and add them to the table */
 {
 if (sm->unirefTbl != NULL)
     return mapUniref(sm, qSpId, tSpId, score);
 else
     return mapUniprot(sm, qSpId, tSpId, score);
 }
 
 void spMapperPrintNoMapInfo(struct spMapper *sm, char *outFile)
 /* Print missed id table. First column is id, second column is R if it wasn't
  * found in uniref, or K if it couldn't be associated with a known gene */
 {
 FILE *fh = mustOpen(outFile, "w");
 struct hashCookie cookie = hashFirst(sm->noMapTbl);
 struct hashEl *hel;
 
 while ((hel = hashNext(&cookie)) != NULL)
     fprintf(fh, "%s\t%c\n", hel->name, ptToInt(hel->val));
 
 carefulClose(&fh);
 }