232916a177e494f8e1b3543c7ed71d37b2f96191
angie
  Fri Sep 5 17:21:37 2025 -0700
Handle .gz protobuf file

diff --git src/hg/hgPhyloPlace/parsimonyProto.c src/hg/hgPhyloPlace/parsimonyProto.c
index adaad5cef26..8db04322296 100644
--- src/hg/hgPhyloPlace/parsimonyProto.c
+++ src/hg/hgPhyloPlace/parsimonyProto.c
@@ -1,24 +1,25 @@
 /* Parse a protobuf-serialized file created by Yatish Turakhia's usher program
  * with the --save-mutation-annotated-tree option into a phylogenetic tree annotated with SNVs.
  * https://github.com/yatisht/usher/ file parsimony.proto defines the file's data structure. */
 
 /* Copyright (C) 2020 The Regents of the University of California */
 
 #include "common.h"
 #include "hash.h"
 #include "obscure.h"
+#include "pipeline.h"
 #include "parsimonyProto.h"
 #include "protobuf.h"
 
 /* Data structures corresponding to Yatish's parsimony.proto spec */
 // struct singleNucChange defined in parsimonyProto.h corresponds to mutation message
 
 struct mutationList
 /* Data for one node in a phylogenetic tree: a list of single-nucleotide mutations
  * associated with this node by parsimonious assignment. */
     {
     struct mutationList *next;     /* Next mutationList in the list */
     struct singleNucChange *muts;  /* Head of this node's singleNucChange list */
     };
 
 struct parsimonyData
@@ -200,31 +201,39 @@
 if (node->numEdges > 0)
     (*pINodeNum)++;
 int i;
 for (i = 0;  i < node->numEdges;  i++)
     treeAddVariants(node->edges[i], nodeMutations, nodeCount, pNodeNum, pINodeNum, nodeHash);
 if (isNotEmpty(node->ident->name))
     hashAdd(nodeHash, node->ident->name, node);
 }
 
 struct mutationAnnotatedTree *parseParsimonyProtobuf(char *savedAssignmentsFile)
 /* Return an annotated phylogenetic tree loaded from savedAssignments file.  Each node->priv
  * points to struct singleNucChange list (NULL if no change is associated with that node).
  * condensedNodes is a hash mapping names of condensed nodes to slName lists of
  * sample IDs that were condensed.  nodeHash is a hash mapping node names to nodes. */
 {
-FILE *f = mustOpen(savedAssignmentsFile, "r");
+FILE *f = NULL;
+if (endsWith(savedAssignmentsFile, ".gz"))
+    {
+    static char *command[] = {"gzip", "-dc", NULL};
+    struct pipeline *pl = pipelineOpen1(command, pipelineRead|pipelineSigpipe, savedAssignmentsFile, NULL, 0);
+    f = pipelineFile(pl);
+    }
+else
+    f = mustOpen(savedAssignmentsFile, "r");
 
 // Hand-compiled from ~angie/github/yatish_strain_phylogenetics/parsimony.proto Aug. 10 2020...
 // message mut {
 //     int32 position = 1;
 //     int32 ref_nuc = 2;
 //     int32 par_nuc = 3;
 //     repeated int32 mut_nuc = 4;
 // }
 struct protobufFieldDef chromField = { NULL, "chromosome", 5, pbdtString, NULL, FALSE };
 struct protobufFieldDef mutNucField = { &chromField, "mut_nuc", 4, pbdtInt32, NULL, TRUE };
 struct protobufFieldDef parNucField = { &mutNucField, "par_nuc", 3, pbdtInt32, NULL, FALSE };
 struct protobufFieldDef refNucField = { &parNucField, "ref_nuc", 2, pbdtInt32, NULL, FALSE };
 struct protobufFieldDef positionField = { &refNucField, "position", 1, pbdtInt32, NULL, FALSE };
 struct protobufDef mutDef = { NULL, "mut", &positionField };
 // message mutation_list {