6ff0c3e3a83b03257780785c6366ac492a468e9e
kent
  Fri Jan 1 16:36:46 2021 -0800
Freen helps me understand how to read strings from heartcellatlas hdf5 files.

diff --git src/hg/oneShot/freen/freen.c src/hg/oneShot/freen/freen.c
index ae8f201..e208ceb 100644
--- src/hg/oneShot/freen/freen.c
+++ src/hg/oneShot/freen/freen.c
@@ -1,51 +1,120 @@
 /* freen - My Pet Freen.  A pet freen is actually more dangerous than a wild one. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "cheapcgi.h"
 #include "jksql.h"
 #include "portable.h"
 #include "obscure.h"
 #include "localmem.h"
 #include "csv.h"
 #include "tokenizer.h"
 #include "strex.h"
 #include "hmac.h"
+#include "hdf5.h"
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 void usage()
 {
 errAbort("freen - test some hairbrained thing.\n"
          "usage:  freen input\n");
 }
 
-void freen(char *s)
+#define FULLDATASET         "obs/__categories/cell_type"   /* Path inside the hdf5 file of the dataset freen() opens */
+#define DIM0            4   /* Initial value of dims[0] in freen(), set before the dataspace extent is queried */
+
+hid_t  h5dOpen(hid_t parent, char *name)
+/* Return handle to dataset name under parent; abort if it can't be opened. */
+{
+hid_t dataset = H5Dopen(parent, name, H5P_DEFAULT);
+if (dataset < 0)
+    errAbort("Couldn't find %s in parent", name);
+return dataset;
+}
+
+void freen(char *fileName)
 /* Test something */
 {
-uglyf("%s: %s\n", s, sqlEscapeString(s));
+/* Open fileName as an HDF5 file and print each string in FULLDATASET to
+ * stdout.  Aborts with a message if any step needed to get the data fails. */
+hid_t file = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT);
+if (file < 0)
+    errAbort("Couldn't open %s as an hdf5 file", fileName);
+hid_t dset = h5dOpen(file, FULLDATASET);
+/* Get the datatype.  hid_t may be wider than int, so cast for printing. */
+hid_t filetype = H5Dget_type(dset);
+if (filetype < 0)
+    errAbort("Couldn't get datatype of %s", FULLDATASET);
+uglyf("filetype = %lld\n", (long long)filetype);
+
+/* Get dataspace.  Check the rank before fetching the extent, since
+ * H5Sget_simple_extent_dims writes one entry of dims per dimension. */
+hid_t space = H5Dget_space(dset);
+if (space < 0)
+    errAbort("Couldn't get dataspace of %s", FULLDATASET);
+int ndims = H5Sget_simple_extent_ndims(space);
+uglyf("Got %d ndims\n", ndims);
+if (ndims != 1)
+    errAbort("Expecting 1 dimension in %s, got %d", FULLDATASET, ndims);
+hsize_t dims[1] = {DIM0};
+if (H5Sget_simple_extent_dims(space, dims, NULL) < 0)
+    errAbort("Couldn't get size of %s", FULLDATASET);
+
+/* Allocate zeroed read buffer, one string pointer per element. */
+char **rdata = calloc(dims[0], sizeof(*rdata));
+if (rdata == NULL && dims[0] > 0)
+    errAbort("Couldn't allocate %llu string pointers", (unsigned long long)dims[0]);
+
+/* Create the memory datatype: a variable length UTF8 C string. */
+hid_t memtype = H5Tcopy(H5T_C_S1);
+if (memtype < 0)
+    errAbort("Couldn't copy H5T_C_S1");
+if (H5Tset_size(memtype, H5T_VARIABLE) < 0)
+    errAbort("Couldn't make string type variable length");
+if (H5Tset_cset(memtype, H5T_CSET_UTF8) < 0)
+    errAbort("Couldn't make string type UTF8");
+
+/* Read the data. */
+herr_t status = H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
+if (status < 0)
+    errAbort("status %d, rats", status);
+
+/* Output the data to the screen, printing NULL strings as empty. */
+hsize_t i;
+for (i=0; i<dims[0]; i++)
+    printf("%s[%llu]: %s\n", "cell_type", (unsigned long long)i, rdata[i] ? rdata[i] : "");
+
+/* Release resources.  H5Dvlen_reclaim frees the strings rdata points to but
+ * not the pointer array itself.  Count failures from every call, not just the last. */
+int failCount = (H5Dvlen_reclaim(memtype, space, H5P_DEFAULT, rdata) < 0);
+free(rdata);
+failCount += (H5Dclose(dset) < 0);
+failCount += (H5Sclose(space) < 0);
+failCount += (H5Tclose(filetype) < 0);
+failCount += (H5Tclose(memtype) < 0);
+failCount += (H5Fclose(file) < 0);
+if (failCount > 0)
+    warn("%d hdf5 cleanup calls failed", failCount);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
-freen("\"This is, comma in quoted\"");
-freen("This is a \" quote in the middle");
-freen("0");
-freen("Now, and, then");
+freen(argv[1]);
 return 0;
 }