1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | ''' |
---|
4 | Print the structure of an HDF5 file to stdout |
---|
5 | |
---|
6 | $Id: h5toText.py 520 2011-05-03 15:50:38Z jemian $ |
---|
7 | ''' |
---|
8 | |
---|
9 | |
---|
10 | ########### SVN repository information ################### |
---|
11 | # $Date: 2011-05-03 15:50:38 +0000 (Tue, 03 May 2011) $ |
---|
12 | # $Author: jemian $ |
---|
13 | # $Revision: 520 $ |
---|
14 | # $URL: hdf5_exchange/h5py_examples/src/h5toText/h5toText.py $ |
---|
15 | # $Id: h5toText.py 520 2011-05-03 15:50:38Z jemian $ |
---|
16 | ########### SVN repository information ################### |
---|
17 | |
---|
18 | |
---|
19 | import h5py |
---|
20 | import os |
---|
21 | import sys |
---|
22 | import getopt |
---|
23 | |
---|
24 | |
---|
class H5toText(object):
    '''
    Print the structure (groups, datasets, attributes) of an HDF5 file to stdout.

    Example usage showing default display::

        mc = H5toText(filename)
        mc.array_items_shown = 5
        mc.report()
    '''
    # name of the file to report; stays None when the requested file does not exist
    filename = None
    # file name exactly as given to the constructor
    requested_filename = None
    # True when a top-level group carries the attribute NX_class = "NXentry"
    isNeXus = False
    # maximum number of items shown along each array axis;
    # None shows everything, a value of 2 or less suppresses array content
    array_items_shown = 5

    def __init__(self, filename, makeReport = False):
        '''
        Constructor

        :param filename: name of the HDF5 file to be examined
        :param makeReport: when True, print the report immediately
        '''
        self.requested_filename = filename
        if os.path.exists(filename):
            self.filename = filename
            self.isNeXus = self.testIsNeXus()
            if makeReport:
                self.report()

    @staticmethod
    def _text(value):
        '''
        return byte strings decoded as text, any other value unchanged

        On Python 2 ``bytes`` is ``str`` so nothing is converted there.
        '''
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode('utf-8', 'replace')
        return value

    def report(self):
        ''' print the structure of the file (does nothing if the file was not found) '''
        if self.filename is None:
            return
        f = h5py.File(self.filename, 'r')
        try:
            txt = self.filename
            if self.isNeXus:
                txt += ":NeXus data file"
            self.showGroup(f, txt, indentation = "")
        finally:
            # release the file handle even when printing fails part way
            f.close()

    def testIsNeXus(self):
        '''
        test if the selected HDF5 file is a NeXus file

        :return: True if any top-level group has the attribute
            NX_class = "NXentry", otherwise False (also when the
            file cannot be opened or read)
        '''
        result = False
        f = None
        try:
            f = h5py.File(self.filename, 'r')
            for value in f.values():
                if not isinstance(value, h5py.Group):
                    continue
                if 'NX_class' not in value.attrs:
                    continue
                v = self._text(value.attrs['NX_class'])
                if isinstance(v, str) and v == 'NXentry':
                    result = True
                    break
        except Exception:
            # deliberate best effort: an unreadable file is simply "not NeXus"
            pass
        finally:
            if f is not None:
                f.close()
        return result

    def showGroup(self, obj, name, indentation = " "):
        '''
        print the contents of the group

        Datasets (and links) are printed first, then the subgroups,
        each set in sorted name order.

        :param obj: HDF5 group (or file) object
        :param name: text printed as the name of the group
        :param indentation: text printed in front of every line
        '''
        nxclass = ""
        if 'NX_class' in obj.attrs:
            class_attr = self._text(obj.attrs['NX_class'])
            nxclass = ":" + str(class_attr)
        print(indentation + name + nxclass)
        self.showAttributes(obj, indentation)
        # h5py.File derives from h5py.Group, so one isinstance test covers both
        datasets = []
        groups = []
        for itemname in sorted(obj):
            value = obj[itemname]
            if isinstance(value, h5py.Group):
                groups.append((itemname, value))
            else:
                datasets.append((itemname, value))
        # show datasets (and links) first
        for itemname, value in datasets:
            self.showDataset(value, itemname, indentation = indentation+" ")
        # then show things that look like groups
        for itemname, value in groups:
            self.showGroup(value, itemname, indentation = indentation+" ")

    def showAttributes(self, obj, indentation = " "):
        '''
        print any attributes

        :param obj: object providing a dictionary-like ``attrs``
        :param indentation: text printed in front of every line
        '''
        for name, value in obj.attrs.items():
            print("%s @%s = %s" % (indentation, name, str(self._text(value))))

    def showDataset(self, dset, name, indentation = " "):
        '''
        print the contents and structure of a dataset

        In a NeXus file, a dataset whose "target" attribute differs
        from its own name is printed as a link and nothing else.
        A dataset holding a single value (shape ``()`` or ``(1,)``)
        is printed on one line, anything else as an array.

        :param dset: HDF5 dataset object
        :param name: name of the dataset within its group
        :param indentation: text printed in front of every line
        '''
        shape = dset.shape
        if self.isNeXus:
            if "target" in dset.attrs:
                target = self._text(dset.attrs['target'])
                if target != dset.name:
                    print("%s%s --> %s" % (indentation, name, target))
                    return
        txType = self.getType(dset)
        txShape = self.getShape(dset)
        if shape in ((), (1,)):
            # a rank-0 dataset has no element 0, it is read with an empty index
            if shape == ():
                single = dset[()]
            else:
                single = dset[0]
            value = " = %s" % str(self._text(single))
            print("%s%s:%s%s%s" % (indentation, name, txType, txShape, value))
            self.showAttributes(dset, indentation)
        else:
            print("%s%s:%s%s = __array" % (indentation, name, txType, txShape))
            self.showAttributes(dset, indentation)  # show these before __array
            limit = self.array_items_shown
            # None means "no limit"; it must not be compared with a number
            if limit is None or limit > 2:
                value = self.formatArray(dset, indentation + ' ')
                print("%s %s = %s" % (indentation, "__array", value))
            else:
                print("%s %s: %s" % (indentation, "__array", "not shown"))

    def getType(self, obj):
        '''
        get the storage (data) type of the dataset

        :param obj: object with a ``dtype``
        :return: text of the dtype, with "|S##" shown as "char[##]";
            for NeXus files the text is upper case with an "NX_" prefix
        '''
        t = str(obj.dtype)
        if t.startswith('|S'):
            t = 'char[%s]' % t[2:]
        if self.isNeXus:
            t = 'NX_' + t.upper()
        return t

    def getShape(self, obj):
        '''
        return the shape of the HDF5 dataset

        :param obj: object with a ``shape``
        :return: text such as "[3,4]", or an empty string for a
            dataset holding a single value (shape ``()`` or ``(1,)``)
        '''
        dims = [str(dim) for dim in obj.shape]
        if dims in ([], ['1']):
            return ""
        return "[%s]" % ",".join(dims)

    def formatArray(self, obj, indentation = ' '):
        '''
        nicely format an array up to rank=5

        :param obj: object with ``shape`` that can be indexed by integers
        :param indentation: text placed in front of continuation lines
        :return: formatted text, a notice for rank > 5,
            or an empty string for rank 0
        '''
        rank = len(obj.shape)
        if rank > 5:
            return "### no arrays for rank > 5 ###"
        if rank >= 1:
            return self.formatNdArray(obj, indentation + ' ')
        return ""

    def decideNumShown(self, n):
        '''
        determine how many values to show

        :param n: length of the axis
        :return: n if it fits within ``array_items_shown`` (or no limit
            is set), otherwise the limit less 2 (never negative), which
            leaves room for the "..." marker and the last item
        '''
        limit = self.array_items_shown
        if limit is not None and n > limit:
            n = max(limit - 2, 0)
        return n

    def formatNdArray(self, obj, indentation = ' '):
        '''
        return a list of lower-dimension arrays, nicely formatted

        :param obj: object with ``shape`` that can be indexed by integers
        :param indentation: text placed in front of continuation lines
        :return: formatted text, or None if the rank is not 1 to 5
        '''
        shape = obj.shape
        rank = len(shape)
        if rank not in (1, 2, 3, 4, 5):
            return None
        n = self.decideNumShown(shape[0])
        inner_indentation = indentation + ' '

        def render(index):
            # indexing the first axis alone selects the whole lower-rank slab
            item = obj[index]
            if rank == 1:
                return item
            return self.formatNdArray(item, inner_indentation)

        r = [render(i) for i in range(n)]
        if n < shape[0]:
            # skip over most
            r.append("...")
            # get the last one
            r.append(render(-1))
        if rank == 1:
            return str(r)
        separator = "\n" + indentation + ' '
        return "[" + separator + separator.join(r) + "\n" + indentation + "]"
192 | |
---|
193 | |
---|
if __name__ == '__main__':
    # Command-line entry point:
    #   h5toText.py [-n ##] HDF5_file_name [another_HDF5_file_name]
    # Without any command-line arguments, the built-in list of
    # example files below is reported instead.
    limit = 5
    filelist = [
        '../Create/example1.hdf5',
        '../Create/example2.hdf5',
        '../Create/example3.hdf5',
        '../Create/example4.hdf5',
        '../../../NeXus/definitions/trunk/manual/examples/h5py/prj_test.nexus.hdf5',
        '../../../NeXus/definitions/exampledata/code/hdf5/dmc01.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/dmc02.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/focus2007n001335.hdf',
        '../../../NeXus/definitions/exampledata/code/hdf5/NXtest.h5',
        '../../../NeXus/definitions/exampledata/code/hdf5/sans2009n012333.hdf',
        '../Create/simple5.nxs',
        '../Create/bad.h5',
    ]
    if len(sys.argv) > 1:
        try:
            opts, args = getopt.getopt(sys.argv[1:], "n:")
            for flag, value in opts:
                if flag == "-n":
                    # "none" (any letter case) removes the limit
                    limit = None if value.lower() == "none" else int(value)
        except (getopt.GetoptError, ValueError):
            # unknown option or a -n value that is not an integer:
            # show the usage and stop, there is nothing sensible to report
            print("")
            print("SVN: $Id: h5toText.py 520 2011-05-03 15:50:38Z jemian $")
            print("usage:  %s  [-n ##] HDF5_file_name [another_HDF5_file_name]" % sys.argv[0])
            print(" -n ## : limit number of displayed array items to ## (must be 3 or more or 'None')")
            print("")
            sys.exit(2)
        filelist = args
    for item in filelist:
        mc = H5toText(item)
        mc.array_items_shown = limit
        mc.report()