Read Data
This example demonstrates how to read the HDF5 output of Luna using h5py and pandas.
Luna Command
tpx3dump process -i /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.tpx3 -o /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.hdf5 --eps-t 150ns --eps-s 1 --ctot-cut 500
Python Script
Python code to read HDF5 data
1import os, sys
2from typing import *
3import h5py # ensure you have `pip install h5py`
4import pandas as pd # ensure you have `pip install pandas`
5import numpy as np
6# on our system "EXAMPLE_DATA_HDF5" refers to the absolute path
7# to a hdf5 file generated by luna. Replace with your own!
8sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
9from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
10
11
def load_pixel_hits(hdf5_fname: str) -> pd.DataFrame:
    """
    Read the "PixelHits" dataset of an HDF5 file into a DataFrame.

    The names of the top-level entries of the file are printed to stdout
    before the dataset is read.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    Returns:
    --------
    pd.DataFrame
        A DataFrame built from the full contents of the "PixelHits" dataset.
    """
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
        # Slicing with [:] reads the whole dataset into memory, so the
        # values stay usable after the file is closed.
        raw_hits = hdf5_file["PixelHits"][:]
    return pd.DataFrame(raw_hits)
30
31
def load_clusters(hdf5_fname: str) -> pd.DataFrame:
    """
    Load clusters data from an HDF5 file.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    Returns:
    --------
    pd.DataFrame
        A DataFrame containing the clusters data.

    Raises:
    -------
    KeyError
        If the file has no "Clusters" dataset. No availability check is
        performed here; the lookup simply fails.
    """
    # Plain (unparenthesised) with-statement: the parenthesised form is only
    # valid syntax on newer Python versions and adds nothing for a single
    # context manager.
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        # [:] reads the whole dataset into memory before the file is closed.
        clusters = pd.DataFrame(hdf5_file["Clusters"][:])
    return clusters
49
50
51
def load_timewalk_matrix(DATA_FNAME: str) -> Optional[pd.DataFrame]:
    """
    Load the timewalk matrix from an HDF5 file.

    The timewalk matrix is a long form matrix with columns:
    - CToT
    - ToT
    - AverageDToA
    - SumSquareDiff
    - Count
    - Std (standard deviation)
    - Sem (Standard error of mean)

    Parameters:
    -----------
    DATA_FNAME : str
        The path to the HDF5 file.

    Returns:
    --------
    pd.DataFrame or None
        A DataFrame containing the timewalk matrix data, with the "CToT",
        "ToT" and "Count" columns cast to ``np.uint32``. ``None`` is returned
        when the file has no "TimewalkMatrix" dataset.
    """
    with h5py.File(DATA_FNAME, "r") as f:
        if "TimewalkMatrix" not in f:
            # The dataset is optional; callers are expected to check for None.
            return None

        ds = f["TimewalkMatrix"]
        # [:] reads the whole dataset into memory before the file is closed.
        timewalk_matrix = pd.DataFrame(ds[:])
        # Each "col_names" entry holds the column name as bytes in its first
        # element; decode to str for use as DataFrame column labels.
        timewalk_matrix.columns = [
            entry[0].decode() for entry in ds.attrs["col_names"]
        ]

    integer_columns = ["CToT", "ToT", "Count"]
    return timewalk_matrix.astype(dict.fromkeys(integer_columns, np.uint32))
89
90
def load_timewalk_lookup_table(DATA_FNAME: str) -> Optional[pd.DataFrame]:
    """
    Load the timewalk lookup table from an HDF5 file.

    Column labels are taken from the dataset's "col_names" attribute. Rows
    in which every value is NaN are dropped, the "ToT" and "Count" columns
    are cast to ``np.uint32`` and "ToT" becomes the index.

    Parameters:
    -----------
    DATA_FNAME : str
        The path to the HDF5 file.

    Returns:
    --------
    pd.DataFrame or None
        A DataFrame containing the timewalk lookup table data, indexed by
        "ToT". ``None`` is returned when the file has no
        "TimewalkLookupTable" dataset.
    """
    with h5py.File(DATA_FNAME, "r") as f:
        if "TimewalkLookupTable" not in f:
            # The dataset is optional; callers are expected to check for None.
            return None

        ds = f["TimewalkLookupTable"]
        # Read the data explicitly with [:] (as the other loaders do) rather
        # than handing the open dataset handle to pandas.
        lookup_df = pd.DataFrame(ds[:])
        # Each "col_names" entry holds the column name as bytes in its first
        # element.
        lookup_df.columns = [
            entry[0].decode() for entry in ds.attrs["col_names"]
        ]

    integer_columns = ["ToT", "Count"]
    # Chain non-mutating calls instead of assigning back into the frame
    # returned by dropna() and using inplace=True.
    return (
        lookup_df.dropna(axis=0, how="all")
        .astype(dict.fromkeys(integer_columns, np.uint32))
        .set_index("ToT")
    )
120
121
122
if __name__ == "__main__":
    # Render floats with 12 decimal places in the printed tables.
    pd.set_option('display.float_format', '{:.12f}'.format)

    # Load everything up front; printing happens afterwards.
    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5)
    clusters = load_clusters(EXAMPLE_DATA_HDF5)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5)

    # Heading and table are emitted on separate lines via sep="\n".
    print("Pixel Hits: ", pixel_hits.head(15).to_string(), sep="\n")

    print("Clusters: ", clusters.head(15).to_string(), sep="\n")

    # The two timewalk loaders return None when their dataset is absent,
    # so those sections are printed only when data was found.
    if timewalk_matrix is not None:
        print(
            "Time walk matrix: ",
            timewalk_matrix.iloc[0:10, 0:10].to_string(),
            sep="\n",
        )

    if timewalk_lut is not None:
        print(
            "Time walk look up table: ",
            timewalk_lut.head(15).to_string(),
            sep="\n",
        )
Script Output
Example Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits', 'TimewalkLookupTable', 'TimewalkMatrix']
Pixel Hits:
toa corrected_toa cid dtoa tot x y
0 110345462734375 -1 0 0 200 140 193
1 110345462984375 -1 1 0 150 143 193
2 110345896437500 -1 2 0 1475 68 92
3 110345896453125 -1 2 15625 850 67 92
4 110345896656250 -1 2 218750 300 68 91
5 110345896890625 -1 2 453125 150 67 91
6 110346426531250 -1 3 0 800 80 80
7 110346426750000 -1 3 218750 250 80 81
8 110346426828125 -1 3 296875 250 79 80
9 110346973375000 -1 4 0 375 14 18
10 110346973406250 -1 4 31250 375 14 19
11 110349264125000 -1 5 0 375 49 5
12 110350602171875 -1 6 0 1675 146 106
13 110350602250000 -1 6 78125 425 146 105
14 110350602343750 -1 6 171875 350 145 106
Clusters:
id size min_toa max_toa ctoa corrected_ctoa sum_tot ctot x_average x_centroid y_average y_centroid x y
0 0 1 110345462734375 110345462734375 110345462734375 -1 200 200 140.000000000000 140.000000000000 193.000000000000 193.000000000000 140 140
1 1 1 110345462984375 110345462984375 110345462984375 -1 150 150 143.000000000000 143.000000000000 193.000000000000 193.000000000000 143 143
2 2 4 110345896437500 110345896890625 110345896437500 -1 2775 1475 67.500000000000 67.639639639640 91.500000000000 91.837837837838 67 68
3 3 3 110346426531250 110346426828125 110346426531250 -1 1300 800 79.666666666667 79.807692307692 80.333333333333 80.192307692308 79 80
4 4 2 110346973375000 110346973406250 110346973375000 -1 750 375 14.000000000000 14.000000000000 18.500000000000 18.500000000000 14 14
5 5 1 110349264125000 110349264125000 110349264125000 -1 375 375 49.000000000000 49.000000000000 5.000000000000 5.000000000000 49 49
6 6 4 110350602171875 110350602765625 110350602171875 -1 2600 1675 146.000000000000 145.923076923077 105.750000000000 105.836538461538 145 146
7 7 2 110351021187500 110351021312500 110351021187500 -1 1475 1125 142.500000000000 142.237288135593 0.000000000000 0.000000000000 142 142
8 8 3 110351021250000 110351021500000 110351021250000 -1 850 425 142.666666666667 142.735294117647 2.333333333333 2.235294117647 142 143
9 9 4 110353755656250 110353755796875 110353755656250 -1 1700 700 38.500000000000 38.382352941176 194.500000000000 194.632352941176 38 38
10 10 4 110354402125000 110354402250000 110354402125000 -1 2500 925 63.500000000000 63.430000000000 22.500000000000 22.360000000000 63 63
11 11 4 110355869359375 110355869484375 110355869359375 -1 2875 1250 91.500000000000 91.339130434783 28.500000000000 28.652173913043 91 91
12 12 6 110356706015625 110356706968750 110356706015625 -1 2325 925 244.000000000000 244.333333333333 207.833333333333 207.709677419355 243 245
13 13 3 110360353937500 110360354109375 110360353937500 -1 1425 850 174.333333333333 174.228070175439 188.666666666667 188.824561403509 174 174
14 14 4 110360842171875 110360842218750 110360842187500 -1 3200 975 141.500000000000 141.406250000000 106.500000000000 106.507812500000 141 141
Time walk matrix:
CToT ToT AverageDToA SumSquareDiff Count Std Sem
0 500 25 993566.187500000000 941923368960.000000000000 17 235387.453125000000 57089.843750000000
1 500 50 846507.437500000000 804314087424.000000000000 17 217514.578125000000 52755.035156250000
2 500 75 731534.125000000000 767222939648.000000000000 44 132048.796875000000 19907.105468750000
3 500 100 631304.750000000000 1042412011520.000000000000 57 135232.875000000000 17912.033203125000
4 500 125 546655.000000000000 1446529990656.000000000000 71 142736.343750000000 16939.687500000000
5 500 150 443782.218750000000 1320634417152.000000000000 97 116682.421875000000 11847.304687500000
6 500 175 358829.937500000000 556370952192.000000000000 86 80432.765625000000 8673.288085937500
7 500 200 290532.218750000000 617762848768.000000000000 101 78207.820312500000 7781.969238281250
8 500 225 230799.812500000000 554400022528.000000000000 118 68544.179687500000 6310.002929687500
9 500 250 176983.187500000000 557833125888.000000000000 104 73237.835937500000 7181.560546875000
Time walk look up table:
AverageDToA SumSquareDiff Std Count
ToT
25 762459.312500000000 8039622508544.000000000000 235469.046875000000 146
50 702579.312500000000 6421094072320.000000000000 204194.562500000000 155
75 606299.562500000000 5496268914688.000000000000 184765.578125000000 162
100 502045.375000000000 4444139290624.000000000000 165628.984375000000 163
125 434020.375000000000 2673515429888.000000000000 127291.492187500000 166
150 359546.500000000000 1739993907200.000000000000 103003.492187500000 165
175 302247.875000000000 1132830261248.000000000000 82859.148437500000 166
200 258924.640625000000 706738716672.000000000000 65846.945312500000 164
225 214258.000000000000 608579092480.000000000000 61291.605468750000 163
250 183489.984375000000 415105777664.000000000000 49707.824218750000 169
275 162849.390625000000 374076506112.000000000000 48202.246093750000 162
300 144676.890625000000 251067138048.000000000000 39126.679687500000 165
325 126646.156250000000 399410954240.000000000000 49653.796875000000 163
350 116081.500000000000 265686974464.000000000000 41536.007812500000 155
375 99530.406250000000 112967852032.000000000000 26245.539062500000 165