Read data, Unit Conversion

This example shows how to read the HDF5 output of Luna with h5py and pandas, and how to convert the ToA time values into the time unit of your choice.

Luna Command

tpx3dump process -i example_data.tpx3 -o example_data.hdf5 --eps-t 150ns --eps-s 1 --ctot-cut 500

Python Script

Python code to read HDF5 data and convert the units
  1import os, sys
  2from enum import Enum
  3from typing import Literal
  4
  5import h5py  # ensure you have `pip install h5py`
  6import numpy as np
  7import pandas as pd  # ensure you have `pip install pandas`
  8
  9# on our system "EXAMPLE_DATA_HDF5" refers to the absolute path
 10# to a hdf5 file generated by luna
 11sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 12from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
 13
 14
 15class DetectorType(Enum):
 16    """Enum for specifying the detector type."""
 17    TPX3 = "tpx3"
 18    TPX4 = "tpx4"
 19
 20
 21class TimeUnit(Enum):
 22    """Enum for specifying time units."""
 23    Seconds = 's'
 24    Milliseconds = 'ms'
 25    Microseconds = 'us'
 26    Nanoseconds = 'ns'
 27    Picoseconds = 'ps'
 28    Femtoseconds100 = 'fs100'
 29    Femtoseconds = 'fs'
 30    Attoseconds100 = 'as100'
 31    Attoseconds = 'as'
 32
 33
 34# tpx4 users should change this to DetectorType.TPX4
 35DETECTOR_TYPE: DetectorType = DetectorType.TPX3
 36
 37# toa unit in luna output is 1e-13 in tpx3 and 1e-16 in tpx4
 38INTERNAL_TOA_UNITS = TimeUnit.Femtoseconds100 if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100
 39
 40# in tpx3 tot is in increments of 25ns but in tpx4 tot is the same time resolution as toa.
 41INTERNAL_TOT_UNITS = TimeUnit.Nanoseconds if DETECTOR_TYPE == DetectorType.TPX3 else TimeUnit.Attoseconds100
 42
 43
 44def convert_time_units(value: float | pd.Series | pd.DataFrame, from_unit: TimeUnit, to_unit: TimeUnit):
 45    """
 46    Convert time units from one unit to another.
 47
 48    Parameters:
 49    -----------
 50    value : float | pd.Series | pd.DataFrame
 51        The value(s) to be converted.
 52
 53    from_unit : TimeUnit
 54        The unit of the input value(s).
 55
 56    to_unit : TimeUnit
 57        The unit to convert the value(s) to.
 58
 59    Returns:
 60    --------
 61    float | pd.Series | pd.DataFrame
 62        The converted value(s).
 63    """
 64    conversion_factors = {
 65        TimeUnit.Seconds: 1,
 66        TimeUnit.Milliseconds: 1e-3,
 67        TimeUnit.Microseconds: 1e-6,
 68        TimeUnit.Nanoseconds: 1e-9,
 69        TimeUnit.Picoseconds: 1e-12,
 70        TimeUnit.Femtoseconds100: 1e-13,
 71        TimeUnit.Femtoseconds: 1e-15,
 72        TimeUnit.Attoseconds100: 1e-16,
 73        TimeUnit.Attoseconds: 1e-18,
 74    }
 75
 76    value_in_seconds = value * conversion_factors[from_unit]
 77    return value_in_seconds / conversion_factors[to_unit]
 78
 79
 80def load_pixel_hits(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
 81    """
 82    Load pixel hits data from an HDF5 file and convert time units.
 83
 84    Parameters:
 85    -----------
 86    hdf5_fname : str
 87        The path to the HDF5 file.
 88
 89    toa_unit : TimeUnit
 90        The time unit to convert the 'toa' columns to.
 91
 92    Returns:
 93    --------
 94    pd.DataFrame
 95        A DataFrame containing the pixel hits data with converted time units.
 96    """
 97    with h5py.File(hdf5_fname, 'r') as hdf5_file:
 98        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
 99        pixel_hits = pd.DataFrame(hdf5_file["PixelHits"][:])
100        # Convert 'toa' columns to specified time units
101        for col in ['toa', 'corrected_toa']:
102            if col in pixel_hits.columns:
103                pixel_hits[col] = convert_time_units(pixel_hits[col], INTERNAL_TOA_UNITS, toa_unit)
104    return pixel_hits
105
106
def load_clusters(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame:
    """
    Load clusters data from an HDF5 file and convert time units.

    The 'min_toa', 'max_toa', 'ctoa' and 'corrected_ctoa' columns are
    converted from the internal ToA unit to `toa_unit`. A column that is
    missing from the dataset is skipped, in the same way as in
    `load_pixel_hits`, instead of raising a KeyError.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the 'toa' columns to.

    Returns:
    --------
    pd.DataFrame
        A DataFrame containing the clusters data with converted time units.
    """
    with h5py.File(hdf5_fname, 'r') as hdf5_file:
        clusters = pd.DataFrame(hdf5_file["Clusters"][:])

    # Convert the relevant 'toa' columns that are present in this file.
    for col in ['min_toa', 'max_toa', 'ctoa', 'corrected_ctoa']:
        if col in clusters.columns:
            clusters[col] = convert_time_units(clusters[col], INTERNAL_TOA_UNITS, toa_unit)
    return clusters
130
131
def load_timewalk_matrix(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk matrix from an HDF5 file and convert time units.

    The timewalk matrix is a long form matrix with columns:
    - CToT
    - ToT
    - AverageDToA
    - SumSquareDiff
    - Count
    - Std (standard deviation)
    - Sem (Standard error of mean)

    AverageDToA, Std and Sem are times and are converted linearly.
    SumSquareDiff is a sum of squared time differences, so it is scaled by
    the square of the conversion factor and is returned in `toa_unit`
    squared.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns:
    --------
    pd.DataFrame | None
        A DataFrame containing the timewalk matrix data with converted time
        units, or None when the file has no "TimewalkMatrix" dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        print(f.keys())
        if "TimewalkMatrix" not in f:
            return None
        ds = f["TimewalkMatrix"]
        timewalk_matrix = pd.DataFrame(ds[:])
        # Column names are stored as bytes in the "col_names" attribute.
        timewalk_matrix.columns = [name[0].decode() for name in ds.attrs["col_names"]]

    # all column names:
    # CToT, ToT, AverageDToA, SumSquareDiff, Count, Std, Sem

    integer_types = ["CToT", "ToT", "Count"]
    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)

    time_columns = ['AverageDToA', 'Std', 'Sem']  # time
    timewalk_matrix[time_columns] = convert_time_units(
        timewalk_matrix[time_columns],
        INTERNAL_TOA_UNITS, toa_unit
    )

    # time squared: the conversion is linear, so applying it twice
    # multiplies by the squared factor.
    timewalk_matrix['SumSquareDiff'] = convert_time_units(
        convert_time_units(timewalk_matrix['SumSquareDiff'], INTERNAL_TOA_UNITS, toa_unit),
        INTERNAL_TOA_UNITS, toa_unit
    )

    return timewalk_matrix
183
184
def load_timewalk_lookup_table(hdf5_fname: str, toa_unit: TimeUnit) -> pd.DataFrame | None:
    """
    Load the timewalk lookup table from an HDF5 file and convert time units.

    The returned table is indexed by ToT and contains the columns:
    - AverageDToA
    - SumSquareDiff
    - Std (standard deviation)
    - Count

    AverageDToA and Std are times and are converted linearly.
    SumSquareDiff is a sum of squared time differences, so it is scaled by
    the square of the conversion factor and is returned in `toa_unit`
    squared.

    Parameters:
    -----------
    hdf5_fname : str
        The path to the HDF5 file.

    toa_unit : TimeUnit
        The time unit to convert the time columns to.

    Returns:
    --------
    pd.DataFrame | None
        A DataFrame containing the timewalk lookup table data with converted
        time units, or None when the file has no "TimewalkLookupTable"
        dataset.
    """
    with h5py.File(hdf5_fname, "r") as f:
        if "TimewalkLookupTable" not in f:
            return None
        ds = f["TimewalkLookupTable"]
        # Read the dataset into memory before the file is closed.
        lookup_df = pd.DataFrame(ds[:])
        # Column names are stored as bytes in the "col_names" attribute.
        lookup_df.columns = [name[0].decode() for name in ds.attrs["col_names"]]

    # column names:
    # ToT AverageDToA SumSquareDiff Std Count
    lookup_df = lookup_df.dropna(axis=0, how="all")
    integer_types = ["ToT", "Count"]
    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
    lookup_df = lookup_df.set_index("ToT")

    time_columns = ['AverageDToA', 'Std']  # time
    lookup_df[time_columns] = convert_time_units(
        lookup_df[time_columns],
        INTERNAL_TOA_UNITS, toa_unit
    )

    # time squared: the conversion is linear, so applying it twice
    # multiplies by the squared factor.
    lookup_df['SumSquareDiff'] = convert_time_units(
        convert_time_units(lookup_df['SumSquareDiff'], INTERNAL_TOA_UNITS, toa_unit),
        INTERNAL_TOA_UNITS, toa_unit
    )

    return lookup_df
230
231
if __name__ == "__main__":
    # Print floats with 12 decimal places so small time values stay readable.
    pd.set_option('display.float_format', '{:.12f}'.format)
    toa_units = TimeUnit.Seconds

    # Load every table first; the loaders print their own diagnostics.
    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5, toa_units)
    clusters = load_clusters(EXAMPLE_DATA_HDF5, toa_units)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5, toa_unit=toa_units)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5, toa_unit=toa_units)

    # (heading, preview) pairs in the order they are printed.
    previews = [
        ("Pixel Hits: ", pixel_hits.head(15)),
        ("Clusters: ", clusters.head(15)),
    ]
    # The timewalk tables are optional: the loaders return None when the
    # dataset is missing from the file.
    if timewalk_matrix is not None:
        previews.append(("Time walk matrix: ", timewalk_matrix.iloc[0:10, 0:10]))
    if timewalk_lut is not None:
        previews.append(("Time walk look up table: ", timewalk_lut.head(15)))

    for heading, preview in previews:
        print(heading)
        print(preview.to_string())

Script Output

Example Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits', 'TimewalkLookupTable', 'TimewalkMatrix']
<KeysViewHDF5 ['Clusters', 'ExposureTimeBoundaries', 'PixelHits', 'TimewalkLookupTable', 'TimewalkMatrix']>
Pixel Hits: 
               toa   corrected_toa  cid    dtoa   tot    x    y
0  11.034546273438 -0.000000000000    0       0   200  140  193
1  11.034546298438 -0.000000000000    1       0   150  143  193
2  11.034589643750 -0.000000000000    2       0  1475   68   92
3  11.034589645313 -0.000000000000    2   15625   850   67   92
4  11.034589665625 -0.000000000000    2  218750   300   68   91
5  11.034589689062 -0.000000000000    2  453125   150   67   91
6  11.034642653125 -0.000000000000    3       0   800   80   80
7  11.034642675000 -0.000000000000    3  218750   250   80   81
8  11.034642682812 -0.000000000000    3  296875   250   79   80
9  11.034697337500 -0.000000000000    4       0   375   14   18
10 11.034697340625 -0.000000000000    4   31250   375   14   19
11 11.034926412500 -0.000000000000    5       0   375   49    5
12 11.035060217187 -0.000000000000    6       0  1675  146  106
13 11.035060225000 -0.000000000000    6   78125   425  146  105
14 11.035060234375 -0.000000000000    6  171875   350  145  106
Clusters: 
    id  size         min_toa         max_toa            ctoa  corrected_ctoa  sum_tot  ctot        x_average       x_centroid        y_average       y_centroid    x    y
0    0     1 11.034546273438 11.034546273438 11.034546273438 -0.000000000000      200   200 140.000000000000 140.000000000000 193.000000000000 193.000000000000  140  140
1    1     1 11.034546298438 11.034546298438 11.034546298438 -0.000000000000      150   150 143.000000000000 143.000000000000 193.000000000000 193.000000000000  143  143
2    2     4 11.034589643750 11.034589689062 11.034589643750 -0.000000000000     2775  1475  67.500000000000  67.639639639640  91.500000000000  91.837837837838   67   68
3    3     3 11.034642653125 11.034642682812 11.034642653125 -0.000000000000     1300   800  79.666666666667  79.807692307692  80.333333333333  80.192307692308   79   80
4    4     2 11.034697337500 11.034697340625 11.034697337500 -0.000000000000      750   375  14.000000000000  14.000000000000  18.500000000000  18.500000000000   14   14
5    5     1 11.034926412500 11.034926412500 11.034926412500 -0.000000000000      375   375  49.000000000000  49.000000000000   5.000000000000   5.000000000000   49   49
6    6     4 11.035060217187 11.035060276563 11.035060217187 -0.000000000000     2600  1675 146.000000000000 145.923076923077 105.750000000000 105.836538461538  145  146
7    7     2 11.035102118750 11.035102131250 11.035102118750 -0.000000000000     1475  1125 142.500000000000 142.237288135593   0.000000000000   0.000000000000  142  142
8    8     3 11.035102125000 11.035102150000 11.035102125000 -0.000000000000      850   425 142.666666666667 142.735294117647   2.333333333333   2.235294117647  142  143
9    9     4 11.035375565625 11.035375579687 11.035375565625 -0.000000000000     1700   700  38.500000000000  38.382352941176 194.500000000000 194.632352941176   38   38
10  10     4 11.035440212500 11.035440225000 11.035440212500 -0.000000000000     2500   925  63.500000000000  63.430000000000  22.500000000000  22.360000000000   63   63
11  11     4 11.035586935938 11.035586948437 11.035586935938 -0.000000000000     2875  1250  91.500000000000  91.339130434783  28.500000000000  28.652173913043   91   91
12  12     6 11.035670601563 11.035670696875 11.035670601563 -0.000000000000     2325   925 244.000000000000 244.333333333333 207.833333333333 207.709677419355  243  245
13  13     3 11.036035393750 11.036035410938 11.036035393750 -0.000000000000     1425   850 174.333333333333 174.228070175439 188.666666666667 188.824561403509  174  174
14  14     4 11.036084217188 11.036084221875 11.036084218750 -0.000000000000     3200   975 141.500000000000 141.406250000000 106.500000000000 106.507812500000  141  141
Time walk matrix: 
   CToT  ToT    AverageDToA  SumSquareDiff  Count            Std            Sem
0   500   25 0.000000099357 0.094192333519     17 0.000000023539 0.000000005709
1   500   50 0.000000084651 0.080431409180     17 0.000000021751 0.000000005276
2   500   75 0.000000073153 0.076722294092     44 0.000000013205 0.000000001991
3   500  100 0.000000063130 0.104241199791     57 0.000000013523 0.000000001791
4   500  125 0.000000054665 0.144652992487     71 0.000000014274 0.000000001694
5   500  150 0.000000044378 0.132063433528     97 0.000000011668 0.000000001185
6   500  175 0.000000035883 0.055637095124     86 0.000000008043 0.000000000867
7   500  200 0.000000029053 0.061776284128    101 0.000000007821 0.000000000778
8   500  225 0.000000023080 0.055440001190    118 0.000000006854 0.000000000631
9   500  250 0.000000017698 0.055783312768    104 0.000000007324 0.000000000718
Time walk look up table: 
       AverageDToA  SumSquareDiff            Std  Count
ToT                                                    
25  0.000000076246 0.803962230682 0.000000023547    146
50  0.000000070258 0.642109394073 0.000000020419    155
75  0.000000060630 0.549626886845 0.000000018477    162
100 0.000000050205 0.444413930178 0.000000016563    163
125 0.000000043402 0.267351537943 0.000000012729    166
150 0.000000035955 0.173999384046 0.000000010300    165
175 0.000000030225 0.113283023238 0.000000008286    166
200 0.000000025892 0.070673868060 0.000000006585    164
225 0.000000021426 0.060857906938 0.000000006129    163
250 0.000000018349 0.041510578245 0.000000004971    169
275 0.000000016285 0.037407651544 0.000000004820    162
300 0.000000014468 0.025106713176 0.000000003913    165
325 0.000000012665 0.039941094816 0.000000004965    163
350 0.000000011608 0.026568697765 0.000000004154    155
375 0.000000009953 0.011296785437 0.000000002625    165