Read Data

This example demonstrates how to read the HDF5 output of Luna using h5py and pandas.

Luna Command

tpx3dump process -i /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.tpx3 -o /Users/Ciaran/atlassian-bitbucket-pipelines-runner/temp/e71169e4-520a-5b30-a5ab-ee8a44eb5fac/build/docs/source/_static/example_data.hdf5  --eps-t 150ns --eps-s 1 --ctot-cut 500

Python Script

Python code to read HDF5 data
  1import os, sys
  2from typing import *
  3import h5py  # ensure you have `pip install h5py`
  4import pandas as pd  # ensure you have `pip install pandas`
  5import numpy as np
  6# on our system "EXAMPLE_DATA_HDF5" refers to the absolute path
  7# to a hdf5 file generated by luna. Replace with your own!
  8sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  9from env_vars_for_docs_examples import EXAMPLE_DATA_HDF5
 10
 11
 12def load_pixel_hits(hdf5_fname: str) -> pd.DataFrame:
 13    """
 14    Load pixel hits data from an HDF5 file.
 15
 16    Parameters:
 17    -----------
 18    hdf5_fname : str
 19        The path to the HDF5 file.
 20
 21    Returns:
 22    --------
 23    pd.DataFrame
 24        A DataFrame containing the pixel hits data.
 25    """
 26    with h5py.File(hdf5_fname, 'r') as hdf5_file:
 27        print(f"hdf5 datasets: {list(hdf5_file.keys())}")
 28        pixel_hits = pd.DataFrame(hdf5_file["PixelHits"][:])
 29    return pixel_hits
 30
 31
 32def load_clusters(hdf5_fname: str) -> pd.DataFrame:
 33    """
 34    Load clusters data from an HDF5 file, if available.
 35
 36    Parameters:
 37    -----------
 38    hdf5_fname : str
 39        The path to the HDF5 file.
 40
 41    Returns:
 42    --------
 43    pd.DataFrame
 44        A DataFrame containing the clusters data.
 45    """
 46    with (h5py.File(hdf5_fname, 'r') as hdf5_file):
 47        clusters = pd.DataFrame(hdf5_file["Clusters"][:])
 48    return clusters
 49
 50
 51
 52def load_timewalk_matrix(DATA_FNAME) -> pd.DataFrame:
 53    """
 54    Load the timewalk matrix from an HDF5 file.
 55
 56    The timewalk matrix is a long form matrix with columns:
 57    - CToT
 58    - ToT
 59    - AverageDToA
 60    - SumSquareDiff
 61    - Count
 62    - Std (standard deviation)
 63    - Sem (Standard error of mean)
 64
 65    Parameters:
 66    -----------
 67    DATA_FNAME : str
 68        The path to the HDF5 file.
 69
 70    Returns:
 71    --------
 72    pd.DataFrame
 73        A DataFrame containing the timewalk matrix data.
 74    """
 75    with h5py.File(DATA_FNAME, "r") as f:
 76        if "TimewalkMatrix" in f.keys():
 77            ds = f["TimewalkMatrix"]
 78            timewalk_matrix = pd.DataFrame(ds[:])
 79            colnames: List[Tuple[bytes]] = ds.attrs["col_names"]
 80            # decode from bytes
 81            colnames: List[str] = [i[0].decode() for i in colnames]
 82            timewalk_matrix.columns = colnames
 83        else:
 84            return
 85
 86    integer_types = ["CToT", "ToT", "Count"]
 87    timewalk_matrix[integer_types] = timewalk_matrix[integer_types].astype(np.uint32)
 88    return timewalk_matrix
 89
 90
 91def load_timewalk_lookup_table(DATA_FNAME) -> pd.DataFrame:
 92    """
 93    Load the timewalk lookup table from an HDF5 file.
 94
 95    Parameters:
 96    -----------
 97    DATA_FNAME : str
 98        The path to the HDF5 file.
 99
100    Returns:
101    --------
102    pd.DataFrame
103        A DataFrame containing the timewalk lookup table data.
104    """
105    with h5py.File(DATA_FNAME, "r") as f:
106        if "TimewalkLookupTable" in f.keys():
107            ds = f["TimewalkLookupTable"]
108            lookup_df = pd.DataFrame(ds)
109            lookup_df.columns = [i[0].decode() for i in ds.attrs["col_names"]]
110        else:
111            return None
112    # column names:
113    # ToT Average SumSquareDiff Std Count
114    lookup_df = lookup_df.dropna(axis=0, how="all")
115    integer_types = ["ToT", "Count"]
116    lookup_df[integer_types] = lookup_df[integer_types].astype(np.uint32)
117    lookup_df.set_index("ToT", inplace=True)
118
119    return lookup_df
120
121
122
if __name__ == "__main__":
    # Show floats with 12 digits after the decimal point.
    pd.set_option('display.float_format', '{:.12f}'.format)

    # Load everything up front; the two timewalk loaders may return None.
    pixel_hits = load_pixel_hits(EXAMPLE_DATA_HDF5)
    clusters = load_clusters(EXAMPLE_DATA_HDF5)
    timewalk_matrix = load_timewalk_matrix(EXAMPLE_DATA_HDF5)
    timewalk_lut = load_timewalk_lookup_table(EXAMPLE_DATA_HDF5)

    def _show(heading, frame):
        """Print a heading followed by the frame, skipping absent frames."""
        if frame is None:
            return
        print(heading)
        print(frame.to_string())

    _show("Pixel Hits: ", pixel_hits.head(15))
    _show("Clusters: ", clusters.head(15))
    # Only the first 10 rows and 10 columns of the matrix are previewed.
    _show(
        "Time walk matrix: ",
        None if timewalk_matrix is None else timewalk_matrix.iloc[0:10, 0:10],
    )
    _show(
        "Time walk look up table: ",
        None if timewalk_lut is None else timewalk_lut.head(15),
    )

Script Output

Example Output
hdf5 datasets: ['Clusters', 'ExposureTimeBoundaries', 'PixelHits', 'TimewalkLookupTable', 'TimewalkMatrix']
Pixel Hits: 
                toa  corrected_toa  cid    dtoa   tot    x    y
0   110345462734375             -1    0       0   200  140  193
1   110345462984375             -1    1       0   150  143  193
2   110345896437500             -1    2       0  1475   68   92
3   110345896453125             -1    2   15625   850   67   92
4   110345896656250             -1    2  218750   300   68   91
5   110345896890625             -1    2  453125   150   67   91
6   110346426531250             -1    3       0   800   80   80
7   110346426750000             -1    3  218750   250   80   81
8   110346426828125             -1    3  296875   250   79   80
9   110346973375000             -1    4       0   375   14   18
10  110346973406250             -1    4   31250   375   14   19
11  110349264125000             -1    5       0   375   49    5
12  110350602171875             -1    6       0  1675  146  106
13  110350602250000             -1    6   78125   425  146  105
14  110350602343750             -1    6  171875   350  145  106
Clusters: 
    id  size          min_toa          max_toa             ctoa  corrected_ctoa  sum_tot  ctot        x_average       x_centroid        y_average       y_centroid    x    y
0    0     1  110345462734375  110345462734375  110345462734375              -1      200   200 140.000000000000 140.000000000000 193.000000000000 193.000000000000  140  140
1    1     1  110345462984375  110345462984375  110345462984375              -1      150   150 143.000000000000 143.000000000000 193.000000000000 193.000000000000  143  143
2    2     4  110345896437500  110345896890625  110345896437500              -1     2775  1475  67.500000000000  67.639639639640  91.500000000000  91.837837837838   67   68
3    3     3  110346426531250  110346426828125  110346426531250              -1     1300   800  79.666666666667  79.807692307692  80.333333333333  80.192307692308   79   80
4    4     2  110346973375000  110346973406250  110346973375000              -1      750   375  14.000000000000  14.000000000000  18.500000000000  18.500000000000   14   14
5    5     1  110349264125000  110349264125000  110349264125000              -1      375   375  49.000000000000  49.000000000000   5.000000000000   5.000000000000   49   49
6    6     4  110350602171875  110350602765625  110350602171875              -1     2600  1675 146.000000000000 145.923076923077 105.750000000000 105.836538461538  145  146
7    7     2  110351021187500  110351021312500  110351021187500              -1     1475  1125 142.500000000000 142.237288135593   0.000000000000   0.000000000000  142  142
8    8     3  110351021250000  110351021500000  110351021250000              -1      850   425 142.666666666667 142.735294117647   2.333333333333   2.235294117647  142  143
9    9     4  110353755656250  110353755796875  110353755656250              -1     1700   700  38.500000000000  38.382352941176 194.500000000000 194.632352941176   38   38
10  10     4  110354402125000  110354402250000  110354402125000              -1     2500   925  63.500000000000  63.430000000000  22.500000000000  22.360000000000   63   63
11  11     4  110355869359375  110355869484375  110355869359375              -1     2875  1250  91.500000000000  91.339130434783  28.500000000000  28.652173913043   91   91
12  12     6  110356706015625  110356706968750  110356706015625              -1     2325   925 244.000000000000 244.333333333333 207.833333333333 207.709677419355  243  245
13  13     3  110360353937500  110360354109375  110360353937500              -1     1425   850 174.333333333333 174.228070175439 188.666666666667 188.824561403509  174  174
14  14     4  110360842171875  110360842218750  110360842187500              -1     3200   975 141.500000000000 141.406250000000 106.500000000000 106.507812500000  141  141
Time walk matrix: 
   CToT  ToT         AverageDToA              SumSquareDiff  Count                 Std                Sem
0   500   25 993566.187500000000  941923368960.000000000000     17 235387.453125000000 57089.843750000000
1   500   50 846507.437500000000  804314087424.000000000000     17 217514.578125000000 52755.035156250000
2   500   75 731534.125000000000  767222939648.000000000000     44 132048.796875000000 19907.105468750000
3   500  100 631304.750000000000 1042412011520.000000000000     57 135232.875000000000 17912.033203125000
4   500  125 546655.000000000000 1446529990656.000000000000     71 142736.343750000000 16939.687500000000
5   500  150 443782.218750000000 1320634417152.000000000000     97 116682.421875000000 11847.304687500000
6   500  175 358829.937500000000  556370952192.000000000000     86  80432.765625000000  8673.288085937500
7   500  200 290532.218750000000  617762848768.000000000000    101  78207.820312500000  7781.969238281250
8   500  225 230799.812500000000  554400022528.000000000000    118  68544.179687500000  6310.002929687500
9   500  250 176983.187500000000  557833125888.000000000000    104  73237.835937500000  7181.560546875000
Time walk look up table: 
            AverageDToA              SumSquareDiff                 Std  Count
ToT                                                                          
25  762459.312500000000 8039622508544.000000000000 235469.046875000000    146
50  702579.312500000000 6421094072320.000000000000 204194.562500000000    155
75  606299.562500000000 5496268914688.000000000000 184765.578125000000    162
100 502045.375000000000 4444139290624.000000000000 165628.984375000000    163
125 434020.375000000000 2673515429888.000000000000 127291.492187500000    166
150 359546.500000000000 1739993907200.000000000000 103003.492187500000    165
175 302247.875000000000 1132830261248.000000000000  82859.148437500000    166
200 258924.640625000000  706738716672.000000000000  65846.945312500000    164
225 214258.000000000000  608579092480.000000000000  61291.605468750000    163
250 183489.984375000000  415105777664.000000000000  49707.824218750000    169
275 162849.390625000000  374076506112.000000000000  48202.246093750000    162
300 144676.890625000000  251067138048.000000000000  39126.679687500000    165
325 126646.156250000000  399410954240.000000000000  49653.796875000000    163
350 116081.500000000000  265686974464.000000000000  41536.007812500000    155
375  99530.406250000000  112967852032.000000000000  26245.539062500000    165