import xarray as xr
import numpy as np
from collections import OrderedDict
from functools import partial
import copy


def _read_globals_attrs(variable_attrs, context=None):
    """Combine attributes from different variables according to combine_attrs.

    Attributes whose values agree across all files are kept as plain entries
    of the result; attributes whose values differ between files are treated
    as scan axes and collected into per-file value arrays.

    :param variable_attrs: sequence of attribute dicts, one per opened file.
    :param context: unused; required by xarray's ``combine_attrs`` hook.
    :return: merged attribute dict, extended with ``scanAxis`` (list of axis
        names) and ``scanAxisLength`` (values per axis), or ``None`` when
        there is nothing to merge.
    """
    if not variable_attrs:
        # no attributes to merge
        return None

    from xarray.core.utils import equivalent

    result = {}                     # attrs identical in every file
    dropped_attrs = OrderedDict()   # attrs whose values vary between files

    for attrs in variable_attrs:
        # Keep any key not yet classified as varying ...
        result.update(
            {
                key: value
                for key, value in attrs.items()
                if key not in result and key not in dropped_attrs.keys()
            }
        )
        # ... then demote keys whose value disagrees with this file.
        result = {
            key: value
            for key, value in result.items()
            if key not in attrs or equivalent(attrs[key], value)
        }
        dropped_attrs.update(
            {
                key: []
                for key in attrs if key not in result
            }
        )

    # Collect the per-file values of every varying attribute.
    for attrs in variable_attrs:
        dropped_attrs.update(
            {
                # NOTE(review): assumes every file carries every dropped key;
                # a file missing one of them would raise KeyError here --
                # confirm against the data layout.
                key: np.append(dropped_attrs[key], attrs[key])
                for key in dropped_attrs.keys()
            }
        )

    # Deduplicate scan axes: when two attributes vary identically across the
    # files, only the first one becomes an axis; the later one is recorded in
    # ``result`` as an alias pointing at the kept axis name.
    scan_attrs = OrderedDict()
    scan_length = []
    for attrs_key in dropped_attrs.keys():
        is_new_axis = True
        for key in scan_attrs.keys():
            if equivalent(scan_attrs[key], dropped_attrs[attrs_key]):
                is_new_axis = False
                result.update({attrs_key: key})
                break
        if is_new_axis:
            scan_attrs.update({attrs_key: dropped_attrs[attrs_key]})
            scan_length = np.append(scan_length, len(dropped_attrs[attrs_key]))

    result.update(
        {
            key: value
            for key, value in scan_attrs.items()
        }
    )

    result.update(
        {
            "scanAxis": list(scan_attrs.keys()),
            "scanAxisLength": scan_length,
        }
    )

    return result


def _shot_number_from_path(filePath):
    """Extract the shot number string from an HDF5 shot file path.

    Expects file names like ``..._Evaporative_Cooling_000.h5`` and returns
    the last ``_``-separated token without its extension (e.g. ``"000"``).

    The original code chained ``.split("_")[-1]`` twice; after the first
    split the string contains no ``"_"``, so the second split was a no-op
    and has been removed (same result for every input).
    """
    return filePath.split("_")[-1].split(".")[0]


def _read_shot_number_from_hdf5(x):
    """Preprocess hook: attach the shot number (parsed from the source file
    name) to the dataset ``x`` as variable ``shotNum``."""
    filePath = x.encoding["source"]
    return x.assign(shotNum=_shot_number_from_path(filePath))


def _assign_scan_axis_partial(x, datesetOfGlobal):
    """Preprocess hook: expand ``x`` along the scan axes of the globals.

    Attaches the shot number, adds one dimension per scan axis, and assigns
    the scan coordinate values recorded for this shot in the global dataset's
    attributes (indexed by the integer shot number).

    :param x: dataset for a single shot file.
    :param datesetOfGlobal: combined globals dataset carrying ``scanAxis``
        and per-axis value arrays in its attrs.  (Name kept as-is -- typo of
        "dataset" -- for backward compatibility with existing callers.)
    """
    scanAxis = datesetOfGlobal.scanAxis
    shotNum = _shot_number_from_path(x.encoding["source"])
    x = x.assign(shotNum=shotNum)
    x = x.expand_dims(list(scanAxis))

    return x.assign_coords(
        {
            key: np.atleast_1d(datesetOfGlobal.attrs[key][int(shotNum)])
            for key in scanAxis
        }
    )


def read_hdf5_file(filePath, group=None, datesetOfGlobal=None, preprocess=None, join="outer", parallel=True, engine="h5netcdf", phony_dims="access", **kwargs):
    """Read a set of labscript HDF5 shot files into one xarray Dataset.

    First combines the ``globals`` group of all files (deriving the scan
    axes via :func:`_read_globals_attrs`), then opens ``group`` of every
    file with the scan coordinates assigned, and finally renames the
    auto-generated (phony) dimensions to ``x``, ``y``, ``z``, ``a`` ...

    :param filePath: path or glob pattern of the HDF5 files to read.
    :param group: HDF5 group to load from each file.
    :param datesetOfGlobal: pre-combined globals dataset; computed from the
        files' ``globals`` group when ``None``.  (Parameter name -- a typo of
        "dataset" -- kept for backward compatibility.)
    :param preprocess: per-file preprocess function; defaults to assigning
        the scan-axis coordinates.
    :param join, parallel, engine, phony_dims: forwarded to
        ``xarray.open_mfdataset``.
    :param kwargs: further keyword arguments for ``xarray.open_mfdataset``.
    :return: combined dataset with the globals copied into its attrs.
    """
    kwargs.update(
        {
            'join': join,
            'parallel': parallel,
            'engine': engine,
            'phony_dims': phony_dims,
            'group': group
        }
    )

    if datesetOfGlobal is None:
        datesetOfGlobal = xr.open_mfdataset(
            filePath,
            group="globals",
            concat_dim="fileNum",
            combine="nested",
            preprocess=_read_shot_number_from_hdf5,
            engine="h5netcdf",
            phony_dims="access",
            combine_attrs=_read_globals_attrs,
            parallel=True, )

    _assign_scan_axis = partial(_assign_scan_axis_partial, datesetOfGlobal=datesetOfGlobal)

    if preprocess is None:
        kwargs.update({'preprocess': _assign_scan_axis})
    else:
        kwargs.update({'preprocess': preprocess})

    ds = xr.open_mfdataset(filePath, **kwargs)

    # New names for the phony dimensions: x, y, z, then a, b, c, ... w.
    newDimKey = np.append(['x', 'y', 'z'], [chr(i) for i in range(97, 97 + 23)])

    # Sorted for a deterministic phony_dim_0 -> x, phony_dim_1 -> y mapping.
    oldDimKey = np.sort(
        [
            key
            for key in ds.dims
            if key not in datesetOfGlobal.scanAxis
        ]
    )

    renameDict = {
        oldDimKey[j]: newDimKey[j]
        for j in range(len(oldDimKey))
    }

    ds = ds.rename_dims(renameDict)

    # Deep-copy so later mutation of ds.attrs cannot alter the globals.
    ds.attrs = copy.deepcopy(datesetOfGlobal.attrs)

    return ds
b/DataContainer/__pycache__/ReadData.cpython-38.pyc differ diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..9cd4979 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,850 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from DataContainer.ReadData import read_hdf5_file" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# filepath = \"//DyLabNAS/Data/Evaporative_Cooling/2023/04/18/0003/*.h5\"\n", + "# filepath = \"//DyLabNAS/Data/Evaporative_Cooling/2023/04/18/0003/2023-04-18_0003_Evaporative_Cooling_000.h5\"\n", + "\n", + "filepath = \"//DyLabNAS/Data/Repetition_scan/2023/04/21/0000/*.h5\"\n", + "\n", + "groupList = [\n", + " \"images/MOT_3D_Camera/in_situ_absorption\",\n", + " \"images/ODT_1_Axis_Camera/in_situ_absorption\",\n", + "]\n", + "\n", + "dskey = {\n", + " \"images/MOT_3D_Camera/in_situ_absorption\": \"camera_1\",\n", + " \"images/ODT_1_Axis_Camera/in_situ_absorption\": \"camera_2\",\n", + "}\n", + "\n", + "ds = {\n", + " dskey[groupList[i]]: read_hdf5_file(filepath, groupList[i])\n", + " for i in range(len(groupList))\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:       (initial_freq: 10, runs: 3, x: 1200, y: 1920)\n",
+       "Coordinates:\n",
+       "  * initial_freq  (initial_freq) float64 100.8 101.0 101.2 ... 102.4 102.6 102.8\n",
+       "  * runs          (runs) float64 0.0 1.0 2.0\n",
+       "Dimensions without coordinates: x, y\n",
+       "Data variables:\n",
+       "    atoms         (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    background    (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    dark          (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    shotNum       (initial_freq, runs) <U2 '09' '19' '29' ... '05' '15' '25'\n",
+       "Attributes: (12/96)\n",
+       "    TOF_free:                          0.02\n",
+       "    abs_img_freq:                      110.858\n",
+       "    absorption_imaging_flag:           True\n",
+       "    backup_data:                       True\n",
+       "    blink_off_time:                    nan\n",
+       "    blink_on_time:                     nan\n",
+       "    ...                                ...\n",
+       "    z_offset_img:                      0.189\n",
+       "    mot_3d_freq:                       initial_freq\n",
+       "    initial_freq:                      [101.67 102.36 102.13 100.98 102.59 10...\n",
+       "    runs:                              [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1...\n",
+       "    scanAxis:                          ['initial_freq', 'runs']\n",
+       "    scanAxisLength:                    [30. 30.]
" + ], + "text/plain": [ + "\n", + "Dimensions: (initial_freq: 10, runs: 3, x: 1200, y: 1920)\n", + "Coordinates:\n", + " * initial_freq (initial_freq) float64 100.8 101.0 101.2 ... 102.4 102.6 102.8\n", + " * runs (runs) float64 0.0 1.0 2.0\n", + "Dimensions without coordinates: x, y\n", + "Data variables:\n", + " atoms (initial_freq, runs, x, y) uint16 dask.array\n", + " background (initial_freq, runs, x, y) uint16 dask.array\n", + " dark (initial_freq, runs, x, y) uint16 dask.array\n", + " shotNum (initial_freq, runs)