From 8aa180c27665a9476041943f02105e0fe38fd137 Mon Sep 17 00:00:00 2001
From: Jianshun Gao
Date: Mon, 24 Apr 2023 13:03:23 +0200
Subject: [PATCH] finish reading from hdf5

---
 Analyser/AbsoprtionImaging.py                 |   0
 DataContainer/ReadData.py                     | 153 ++++
 .../__pycache__/ReadData.cpython-38.pyc       | Bin 0 -> 3890 bytes
 test.ipynb                                    | 850 ++++++++++++++++++
 4 files changed, 1003 insertions(+)
 create mode 100644 Analyser/AbsoprtionImaging.py
 create mode 100644 DataContainer/ReadData.py
 create mode 100644 DataContainer/__pycache__/ReadData.cpython-38.pyc
 create mode 100644 test.ipynb

diff --git a/Analyser/AbsoprtionImaging.py b/Analyser/AbsoprtionImaging.py
new file mode 100644
index 0000000..e69de29
diff --git a/DataContainer/ReadData.py b/DataContainer/ReadData.py
new file mode 100644
index 0000000..282e8f4
--- /dev/null
+++ b/DataContainer/ReadData.py
@@ -0,0 +1,153 @@
+import xarray as xr
+import numpy as np
+from collections import OrderedDict
+from functools import partial
+import copy
+
+
+def _read_globals_attrs(variable_attrs, context=None):
+    """Combine attributes from different variables according to combine_attrs"""
+    if not variable_attrs:
+        # no attributes to merge
+        return None
+
+    from xarray.core.utils import equivalent
+
+    result = {}
+    dropped_attrs = OrderedDict()
+    for attrs in variable_attrs:
+        result.update(
+            {
+                key: value
+                for key, value in attrs.items()
+                if key not in result and key not in dropped_attrs.keys()
+            }
+        )
+        result = {
+            key: value
+            for key, value in result.items()
+            if key not in attrs or equivalent(attrs[key], value)
+        }
+        dropped_attrs.update(
+            {
+                key: []
+                for key in attrs if key not in result
+            }
+        )
+
+    for attrs in variable_attrs:
+        dropped_attrs.update(
+            {
+                key: np.append(dropped_attrs[key], attrs[key])
+                for key in dropped_attrs.keys()
+            }
+        )
+
+    scan_attrs = OrderedDict()
+    scan_length = []
+    for attrs_key in dropped_attrs.keys():
+        flag = True
+        for key in scan_attrs.keys():
+            if equivalent(scan_attrs[key], dropped_attrs[attrs_key]):
+                flag = False
+
+                result.update({attrs_key: key})
+
+                break
+        if flag:
+            scan_attrs.update({
+                attrs_key: dropped_attrs[attrs_key]
+            })
+            scan_length = np.append(scan_length, len(dropped_attrs[attrs_key]))
+
+    result.update(
+        {
+            key: value
+            for key, value in scan_attrs.items()
+        }
+    )
+
+    result.update(
+        {
+            "scanAxis": list(scan_attrs.keys()),
+            "scanAxisLength": scan_length,
+        }
+    )
+
+    return result
+
+
+def _read_shot_number_from_hdf5(x):
+    filePath = x.encoding["source"]
+    shotNum = filePath.split("_")[-1].split("_")[-1].split(".")[0]
+    return x.assign(shotNum=shotNum)
+
+
+def _assign_scan_axis_partial(x, datesetOfGlobal):
+    scanAxis = datesetOfGlobal.scanAxis
+    filePath = x.encoding["source"]
+    shotNum = filePath.split("_")[-1].split("_")[-1].split(".")[0]
+    x = x.assign(shotNum=shotNum)
+    x = x.expand_dims(list(scanAxis))
+
+    return x.assign_coords(
+        {
+            key: np.atleast_1d(datesetOfGlobal.attrs[key][int(shotNum)])
+            for key in scanAxis
+        }
+    )
+
+
+def read_hdf5_file(filePath, group=None, datesetOfGlobal=None, preprocess=None, join="outer", parallel=True, engine="h5netcdf", phony_dims="access", **kwargs):
+
+    kwargs.update(
+        {
+            'join': join,
+            'parallel': parallel,
+            'engine': engine,
+            'phony_dims': phony_dims,
+            'group': group
+        }
+    )
+
+    if datesetOfGlobal is None:
+        datesetOfGlobal = xr.open_mfdataset(
+            filePath,
+            group="globals",
+            concat_dim="fileNum",
+            combine="nested",
+            preprocess=_read_shot_number_from_hdf5,
+            engine="h5netcdf",
+            phony_dims="access",
+            combine_attrs=_read_globals_attrs,
+            parallel=True,
+        )
+
+    _assgin_scan_axis = partial(_assign_scan_axis_partial, datesetOfGlobal=datesetOfGlobal)
+
+    if preprocess is None:
+        kwargs.update({'preprocess':_assgin_scan_axis})
+    else:
+        kwargs.update({'preprocess':preprocess})
+
+    ds = xr.open_mfdataset(filePath, **kwargs)
+
+    newDimKey = np.append(['x', 'y', 'z'], [ chr(i) for i in range(97, 97+23)])
+
+    oldDimKey = np.sort(
+        [
+            key
+            for key in ds.dims
+            if not key in datesetOfGlobal.scanAxis
+        ]
+    )
+
+    renameDict = {
+        oldDimKey[j]: newDimKey[j]
+        for j in range(len(oldDimKey))
+    }
+
+    ds = ds.rename_dims(renameDict)
+
+    ds.attrs = copy.deepcopy(datesetOfGlobal.attrs)
+
+    return ds
\ No newline at end of file
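A minimal usage sketch for read_hdf5_file (not part of the committed files; the path and group names are taken from test.ipynb below): the "globals" group can be opened once, with the same arguments the function uses internally, and then reused for every image group through the datesetOfGlobal argument, so the globals of each HDF5 file are only parsed a single time.

import xarray as xr

from DataContainer.ReadData import (
    read_hdf5_file,
    _read_globals_attrs,
    _read_shot_number_from_hdf5,
)

# Placeholder path, mirroring the notebook below.
filepath = "//DyLabNAS/Data/Repetition_scan/2023/04/21/0000/*.h5"

# Open the "globals" group once, exactly as read_hdf5_file does internally.
globalsDataset = xr.open_mfdataset(
    filepath,
    group="globals",
    concat_dim="fileNum",
    combine="nested",
    preprocess=_read_shot_number_from_hdf5,
    engine="h5netcdf",
    phony_dims="access",
    combine_attrs=_read_globals_attrs,
    parallel=True,
)

# Reuse the combined globals for both camera groups instead of re-reading them.
ds_mot = read_hdf5_file(
    filepath,
    group="images/MOT_3D_Camera/in_situ_absorption",
    datesetOfGlobal=globalsDataset,
)
ds_odt = read_hdf5_file(
    filepath,
    group="images/ODT_1_Axis_Camera/in_situ_absorption",
    datesetOfGlobal=globalsDataset,
)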
diff --git a/DataContainer/__pycache__/ReadData.cpython-38.pyc b/DataContainer/__pycache__/ReadData.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4be8a6555678ef912d70bbb14adf5a144c883d50
GIT binary patch
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..9cd4979
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,850 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from DataContainer.ReadData import read_hdf5_file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# filepath = \"//DyLabNAS/Data/Evaporative_Cooling/2023/04/18/0003/*.h5\"\n",
+    "# filepath = \"//DyLabNAS/Data/Evaporative_Cooling/2023/04/18/0003/2023-04-18_0003_Evaporative_Cooling_000.h5\"\n",
+    "\n",
+    "filepath = \"//DyLabNAS/Data/Repetition_scan/2023/04/21/0000/*.h5\"\n",
+    "\n",
+    "groupList = [\n",
+    "    \"images/MOT_3D_Camera/in_situ_absorption\",\n",
+    "    \"images/ODT_1_Axis_Camera/in_situ_absorption\",\n",
+    "]\n",
+    "\n",
+    "dskey = {\n",
+    "    \"images/MOT_3D_Camera/in_situ_absorption\": \"camera_1\",\n",
+    "    \"images/ODT_1_Axis_Camera/in_situ_absorption\": \"camera_2\",\n",
+    "}\n",
+    "\n",
+    "ds = {\n",
+    "    dskey[groupList[i]]: read_hdf5_file(filepath, groupList[i])\n",
+    "    for i in range(len(groupList))\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<xarray.Dataset>\n",
+       "Dimensions:       (initial_freq: 10, runs: 3, x: 1200, y: 1920)\n",
+       "Coordinates:\n",
+       "  * initial_freq  (initial_freq) float64 100.8 101.0 101.2 ... 102.4 102.6 102.8\n",
+       "  * runs          (runs) float64 0.0 1.0 2.0\n",
+       "Dimensions without coordinates: x, y\n",
+       "Data variables:\n",
+       "    atoms         (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    background    (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    dark          (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    shotNum       (initial_freq, runs) <U2 '09' '19' '29' ... '05' '15' '25'\n",
+       "Attributes: (12/96)\n",
+       "    TOF_free:                          0.02\n",
+       "    abs_img_freq:                      110.858\n",
+       "    absorption_imaging_flag:           True\n",
+       "    backup_data:                       True\n",
+       "    blink_off_time:                    nan\n",
+       "    blink_on_time:                     nan\n",
+       "    ...                                ...\n",
+       "    z_offset_img:                      0.189\n",
+       "    mot_3d_freq:                       initial_freq\n",
+       "    initial_freq:                      [101.67 102.36 102.13 100.98 102.59 10...\n",
+       "    runs:                              [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1...\n",
+       "    scanAxis:                          ['initial_freq', 'runs']\n",
+       "    scanAxisLength:                    [30. 30.]"
+      ],
+      "text/plain": [
+       "<xarray.Dataset>\n",
+       "Dimensions:       (initial_freq: 10, runs: 3, x: 1200, y: 1920)\n",
+       "Coordinates:\n",
+       "  * initial_freq  (initial_freq) float64 100.8 101.0 101.2 ... 102.4 102.6 102.8\n",
+       "  * runs          (runs) float64 0.0 1.0 2.0\n",
+       "Dimensions without coordinates: x, y\n",
+       "Data variables:\n",
+       "    atoms         (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    background    (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    dark          (initial_freq, runs, x, y) uint16 dask.array<chunksize=(1, 1, 1200, 1920), meta=np.ndarray>\n",
+       "    shotNum       (initial_freq, runs)