From 43106834a5f22733d3a4a189fc73255a77260cce Mon Sep 17 00:00:00 2001
From: Gao <gao@physi.uni-heidelberg.de>
Date: Mon, 22 May 2023 19:35:09 +0200
Subject: [PATCH] implement reading from csv

---
 Analyser/FitAnalyser.py   |  2 +-
 DataContainer/ReadData.py | 94 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/Analyser/FitAnalyser.py b/Analyser/FitAnalyser.py
index cc12f87..4fd7591 100644
--- a/Analyser/FitAnalyser.py
+++ b/Analyser/FitAnalyser.py
@@ -284,7 +284,7 @@ class FitAnalyser():
         
         self.fitDim = fitDim
 
-    def print_params_set_templat(self, params=None):
+    def print_params_set_template(self, params=None):
         
         if params is None:
             params = self.fitModel.make_params()
diff --git a/DataContainer/ReadData.py b/DataContainer/ReadData.py
index 799bc95..6928cb8 100644
--- a/DataContainer/ReadData.py
+++ b/DataContainer/ReadData.py
@@ -1,4 +1,6 @@
 import xarray as xr
+import dask.dataframe as df
+import pandas as pd
 import numpy as np
 from collections import OrderedDict
 from functools import partial
@@ -188,6 +190,7 @@ def read_hdf5_file(filePath, group=None, datesetOfGlobal=None, preprocess=None,
         
     return ds
 
+
 def _assign_scan_axis_partial_and_remove_everything(x, datesetOfGlobal, fullFilePath):
     scanAxis = datesetOfGlobal.scanAxis
     filePath = x.encoding["source"].replace("\\", "/")
@@ -208,6 +211,7 @@ def _read_run_time_from_hdf5(x):
     runTime = datetime.strptime(x.attrs['run time'], '%Y%m%dT%H%M%S')
     return runTime
 
+
 def read_hdf5_run_time(filePath, group=None, datesetOfGlobal=None, preprocess=None, join="outer", parallel=True, engine="h5netcdf", phony_dims="access", excludeAxis=[], maxFileNum=None, **kwargs):
 
     filePath = np.sort(np.atleast_1d(filePath))
@@ -282,6 +286,7 @@ def read_hdf5_run_time(filePath, group=None, datesetOfGlobal=None, preprocess=No
         
     return ds
 
+
 def read_hdf5_global(filePath, preprocess=None, join="outer", combine="nested", parallel=True, engine="h5netcdf", phony_dims="access", excludeAxis=[], maxFileNum=None, **kwargs):
     
     filePath = np.sort(np.atleast_1d(filePath))
@@ -321,4 +326,91 @@ def read_hdf5_global(filePath, preprocess=None, join="outer", combine="nested",
     datesetOfGlobal.attrs['scanAxis'] = np.setdiff1d(datesetOfGlobal.attrs['scanAxis'], excludeAxis)
         
     return datesetOfGlobal
-    
\ No newline at end of file
+
+
+def _read_csv_file_pandas(filePath, **kwargs):
+    """Load one CSV file with pandas and return its content as a numpy array."""
+    frame = pd.read_csv(filePath, **kwargs)
+    # Dataset -> DataArray stacks the columns along a new leading axis.
+    stacked = xr.Dataset.from_dataframe(frame).to_array()
+    return stacked.to_numpy()
+
+
+def _read_csv_file_dask(filePath, **kwargs):
+    """Load one CSV file with dask.dataframe and return it as a numpy array."""
+    frame = df.read_csv(filePath, **kwargs)
+    # NOTE(review): from_dataframe is documented for pandas input - confirm dask works.
+    stacked = xr.Dataset.from_dataframe(frame).to_array()
+    return stacked.to_numpy()
+    
+    
+def read_csv_file(filePath, maxFileNum=None, dask='parallelized', vectorize=True, csvEngine='pandas', daskKwargs={}, csvKwargs={}, **kwargs):
+    """Read one or more CSV files into a single xarray Dataset.
+
+    Each path or glob pattern in ``filePath`` is expanded and every matching
+    file is parsed through ``xr.apply_ufunc``, which adds the ``fileIndex``
+    dimension. Variable names and coordinates come from the first file.
+
+    :param filePath: path, glob pattern, or sequence of them
+    :param maxFileNum: if not None, keep only the first ``maxFileNum`` files
+    :param dask: forwarded to ``xr.apply_ufunc`` as ``dask``
+    :param vectorize: forwarded to ``xr.apply_ufunc`` as ``vectorize``
+    :param csvEngine: ``'pandas'`` or ``'dask'``, selects the CSV reader
+    :param daskKwargs: forwarded to ``xr.apply_ufunc`` as ``dask_gufunc_kwargs``
+    :param csvKwargs: keyword arguments passed to the selected ``read_csv``
+    :param kwargs: further keyword arguments for ``xr.apply_ufunc``
+    :return: Dataset with one variable per CSV column
+    :raises ValueError: if ``csvEngine`` is not a supported reader
+    :raises FileNotFoundError: if there is no file left to read
+    """
+    readers = {
+        'pandas': (pd.read_csv, _read_csv_file_pandas),
+        'dask': (df.read_csv, _read_csv_file_dask),
+    }
+    # Fail early: an unknown engine used to surface later as a NameError.
+    if csvEngine not in readers:
+        raise ValueError("Unsupported csvEngine: {!r}".format(csvEngine))
+    readFirst, readEach = readers[csvEngine]
+
+    patterns = [
+        os.path.abspath(path).replace("\\", "/")
+        for path in np.sort(np.atleast_1d(filePath))
+    ]
+
+    # Concatenate the matches pattern by pattern; stacking them in an array
+    # breaks as soon as two patterns match a different number of files.
+    matched = [
+        path.replace("\\", "/")
+        for pattern in patterns
+        for path in sorted(glob.glob(pattern))
+    ]
+
+    if maxFileNum is not None:
+        matched = matched[0:int(maxFileNum)]
+
+    if not matched:
+        raise FileNotFoundError("No csv file to read for {}".format(patterns))
+
+    # The first file supplies variable names, dimensions and coordinates.
+    res_first = xr.Dataset.from_dataframe(readFirst(matched[0], **csvKwargs))
+    data_vars = list(res_first.keys())
+    # Multi-level column names arrive as tuples; join them into flat names.
+    if len(np.shape(data_vars)) > 1:
+        data_vars = np.array([''.join(name) for name in data_vars])
+
+    fullFilePath = xr.DataArray(data=np.array(matched), dims=['fileIndex'])
+    kwargs.update(
+        {
+            'dask': dask,
+            'vectorize': vectorize,
+            'output_core_dims': [['data_vars', *res_first.dims]],
+            "dask_gufunc_kwargs": daskKwargs,
+        }
+    )
+    res = xr.apply_ufunc(readEach, fullFilePath, kwargs=csvKwargs, **kwargs)
+    res = res.assign_coords({'data_vars': data_vars}).to_dataset(dim='data_vars')
+
+    for key in res_first.coords:
+        res = res.assign_coords({key: res_first[key]})
+
+    return res
\ No newline at end of file