diff --git a/Analyser/FitAnalyser.py b/Analyser/FitAnalyser.py
index 0115501..5d49d40 100644
--- a/Analyser/FitAnalyser.py
+++ b/Analyser/FitAnalyser.py
@@ -568,11 +568,11 @@ class FitAnalyser():
         :type input_core_dims: list or array like, optional
-        :param dask: over write of the same argument in xarray.apply_ufunc,, defaults to 'parallelized'
+        :param dask: overrides the same argument in xarray.apply_ufunc, defaults to 'parallelized'
         :type dask: str, optional
-        :param vectorize: over write of the same argument in xarray.apply_ufunc,, defaults to True
+        :param vectorize: overrides the same argument in xarray.apply_ufunc, defaults to True
         :type vectorize: bool, optional
-        :param keep_attrs: over write of the same argument in xarray.apply_ufunc,, defaults to True
+        :param keep_attrs: overrides the same argument in xarray.apply_ufunc, defaults to True
         :type keep_attrs: bool, optional
-        :param daskKwargs: over write of the same argument in xarray.apply_ufunc,, defaults to None
+        :param daskKwargs: overrides the same argument in xarray.apply_ufunc, defaults to None
         :type daskKwargs: dict, optional
         :return: The guessed initial parameters for the fit
         :rtype: xarray DataArray
@@ -891,7 +891,7 @@ class FitAnalyser():
         return res.reshape(shape, order='F')

     def eval(self, fitResultArray, x=None, y=None, output_core_dims=None, prefix="", dask='parallelized', vectorize=True, daskKwargs=None, **kwargs):
-        """_summary_
+        """Call the eval function of the fit model to calculate the curve.

         :param fitResultArray: The result of fit
         :type fitResultArray: xarray DataArray
diff --git a/DataContainer/ReadData.py b/DataContainer/ReadData.py
index 91eefa6..0da41e9 100644
--- a/DataContainer/ReadData.py
+++ b/DataContainer/ReadData.py
@@ -11,7 +11,17 @@ from datetime import datetime

 def _read_globals_attrs(variable_attrs, context=None):
-    """Combine attributes from different variables according to combine_attrs"""
+    """Find global parameters of shots, including scan axes.
+
+    :param variable_attrs: The attrs of current shot.
+    :type variable_attrs: dict
+    :param context: context object passed through by xarray's combine_attrs machinery (currently unused), defaults to None
+    :type context: None, optional
+    :return: The global attrs of the whole shot.
+    :rtype: dict
+    """
+
+    # Combine attributes from different variables according to combine_attrs
     if not variable_attrs:
         # no attributes to merge
         return None
@@ -86,12 +96,31 @@

 def _read_shot_number_from_hdf5(x):
+    """Add the current shot number to the data read from an HDF5 file.
+
+    :param x: The data of current shot
+    :type x: xarray DataArray
+    :return: The data with current shot number
+    :rtype: xarray DataArray
+    """
     filePath = x.encoding["source"]
     shotNum = filePath.split("_")[-1].split("_")[-1].split(".")[0]
     return x.assign(shotNum=shotNum)

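A minimal sketch of the shot-number parsing above, with a hypothetical file name (note that the second split("_") is a no-op, since the fragment it re-splits no longer contains an underscore):

    # Hypothetical file name; real shots follow the lab's own naming scheme.
    filePath = "data/2023/04/01_scan_0042.h5"
    shotNum = filePath.split("_")[-1].split(".")[0]  # -> "0042"
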
 def _assign_scan_axis_partial(x, datesetOfGlobal, fullFilePath):
+    """Find and add the scan axes to the data read from an HDF5 file.
+
+    :param x: The data of current shot
+    :type x: xarray DataArray
+    :param datesetOfGlobal: The xarray DataSet storing the information of global parameters
+    :type datesetOfGlobal: xarray DataSet
+    :param fullFilePath: The full and absolute file path of current shot
+    :type fullFilePath: str
+    :return: The data of current shot with scan axes
+    :rtype: xarray DataArray
+    """
+
     scanAxis = datesetOfGlobal.scanAxis
     filePath = x.encoding["source"].replace("\\", "/")
     shotNum = np.where(fullFilePath==filePath)
@@ -109,15 +138,42 @@ def _assign_scan_axis_partial(x, datesetOfGlobal, fullFilePath):

 def _update_globals_attrs(variable_attrs, context=None):
+    # for live plot panel
     pass

 def update_hdf5_file():
+    # for live plot panel
     pass

 def read_hdf5_file(filePath, group=None, datesetOfGlobal=None, preprocess=None, join="outer", parallel=True, engine="h5netcdf", phony_dims="access", excludeAxis=[], maxFileNum=None, **kwargs):
-
+    """Read the data from HDF5 files in the given path.
+
+    :param filePath: The path of HDF5 files, which python glob.glob() can read. It has to end with '.h5'.
+    :type filePath: str
+    :param group: The path of the group in the HDF5 file where the data is, defaults to None. Please use '/', instead of '\\'
+    :type group: str, optional
+    :param datesetOfGlobal: An xarray DataSet storing the global parameters of the data, defaults to None
+    :type datesetOfGlobal: xarray DataSet, optional
+    :param preprocess: The function to run on each file after reading and before combination, defaults to None
+    :type preprocess: a handle to function, optional
+    :param join: overrides the same argument in xarray.open_mfdataset, defaults to "outer"
+    :type join: str, optional
+    :param parallel: overrides the same argument in xarray.open_mfdataset, defaults to True
+    :type parallel: bool, optional
+    :param engine: The engine to read HDF5 file, defaults to "h5netcdf"
+    :type engine: str, optional
+    :param phony_dims: Please read the introduction of the h5netcdf package, defaults to "access"
+    :type phony_dims: str, optional
+    :param excludeAxis: The names of axes whose values change together with the scan axes, defaults to []
+    :type excludeAxis: list, optional
+    :param maxFileNum: The maximal number of files to read, defaults to None
+    :type maxFileNum: int, optional
+    :return: An xarray DataSet containing the data read from the specified HDF5 files, including scan axes and shot number.
+    :rtype: xarray DataSet
+    """
+
     filePath = np.sort(np.atleast_1d(filePath))

     filePathAbs = []
@@ -192,6 +248,18 @@

 def _assign_scan_axis_partial_and_remove_everything(x, datesetOfGlobal, fullFilePath):
+    """Find and add ONLY the scan axes to the data read from an HDF5 file.
+
+    :param x: The data of current shot
+    :type x: xarray DataArray
+    :param datesetOfGlobal: The xarray DataSet storing the information of global parameters
+    :type datesetOfGlobal: xarray DataSet
+    :param fullFilePath: The full and absolute file path of current shot
+    :type fullFilePath: str
+    :return: The data of current shot with scan axes
+    :rtype: xarray DataArray
+    """
+
     scanAxis = datesetOfGlobal.scanAxis
     filePath = x.encoding["source"].replace("\\", "/")
     shotNum = np.where(fullFilePath==filePath)
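A hedged usage sketch of read_hdf5_file as documented above; the glob pattern and group path are hypothetical:

    # Hypothetical paths; datesetOfGlobal comes from read_hdf5_global() further below.
    dataSetGlobal = read_hdf5_global("./data/2023/04/01/*.h5")
    dataSet = read_hdf5_file(
        "./data/2023/04/01/*.h5",
        group="images/absorption",       # group inside the HDF5 file, separated by '/'
        datesetOfGlobal=dataSetGlobal,
    )
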
@@ -209,12 +277,44 @@

 def _read_run_time_from_hdf5(x):
+    """Find the run time of the given data read from an HDF5 file.
+
+    :param x: The data of current shot
+    :type x: xarray DataArray
+    :return: The run time (last modification time) of the current shot
+    :rtype: datetime
+    """
     runTime = datetime.strptime(x.attrs['run time'], '%Y%m%dT%H%M%S')
     return runTime


 def read_hdf5_run_time(filePath, group=None, datesetOfGlobal=None, preprocess=None, join="outer", parallel=True, engine="h5netcdf", phony_dims="access", excludeAxis=[], maxFileNum=None, **kwargs):
-
+    """Read the run time from HDF5 files in the given path.
+
+    :param filePath: The path of HDF5 files, which python glob.glob() can read. It has to end with '.h5'.
+    :type filePath: str
+    :param group: The path of the group in the HDF5 file where the run time is, defaults to None. Please use '/', instead of '\\'
+    :type group: str, optional
+    :param datesetOfGlobal: An xarray DataSet storing the global parameters of the data, defaults to None
+    :type datesetOfGlobal: xarray DataSet, optional
+    :param preprocess: The function to run on each file after reading and before combination, defaults to None
+    :type preprocess: a handle to function, optional
+    :param join: overrides the same argument in xarray.open_mfdataset, defaults to "outer"
+    :type join: str, optional
+    :param parallel: overrides the same argument in xarray.open_mfdataset, defaults to True
+    :type parallel: bool, optional
+    :param engine: The engine to read HDF5 file, defaults to "h5netcdf"
+    :type engine: str, optional
+    :param phony_dims: Please read the introduction of the h5netcdf package, defaults to "access"
+    :type phony_dims: str, optional
+    :param excludeAxis: The names of axes whose values change together with the scan axes, defaults to []
+    :type excludeAxis: list, optional
+    :param maxFileNum: The maximal number of files to read, defaults to None
+    :type maxFileNum: int, optional
+    :return: An xarray DataSet containing the run times read from the specified HDF5 files.
+    :rtype: xarray DataSet
+    """
+
     filePath = np.sort(np.atleast_1d(filePath))

     filePathAbs = []
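A minimal sketch of the 'run time' parsing used by _read_run_time_from_hdf5; the timestamp string is a hypothetical example of the 'run time' attribute:

    from datetime import datetime

    # 'run time' is stored as a compact ISO-like string, e.g. '20230401T120000'.
    runTime = datetime.strptime("20230401T120000", '%Y%m%dT%H%M%S')
    print(runTime)  # -> 2023-04-01 12:00:00
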
@@ -289,6 +389,29 @@

 def read_hdf5_global(filePath, preprocess=None, join="outer", combine="nested", parallel=True, engine="h5netcdf", phony_dims="access", excludeAxis=[], maxFileNum=None, **kwargs):
+    """Read the global parameters and find the scan axes, from HDF5 files in the given path.
+
+    :param filePath: The path of HDF5 files, which python glob.glob() can read. It has to end with '.h5'.
+    :type filePath: str
+    :param preprocess: The function to run on each file after reading and before combination, defaults to None
+    :type preprocess: a handle to function, optional
+    :param join: overrides the same argument in xarray.open_mfdataset, defaults to "outer"
+    :type join: str, optional
+    :param combine: overrides the same argument in xarray.open_mfdataset, defaults to "nested"
+    :type combine: str, optional
+    :param parallel: overrides the same argument in xarray.open_mfdataset, defaults to True
+    :type parallel: bool, optional
+    :param engine: The engine to read HDF5 file, defaults to "h5netcdf"
+    :type engine: str, optional
+    :param phony_dims: Please read the introduction of the h5netcdf package, defaults to "access"
+    :type phony_dims: str, optional
+    :param excludeAxis: The names of axes whose values change together with the scan axes, defaults to []
+    :type excludeAxis: list, optional
+    :param maxFileNum: The maximal number of files to read, defaults to None
+    :type maxFileNum: int, optional
+    :return: An xarray DataSet containing the global parameters read from the specified HDF5 files.
+    :rtype: xarray DataSet
+    """

     filePath = np.sort(np.atleast_1d(filePath))
@@ -330,6 +453,13 @@

 def _read_csv_file_pandas(filePath, **kwargs):
+    """Read a csv file using the pandas function read_csv()
+
+    :param filePath: The path of csv files.
+    :type filePath: str
+    :return: An xarray DataSet storing the data
+    :rtype: xarray DataSet
+    """

     res = pd.read_csv(filePath, **kwargs)
@@ -338,6 +468,13 @@

 def _read_csv_file_dask(filePath, **kwargs):
+    """Read a csv file using the dask function read_csv()
+
+    :param filePath: The path of csv files.
+    :type filePath: str
+    :return: An xarray DataSet storing the data
+    :rtype: xarray DataSet
+    """

     res = df.read_csv(filePath, **kwargs)
@@ -346,6 +483,26 @@

 def read_csv_file(filePath, maxFileNum=None, dask='parallelized', vectorize=True, csvEngine='pandas', daskKwargs={}, csvKwargs={}, **kwargs):
+    """Read the data from csv files in the given path.
+
+    :param filePath: The path of csv files, which python glob.glob() can read. It has to end with '.csv'.
+    :type filePath: str
+    :param maxFileNum: The maximal number of files to read, defaults to None
+    :type maxFileNum: int, optional
+    :param dask: overrides the same argument in xarray.apply_ufunc, defaults to 'parallelized'
+    :type dask: str, optional
+    :param vectorize: overrides the same argument in xarray.apply_ufunc, defaults to True
+    :type vectorize: bool, optional
+    :param csvEngine: The engine to read csv file, defaults to 'pandas'
+    :type csvEngine: str, optional
+    :param daskKwargs: overrides the same argument in xarray.apply_ufunc, defaults to {}
+    :type daskKwargs: dict, optional
+    :param csvKwargs: The kwargs sent to the csvEngine, defaults to {}
+    :type csvKwargs: dict, optional
+    :return: An xarray DataSet storing the data
+    :rtype: xarray DataSet
+    """
+
     filePath = np.sort(np.atleast_1d(filePath))

     filePathAbs = []
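A hedged usage sketch of read_csv_file as documented above; the glob pattern and csv options are hypothetical:

    # Hypothetical pattern; csvKwargs are forwarded to the csvEngine (pandas.read_csv here).
    dataSet = read_csv_file("./logs/*.csv", csvEngine='pandas', csvKwargs={'sep': ','})
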
diff --git a/ToolFunction/ToolFunction.py b/ToolFunction/ToolFunction.py
index 1a333fc..2cdc246 100644
--- a/ToolFunction/ToolFunction.py
+++ b/ToolFunction/ToolFunction.py
@@ -9,7 +9,7 @@ import xarray as xr

 def get_mask(dataArray):
-    """generate a bool mask array for given dataArray
+    """Generate a bool mask array for the given dataArray

     :param dataArray: The given dataArray
     :type dataArray: xarray DataArray
@@ -20,7 +20,10 @@

 def remove_bad_shots(dataArray, **kwargs):
-    """copy and remove bad shots from the dataArray
+    """Copy and remove bad shots from the dataArray by setting their values to np.nan.
+    If you want to fully delete those nan data, please use xarray.DataArray.dropna() (see https://docs.xarray.dev/en/stable/generated/xarray.DataArray.dropna.html).
+    Here is an example of indexing the bad shots: remove_bad_shots(dataArray, axis_1 = the value (not index) of axis_1, axis_2 = the value of axis_2, ...).
+    For more details please read the 'Positional indexing' section at https://docs.xarray.dev/en/stable/user-guide/indexing.html#positional-indexing.

     :param dataArray: The given dataArray
     :type dataArray: xarray DataArray
@@ -33,7 +36,7 @@

 def auto_rechunk(dataSet):
-    """rechunk the dataSet or dataArray using auto rechunk function
+    """Rechunk the dataSet or dataArray using the auto rechunk function

     :param dataSet: The given dataArray or dataSet
     :type dataSet: xarray DataArray or xarray DataSet
@@ -48,7 +51,7 @@

 def copy_chunk(dataSet, dataChunk):
-    """copy the chunk and apply to another dataArray or dataSet
+    """Copy the chunk and apply it to another dataArray or dataSet

     :param dataSet: The dataArray or dataSet will be chunked
     :type dataSet: xarray DataArray or xarray DataSet
@@ -66,16 +69,16 @@

 def get_h5_file_path(folderpath, maxFileNum=None, filename='*.h5',):
-    """_summary_
+    """Get all the paths of HDF5 files in a specific folder

-    :param folderpath: _description_
-    :type folderpath: _type_
-    :param maxFileNum: _description_, defaults to None
-    :type maxFileNum: _type_, optional
-    :param filename: _description_, defaults to '*.h5'
+    :param folderpath: the path of the folder
+    :type folderpath: str
+    :param maxFileNum: the maximal number of returned files, defaults to None
+    :type maxFileNum: int, optional
+    :param filename: a string to specify the type of the file to read, defaults to '*.h5'
     :type filename: str, optional
-    :return: _description_
-    :rtype: _type_
+    :return: the found file paths
+    :rtype: 1D numpy array
     """
     filepath = np.sort(glob.glob(folderpath + filename))
     if maxFileNum is None:
@@ -85,15 +88,42 @@

 def get_date():
+    """Return today's date in a format compatible with file paths
+
+    :return: today's date in a format compatible with file paths
+    :rtype: str
+    """
     today = date.today()
     return today.strftime("%Y/%m/%d")


 def _combine_uncertainty(value, std):
+    """Given a list of values and standard deviations, combine them into numbers with uncertainty (ufloat), and return them in another list.
+    See https://pythonhosted.org/uncertainties/
+
+    :param value: The value
+    :type value: float, or array like
+    :param std: The standard deviation
+    :type std: float, or array like
+    :return: The combined values and standard deviations
+    :rtype: ufloat, or uncertainties uarray
+    """
     return unp.uarray(value, std)

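A minimal sketch of what _combine_uncertainty wraps: pairing values with standard deviations via the uncertainties package (the numbers are made up):

    from uncertainties import unumpy as unp

    # Each element becomes a ufloat carrying its own standard deviation.
    arr = unp.uarray([1.0, 2.0], [0.1, 0.2])
    print(arr[0].n, arr[0].s)  # -> 1.0 0.1
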
 def combine_uncertainty(value, std, dask='parallelized', **kwargs):
+    """Given an xarray DataArray of values and one of standard deviations, combine them into numbers with uncertainty (ufloat), and return them in another xarray DataArray.
+    See https://pythonhosted.org/uncertainties/
+
+    :param value: The value
+    :type value: xarray DataArray
+    :param std: The standard deviation
+    :type std: xarray DataArray
+    :param dask: overrides the same argument in xarray.apply_ufunc, defaults to 'parallelized'
+    :type dask: str, optional
+    :return: The combined values and standard deviations
+    :rtype: xarray DataArray
+    """

     kwargs.update(
         {
@@ -105,15 +135,38 @@

 def _seperate_uncertainty_single(data):
+    """From a number with uncertainty, read out the value and the standard deviation
+
+    :param data: The number with uncertainty
+    :type data: ufloat
+    :return: a tuple of (value, standard deviation)
+    :rtype: tuple of two floats
+    """
     return data.n, data.s


 def _seperate_uncertainty(data):
+    """From a list of numbers with uncertainty, read out the values and standard deviations
+
+    :param data: The list of numbers with uncertainty
+    :type data: ufloat, or uncertainties uarray
+    :return: a tuple of (a numpy array of values, a numpy array of standard deviations)
+    :rtype: tuple of two numpy arrays
+    """
     func = np.vectorize(_seperate_uncertainty_single)
     return func(data)


 def seperate_uncertainty(data, dask='parallelized', **kwargs):
+    """From an xarray DataArray of numbers with uncertainty, read out the values and standard deviations
+
+    :param data: The xarray DataArray of numbers with uncertainty
+    :type data: xarray DataArray
+    :param dask: overrides the same argument in xarray.apply_ufunc, defaults to 'parallelized'
+    :type dask: str, optional
+    :return: a tuple of (an xarray DataArray of values, an xarray DataArray of standard deviations)
+    :rtype: tuple of two xarray DataArray
+    """

     kwargs.update(
         {
@@ -126,6 +179,14 @@

 def get_scanAxis(dataSet):
+    """Return a numpy array of the names of the scan axes.
+
+    :param dataSet: The xarray DataSet storing the data.
+    :type dataSet: xarray DataSet
+    :return: The names of the scan axes
+    :rtype: a numpy array
+    """
+
     res = dataSet.scanAxis

     if len(res) == 0:
@@ -139,6 +200,11 @@

 def print_scanAxis(dataSet):
+    """Print the names and the values of the scan axes.
+
+    :param dataSet: The xarray DataSet storing the data.
+    :type dataSet: xarray DataSet
+    """
     scanAxis = dataSet.scanAxis

     scan = {}
@@ -153,9 +219,21 @@
         )
     print("The detected scaning axes and values are: \n")
     print(scan)
+
+
+def print_scanAxis_original(dataSet):
+    # print the original (unsorted) scan axes
+    pass


 def calculate_mean(dataSet):
+    """Calculate the mean along the repetition axis 'runs'
+
+    :param dataSet: The xarray DataSet or DataArray storing the data.
+    :type dataSet: xarray DataSet or DataArray
+    :return: The mean value
+    :rtype: xarray DataSet or DataArray
+    """
     if 'runs' in dataSet.dims:
         return dataSet.mean(dim='runs')
     else:
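A small sketch of the repetition-axis reductions documented here and just below, on a hypothetical DataArray with a 'runs' dimension:

    import numpy as np
    import xarray as xr

    # Hypothetical data: 3 repetitions of a 5-point scan.
    da = xr.DataArray(np.random.rand(3, 5), dims=['runs', 'x'])
    mean = calculate_mean(da)  # averages over 'runs'
    std = calculate_std(da)    # standard deviation over 'runs'
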
@@ -163,6 +241,13 @@

 def calculate_std(dataSet):
+    """Calculate the standard deviation along the repetition axis 'runs'
+
+    :param dataSet: The xarray DataSet or DataArray storing the data.
+    :type dataSet: xarray DataSet or DataArray
+    :return: The standard deviation
+    :rtype: xarray DataSet or DataArray
+    """
     if 'runs' in dataSet.dims:
         return dataSet.std(dim='runs')
     else:
@@ -170,13 +255,22 @@

 def extract_temperature_from_fit():
+    # given the shot number: read the data, fit the center of the cloud for each time of flight (ToF), and extract the temperature of the cloud from the ToF fit
     pass


 def extract_condensate_fraction_from_fit():
+    # given the shot number: read the data, fit the cloud, and extract the condensate fraction of the cloud
     pass


 def swap_xy(dataSet):
+    """Swap the x and y axes.
+
+    :param dataSet: The xarray DataSet or DataArray storing the data.
+    :type dataSet: xarray DataSet or DataArray
+    :return: The xarray DataSet or DataArray with swapped x and y axes.
+    :rtype: xarray DataSet or DataArray
+    """
     dataSet = dataSet.rename_dims(dict(x='__x'))
     dataSet = dataSet.rename_dims(dict(y='x'))
     dataSet = dataSet.rename_dims(dict(__x='y'))
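A short sketch of swap_xy on a hypothetical dataset whose image has dims ('x', 'y'); only the dimension labels are swapped, the data itself is untouched:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({'img': (['x', 'y'], np.zeros((10, 20)))})
    swapped = swap_xy(ds)
    print(swapped['img'].dims)  # -> ('y', 'x')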