import glob
from datetime import date
import copy
import numpy as np
from uncertainties import unumpy as unp
import xarray as xr
def get_mask(dataArray):
    """Generate a boolean mask array for the given dataArray.

    :param dataArray: The given dataArray
    :type dataArray: xarray DataArray
    :return: the mask array
    :rtype: numpy array of bool elements
    """
    return np.ones(dataArray.shape, dtype=bool)
def remove_bad_shots(dataArray, **kwargs):
    """Copy the dataArray and remove bad shots by setting their values to np.nan.
    If you want to fully delete those NaN entries, please use xarray.DataArray.dropna()
    (see https://docs.xarray.dev/en/stable/generated/xarray.DataArray.dropna.html).
    Here is an example for indexing the bad shots:
    remove_bad_shots(dataArray, axis_1=the value (not index) of axis_1, axis_2=the value of axis_2, ...)
    For more details please read the 'Positional indexing' section of
    https://docs.xarray.dev/en/stable/user-guide/indexing.html#positional-indexing.

    :param dataArray: The given dataArray
    :type dataArray: xarray DataArray
    :return: The dataArray after removal
    :rtype: xarray DataArray
    """
    dataArray = copy.deepcopy(dataArray)
    dataArray.loc[dict(kwargs)] = np.nan
    return dataArray
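# Usage sketch (hedged): the dimension name 'shotNum' and the value 5 are illustrative
# assumptions only; they are not defined anywhere in this module.
#   cleaned = remove_bad_shots(dataArray, shotNum=5)    # set the shot at shotNum == 5 to NaN
#   cleaned = cleaned.dropna(dim='shotNum')             # optionally drop the NaN entries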
def auto_rechunk(dataSet):
    """Rechunk the dataSet or dataArray using the automatic rechunking of dask.

    :param dataSet: The given dataArray or dataSet
    :type dataSet: xarray DataArray or xarray DataSet
    :return: The chunked dataArray or dataSet
    :rtype: xarray DataArray or xarray DataSet
    """
    kwargs = {
        key: "auto"
        for key in dataSet.dims
    }
    return dataSet.chunk(**kwargs)
def copy_chunk(dataSet, dataChunk):
    """Copy the chunk sizes of one dataArray or dataSet and apply them to another.

    :param dataSet: The dataArray or dataSet to be chunked
    :type dataSet: xarray DataArray or xarray DataSet
    :param dataChunk: The dataArray or dataSet providing the chunk sizes
    :type dataChunk: xarray DataArray or xarray DataSet
    :return: The chunked dataArray or dataSet
    :rtype: xarray DataArray or xarray DataSet
    """
    kwargs = {
        key: dataChunk.chunksizes[key]
        for key in dataChunk.chunksizes
        if key in dataSet.dims
    }
    return dataSet.chunk(**kwargs)
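# Usage sketch (hedged): 'rawData' and 'template' are placeholder names for two
# dask-backed xarray objects; they are not defined in this module.
#   rawData = auto_rechunk(rawData)            # let dask choose the chunk sizes
#   rawData = copy_chunk(rawData, template)    # reuse the chunk sizes of 'template'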
def get_h5_file_path(folderpath, maxFileNum=None, filename='*.h5'):
    """Get the paths of all HDF5 files in a specific folder.

    :param folderpath: the path of the folder
    :type folderpath: str
    :param maxFileNum: the maximal number of returned files, defaults to None
    :type maxFileNum: int, optional
    :param filename: a string to specify the type of the file to read, defaults to '*.h5'
    :type filename: str, optional
    :return: the found file paths
    :rtype: 1D numpy array
    """
    filepath = np.sort(glob.glob(folderpath + filename))
    if maxFileNum is None:
        return filepath
    else:
        return filepath[:maxFileNum]
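# Usage sketch (hedged): the folder layout below is a made-up example; note that
# 'folderpath' is concatenated with 'filename' directly, so it should end with a separator.
#   filePaths = get_h5_file_path('./data/' + get_date() + '/', maxFileNum=10)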
def get_date():
    """Return today's date in a format compatible with a file path.

    :return: today's date in a format compatible with a file path
    :rtype: str
    """
    today = date.today()
    return today.strftime("%Y/%m/%d")
def _combine_uncertainty(value, std):
    """Given values and standard deviations, combine them into numbers with uncertainty (ufloat) and return them in another array.
    See https://pythonhosted.org/uncertainties/

    :param value: The value
    :type value: float, or array like
    :param std: The standard deviation
    :type std: float, or array like
    :return: The combined value and standard deviation
    :rtype: ufloat, or uncertainties uarray
    """
    return unp.uarray(value, std)
def combine_uncertainty(value, std, dask='parallelized', **kwargs):
    """Given xarray DataArrays of values and standard deviations, combine them into numbers with uncertainty (ufloat) and return them in another xarray DataArray.
    See https://pythonhosted.org/uncertainties/

    :param value: The value
    :type value: xarray DataArray
    :param std: The standard deviation
    :type std: xarray DataArray
    :param dask: overwrites the same argument of xarray.apply_ufunc, defaults to 'parallelized'
    :type dask: str, optional
    :return: The combined value and standard deviation
    :rtype: xarray DataArray
    """
    kwargs.update(
        {
            "dask": dask,
        }
    )
    return xr.apply_ufunc(_combine_uncertainty, value, std, **kwargs)
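# Usage sketch (hedged): 'meanData' and 'stdData' stand for two DataArrays of the same
# shape (e.g. from calculate_mean and calculate_std below); they are not defined here.
#   combined = combine_uncertainty(meanData, stdData)   # element-wise unp.uarray via apply_ufunc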
def _seperate_uncertainty_single(data):
    """From a number with uncertainty, read out the value and standard deviation.

    :param data: The number with uncertainty
    :type data: ufloat
    :return: a tuple of (value, standard deviation)
    :rtype: tuple of two floats
    """
    return data.n, data.s
def _seperate_uncertainty(data):
    """From a list of numbers with uncertainty, read out the values and standard deviations.

    :param data: The list of numbers with uncertainty
    :type data: ufloat, or uncertainties uarray
    :return: a tuple of (a numpy array of values, a numpy array of standard deviations)
    :rtype: tuple of two numpy arrays
    """
    func = np.vectorize(_seperate_uncertainty_single)
    return func(data)
def seperate_uncertainty(data, dask='parallelized', **kwargs):
    """From an xarray DataArray of numbers with uncertainty, read out the values and standard deviations.

    :param data: The xarray DataArray of numbers with uncertainty
    :type data: xarray DataArray
    :param dask: overwrites the same argument of xarray.apply_ufunc, defaults to 'parallelized'
    :type dask: str, optional
    :return: a tuple of (an xarray DataArray of values, an xarray DataArray of standard deviations)
    :rtype: tuple of two xarray DataArray
    """
    kwargs.update(
        {
            "dask": dask,
            "output_core_dims": [[], []],
        }
    )
    return xr.apply_ufunc(_seperate_uncertainty, data, **kwargs)
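# Usage sketch (hedged): 'combined' stands for a DataArray of ufloat values, e.g. the
# output of combine_uncertainty above; it is not defined in this module.
#   values, stds = seperate_uncertainty(combined)   # two DataArrays: nominal values and standard deviations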
def get_scanAxis(dataSet):
    """Give the names of the scan axes.

    :param dataSet: The xarray DataSet storing the data.
    :type dataSet: xarray DataSet
    :return: The names of the scan axes (the primary scan axis first, padded with None if there are fewer than two)
    :rtype: list or numpy array
    """
    res = dataSet.scanAxis
    if len(res) == 0:
        res = [None, None]
    elif len(res) == 1:
        res = [res[0], None]
    elif len(res) == 2 and res[0] == 'runs':
        res = [res[1], res[0]]
    return res
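# Usage sketch (hedged): assumes dataSet.scanAxis holds exactly two axis names with
# 'runs' as the repetition axis; 'dataSet' itself is not defined in this module.
#   scanAxisName, repetitionAxisName = get_scanAxis(dataSet)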
def print_scanAxis(dataSet):
    """Print the names and the values of the scan axes.

    :param dataSet: The xarray DataSet storing the data.
    :type dataSet: xarray DataSet
    """
    scanAxis = dataSet.scanAxis
    scan = {}
    for key in scanAxis:
        scanValue = np.array(dataSet[key])
        # np.unique already returns the sorted unique values; indexing them again
        # with the first-occurrence indices would be out of bounds.
        scanValue = np.unique(scanValue)
        scan.update(
            {
                key: scanValue
            }
        )
    print("The detected scanning axes and values are:\n")
    print(scan)
def print_scanAxis_original(dataSet):
    # Print the original (unsorted) scan axes.
    pass
def calculate_mean(dataSet):
    """Calculate the mean along the repetition axis 'runs'.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The mean value, or the unchanged data if there is no 'runs' dimension
    :rtype: xarray DataSet or DataArray
    """
    if 'runs' in dataSet.dims:
        return dataSet.mean(dim='runs')
    else:
        return dataSet
def calculate_std(dataSet):
    """Calculate the standard deviation along the repetition axis 'runs'.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The standard deviation, or None if there is no 'runs' dimension
    :rtype: xarray DataSet or DataArray, or None
    """
    if 'runs' in dataSet.dims:
        return dataSet.std(dim='runs')
    else:
        return None
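# Usage sketch (hedged): a typical combination of the helpers above; 'dataSet' is a
# placeholder for data with a 'runs' repetition dimension.
#   meanData = calculate_mean(dataSet)
#   stdData = calculate_std(dataSet)
#   combined = combine_uncertainty(meanData, stdData)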
def extract_temperature_from_fit():
    # Given the shot number: read the data, fit the center of the cloud, fit it against ToF, and give the temperature of the cloud.
    pass
def extract_condensate_fraction_from_fit():
    # Given the shot number: read the data, fit the cloud, and give the condensate fraction of the cloud.
    pass
def swap_xy(dataSet):
    """Swap the x and y axes.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The xarray DataSet or DataArray with swapped x and y axes.
    :rtype: xarray DataSet or DataArray
    """
    # Rename via a temporary name, since renaming 'x' directly to 'y' would clash with the existing 'y' dimension.
    dataSet = dataSet.rename_dims(dict(x='__x'))
    dataSet = dataSet.rename_dims(dict(y='x'))
    dataSet = dataSet.rename_dims(dict(__x='y'))
    return dataSet
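# Usage sketch (hedged): 'image' is a placeholder for data with dimensions 'x' and 'y'.
#   image = swap_xy(image)   # the dimension named 'x' becomes 'y' and vice versa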