import glob
from datetime import date
import copy
import numpy as np
from uncertainties import unumpy as unp
import xarray as xr
def get_mask(dataArray):
    """Generate a boolean mask array for the given dataArray.

    :param dataArray: The given dataArray
    :type dataArray: xarray DataArray
    :return: the mask array
    :rtype: numpy array of bool elements
    """
    return np.ones(dataArray.shape, dtype=bool)
def remove_bad_shots(dataArray, **kwargs):
    """Copy the dataArray and remove bad shots by setting their values to np.nan.
    If you want to fully delete those NaN entries, please use xarray.DataArray.dropna()
    (see https://docs.xarray.dev/en/stable/generated/xarray.DataArray.dropna.html).
    Here is an example for indexing the bad shots:
    remove_bad_shots(dataArray, axis_1=the value (not index) of axis_1, axis_2=the value of axis_2, ...)
    For more details please read the 'Positional indexing' section of
    https://docs.xarray.dev/en/stable/user-guide/indexing.html#positional-indexing.

    :param dataArray: The given dataArray
    :type dataArray: xarray DataArray
    :return: The dataArray after removal
    :rtype: xarray DataArray
    """
    dataArray = copy.deepcopy(dataArray)
    dataArray.loc[dict(kwargs)] = np.nan
    return dataArray
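# Usage sketch (hedged): the dimension name 'shotNum' and the value 5 are illustrative
# assumptions only; they are not defined anywhere in this module.
#   cleaned = remove_bad_shots(dataArray, shotNum=5)    # set the shot at shotNum == 5 to NaN
#   cleaned = cleaned.dropna(dim='shotNum')             # optionally drop the NaN entries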
def auto_rechunk(dataSet):
    """Rechunk the dataSet or dataArray using the automatic rechunking of dask.

    :param dataSet: The given dataArray or dataSet
    :type dataSet: xarray DataArray or xarray DataSet
    :return: The chunked dataArray or dataSet
    :rtype: xarray DataArray or xarray DataSet
    """
    kwargs = {
        key: "auto"
        for key in dataSet.dims
    }
    return dataSet.chunk(**kwargs)
def copy_chunk(dataSet, dataChunk):
    """Copy the chunk sizes of one dataArray or dataSet and apply them to another.

    :param dataSet: The dataArray or dataSet to be chunked
    :type dataSet: xarray DataArray or xarray DataSet
    :param dataChunk: The dataArray or dataSet providing the chunk sizes
    :type dataChunk: xarray DataArray or xarray DataSet
    :return: The chunked dataArray or dataSet
    :rtype: xarray DataArray or xarray DataSet
    """
    kwargs = {
        key: dataChunk.chunksizes[key]
        for key in dataChunk.chunksizes
        if key in dataSet.dims
    }
    return dataSet.chunk(**kwargs)
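# Usage sketch (hedged): 'rawData' and 'template' are placeholder names for two
# dask-backed xarray objects; they are not defined in this module.
#   rawData = auto_rechunk(rawData)            # let dask choose the chunk sizes
#   rawData = copy_chunk(rawData, template)    # reuse the chunk sizes of 'template'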
def get_h5_file_path(folderpath, maxFileNum=None, filename='*.h5'):
    """Get the paths of all HDF5 files in a specific folder.

    :param folderpath: the path of the folder
    :type folderpath: str
    :param maxFileNum: the maximal number of returned files, defaults to None
    :type maxFileNum: int, optional
    :param filename: a string to specify the type of the file to read, defaults to '*.h5'
    :type filename: str, optional
    :return: the found file paths
    :rtype: 1D numpy array
    """
    filepath = np.sort(glob.glob(folderpath + filename))
    if maxFileNum is None:
        return filepath
    else:
        return filepath[:maxFileNum]
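# Usage sketch (hedged): the folder layout below is a made-up example; note that
# 'folderpath' is concatenated with 'filename' directly, so it should end with a separator.
#   filePaths = get_h5_file_path('./data/' + get_date() + '/', maxFileNum=10)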
def get_date():
    """Return today's date in a format compatible with a file path.

    :return: today's date in a format compatible with a file path
    :rtype: str
    """
    today = date.today()
    return today.strftime("%Y/%m/%d")
def _combine_uncertainty(value, std):
    """Given values and standard deviations, combine them into numbers with uncertainty (ufloat) and return them in another array.
    See https://pythonhosted.org/uncertainties/

    :param value: The value
    :type value: float, or array like
    :param std: The standard deviation
    :type std: float, or array like
    :return: The combined value and standard deviation
    :rtype: ufloat, or uncertainties uarray
    """
    return unp.uarray(value, std)
def combine_uncertainty(value, std, dask='parallelized', **kwargs):
    """Given xarray DataArrays of values and standard deviations, combine them into numbers with uncertainty (ufloat) and return them in another xarray DataArray.
    See https://pythonhosted.org/uncertainties/

    :param value: The value
    :type value: xarray DataArray
    :param std: The standard deviation
    :type std: xarray DataArray
    :param dask: overwrites the same argument of xarray.apply_ufunc, defaults to 'parallelized'
    :type dask: str, optional
    :return: The combined value and standard deviation
    :rtype: xarray DataArray
    """
    kwargs.update(
        {
            "dask": dask,
        }
    )
    return xr.apply_ufunc(_combine_uncertainty, value, std, **kwargs)
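# Usage sketch (hedged): 'meanData' and 'stdData' stand for two DataArrays of the same
# shape (e.g. from calculate_mean and calculate_std below); they are not defined here.
#   combined = combine_uncertainty(meanData, stdData)   # element-wise unp.uarray via apply_ufunc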
def _seperate_uncertainty_single(data):
    """From a number with uncertainty, read out the value and standard deviation.

    :param data: The number with uncertainty
    :type data: ufloat
    :return: a tuple of (value, standard deviation)
    :rtype: tuple of two floats
    """
    return data.n, data.s
def _seperate_uncertainty(data):
    """From a list of numbers with uncertainty, read out the values and standard deviations.

    :param data: The list of numbers with uncertainty
    :type data: ufloat, or uncertainties uarray
    :return: a tuple of (a numpy array of values, a numpy array of standard deviations)
    :rtype: tuple of two numpy arrays
    """
    func = np.vectorize(_seperate_uncertainty_single)
    return func(data)
def seperate_uncertainty(data, dask='parallelized', **kwargs):
    """From an xarray DataArray of numbers with uncertainty, read out the values and standard deviations.

    :param data: The xarray DataArray of numbers with uncertainty
    :type data: xarray DataArray
    :param dask: overwrites the same argument of xarray.apply_ufunc, defaults to 'parallelized'
    :type dask: str, optional
    :return: a tuple of (an xarray DataArray of values, an xarray DataArray of standard deviations)
    :rtype: tuple of two xarray DataArray
    """
    kwargs.update(
        {
            "dask": dask,
            "output_core_dims": [[], []],
        }
    )
    return xr.apply_ufunc(_seperate_uncertainty, data, **kwargs)
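# Usage sketch (hedged): 'combined' stands for a DataArray of ufloat values, e.g. the
# output of combine_uncertainty above; it is not defined in this module.
#   values, stds = seperate_uncertainty(combined)   # two DataArrays: nominal values and standard deviations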
def get_scanAxis(dataSet):
    """Give the names of the scan axes.

    :param dataSet: The xarray DataSet storing the data.
    :type dataSet: xarray DataSet
    :return: The names of the scan axes (the primary scan axis first, padded with None if there are fewer than two)
    :rtype: list or numpy array
    """
    res = dataSet.scanAxis
    if len(res) == 0:
        res = [None, None]
    elif len(res) == 1:
        res = [res[0], None]
    elif len(res) == 2 and res[0] == 'runs':
        res = [res[1], res[0]]
    return res
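# Usage sketch (hedged): assumes dataSet.scanAxis holds exactly two axis names with
# 'runs' as the repetition axis; 'dataSet' itself is not defined in this module.
#   scanAxisName, repetitionAxisName = get_scanAxis(dataSet)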
def print_scanAxis(dataSet):
    """Print the names and the values of the scan axes.

    :param dataSet: The xarray DataSet storing the data.
    :type dataSet: xarray DataSet
    """
    scanAxis = dataSet.scanAxis
    scan = {}
    for key in scanAxis:
        scanValue = np.array(dataSet[key])
        # np.unique already returns the sorted unique values; indexing them again
        # with the first-occurrence indices would be out of bounds.
        scanValue = np.unique(scanValue)
        scan.update(
            {
                key: scanValue
            }
        )
    print("The detected scanning axes and values are:\n")
    print(scan)
def print_scanAxis_original(dataSet):
    # Print the original (unsorted) scan axes.
    pass
def calculate_mean(dataSet):
    """Calculate the mean along the repetition axis 'runs'.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The mean value, or the unchanged data if there is no 'runs' dimension
    :rtype: xarray DataSet or DataArray
    """
    if 'runs' in dataSet.dims:
        return dataSet.mean(dim='runs')
    else:
        return dataSet
def calculate_std(dataSet):
    """Calculate the standard deviation along the repetition axis 'runs'.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The standard deviation, or None if there is no 'runs' dimension
    :rtype: xarray DataSet or DataArray, or None
    """
    if 'runs' in dataSet.dims:
        return dataSet.std(dim='runs')
    else:
        return None
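# Usage sketch (hedged): a typical combination of the helpers above; 'dataSet' is a
# placeholder for data with a 'runs' repetition dimension.
#   meanData = calculate_mean(dataSet)
#   stdData = calculate_std(dataSet)
#   combined = combine_uncertainty(meanData, stdData)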
def extract_temperature_from_fit():
    # Given the shot number: read the data, fit the center of the cloud, fit it against ToF, and give the temperature of the cloud.
    pass
def extract_condensate_fraction_from_fit():
    # Given the shot number: read the data, fit the cloud, and give the condensate fraction of the cloud.
    pass
def swap_xy(dataSet):
    """Swap the x and y axes.

    :param dataSet: The xarray DataSet or DataArray storing the data.
    :type dataSet: xarray DataSet or DataArray
    :return: The xarray DataSet or DataArray with swapped x and y axes.
    :rtype: xarray DataSet or DataArray
    """
    # Rename via a temporary name, since renaming 'x' directly to 'y' would clash with the existing 'y' dimension.
    dataSet = dataSet.rename_dims(dict(x='__x'))
    dataSet = dataSet.rename_dims(dict(y='x'))
    dataSet = dataSet.rename_dims(dict(__x='y'))
    return dataSet
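# Usage sketch (hedged): 'image' is a placeholder for data with dimensions 'x' and 'y'.
#   image = swap_xy(image)   # the dimension named 'x' becomes 'y' and vice versa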