HOWTO define the DA script for NNDy

Complete commenting added, with instructions for writing an analysis script that is compatible with the NNDy optimizer and returns the cost associated with the run just performed.
This commit is contained in:
castaneda 2025-03-21 16:19:10 +01:00
parent c50b21677b
commit 3c2255a589

View File

@ -1,141 +1,157 @@
from lyse import Run as lyse_run from lyse import Run as lyse_run
import h5py import h5py
import sys import sys
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import xarray as xr import xarray as xr
import lmfit import lmfit
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
# Location of the DA (data-analysis) modules. If they do not live in the
# same folder as NNDy, point new_path at their folder so the imports below
# can resolve.
new_path = r'C:\Users\DyLab\PrepLab\Data_Analysis'
if new_path not in sys.path:
    sys.path.append(new_path)
from Analyser.FitAnalyser import Gaussian2dModel
from Analyser.FitAnalyser import FitAnalyser from Analyser.FitAnalyser import Gaussian2dModel
from ToolFunction.ToolFunction import get_scanAxis from Analyser.FitAnalyser import FitAnalyser
from ToolFunction.ToolFunction import get_scanAxis
# Reference values measured without the AOM over 5 minutes; they normalize
# the observables of a shot against the free-running laser.
free_std = 1.815672e-6      # THz
free_freq = 438.585992      # THz
free_intensity_max = 186

# Relative weights applied to each normalized observable when they are
# combined into a single scalar cost.
cost_scaling_factor = {
    'red_chi2': 0.01,
    'std_freq': 1,
    'peak_intensity': 10
}
def cost(hdf_outfile): 'peak_intensity': 10
}
output = analysis(hdf_outfile)
print(f'output:\n{output}') # THE COST FUNCTION
#
if output['bad']: #in every DA script for NNDy we need a cost function defined as below
return { def cost(hdf_outfile):
'cost': np.inf, #takes as argument:
'bad': True # the hdf5 file of the shot, where it can read the data of the shot and the global variables
} #return:
# dictionary with 'cost' item - and uncertainity if available- or 'bad' : True for a bad run
cost_value = ( #it would be better if you return all three items everytime, but the code can handle if you don't
(1 + abs( output['red_chi2'] - 1 ))
* cost_scaling_factor['red_chi2'] + #then the role of the cost function is to initiate the analysis and obtain the values of the observables of interest from which you want to calculate the cost
output = analysis(hdf_outfile)
output['std_freq'] / free_std print(f'output:\n{output}')
* cost_scaling_factor['std_freq'] +
# recommended way to handle bad runs
free_intensity_max / output['peak_intensity'] if output['bad']:
* cost_scaling_factor['peak_intensity'] return {
) / 3 'cost': np.inf,
'bad': True
#print('doing return now') }
return {
'cost': cost_value, cost_value = (
'bad': False (1 + abs( output['red_chi2'] - 1 ))
} * cost_scaling_factor['red_chi2'] +
# Called from cost(); performs the actual per-shot analysis. Substitute it
# with your own imports or a custom routine like this one.
def analysis(hdf_outfile):
    """Analyse one shot: wavemeter statistics plus a 2D Gaussian beam fit.

    Parameters
    ----------
    hdf_outfile : str
        Path to the shot's HDF5 file.

    Returns
    -------
    dict
        Wavelength/frequency mean and std, beam-fit observables and a
        'bad' flag — or just ``{'bad': True}`` when the wavemeter reported
        negative (over-/under-exposed) readings.
    """
    bad = False
    group_name = 'DA'

    lyse_run_obj = lyse_run(hdf_outfile)
    lyse_run_obj.set_group(group_name)

    # Wavemeter trace saved by the wlm device; loaded as a NumPy array.
    with h5py.File(hdf_outfile, 'r') as f:
        data = f['results/wlm']['wlm_df'][:]

    df = pd.DataFrame(
        data,
        columns=['timestamp [s]', 'wavelength [nm]', 'frequency [THz]'])

    # Be conservative: declare a false measurement if the device was over-
    # or under-exposed (negative readings) at any point.
    if (df < 0).any().any():
        bad = True
        return {'bad': bad}

    mean_wl = df['wavelength [nm]'].mean()
    mean_freq = df['frequency [THz]'].mean()
    std_wl = df['wavelength [nm]'].std()
    std_freq = df['frequency [THz]'].std()

    # Camera frame of the beam and its 2D Gaussian fit.
    img = lyse_run_obj.get_image('Camera', 'camera_snap', 'image')
    peak_intensity, wx, wy, red_chi2, fitted_img = img_Analyzer(img)

    # Save the fitted image back into the shot file.
    frametype = 'fitted_beam_shape'
    with h5py.File(hdf_outfile, 'r+') as f:
        group = f.require_group(group_name)
        dset = group.create_dataset(
            frametype, data=fitted_img, dtype='uint16', compression='gzip'
        )
        # Mark the dataset so HDF5 viewers render it as a grayscale image.
        dset.attrs['CLASS'] = np.bytes_('IMAGE')
        dset.attrs['IMAGE_VERSION'] = np.bytes_('1.2')
        dset.attrs['IMAGE_SUBCLASS'] = np.bytes_('IMAGE_GRAYSCALE')
        dset.attrs['IMAGE_WHITE_IS_ZERO'] = np.uint8(0)

    output = {'mean_wl': mean_wl, 'std_wl': std_wl,
              'mean_freq': mean_freq, 'std_freq': std_freq,
              'peak_intensity': peak_intensity,
              'wx': wx, 'wy': wy,
              'red_chi2': red_chi2,
              'bad': bad}

    lyse_run_obj.save_result('output', output)

    return output
# Subprocess called within the analysis routine.
def img_Analyzer(image):
    """Fit a 2D Gaussian to a camera frame and return the fit observables.

    Parameters
    ----------
    image : numpy.ndarray
        2D camera frame.

    Returns
    -------
    tuple
        ``(peak_intensity, sigma_x, sigma_y, reduced_chi2, fitted_image)``
        — the first four as np.float64, the last an xarray.DataArray with
        the model evaluated on the image grid.
    """
    dimensions = ['x', 'y']
    # Convert the NumPy array to an xarray DataArray so the fitter can
    # address the axes by name.
    image = xr.DataArray(image, dims=dimensions)

    fitModel = Gaussian2dModel()
    fitAnalyser = FitAnalyser(fitModel, fitDim=2)

    params = fitAnalyser.guess(image)
    fitResult = fitAnalyser.fit(image, params).load()

    # Evaluate the fitted model on the original grid (saved later by the
    # caller alongside the raw shot data).
    fitted_image = fitResult.item().eval(x=image.x, y=image.y)
    fitted_image = xr.DataArray(fitted_image, dims=dimensions)

    val = fitAnalyser.get_fit_value(fitResult)

    # Peak intensity comes from the raw image maximum; the widths and the
    # reduced chi^2 come from the fit result.
    return (np.float64(image.max()),
            np.float64(val['sigmax']),
            np.float64(val['sigmay']),
            np.float64(fitResult.item().redchi),
            fitted_image)