HOWTO define the DA script for NNDy

Complete commenting added, with instructions for writing an analysis script that is compatible with the NNDy optimizer and returns the cost associated with the run just performed.
This commit is contained in:
castaneda 2025-03-21 16:19:10 +01:00
parent c50b21677b
commit 3c2255a589

View File

@ -1,141 +1,157 @@
from lyse import Run as lyse_run from lyse import Run as lyse_run
import h5py import h5py
import sys import sys
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import xarray as xr import xarray as xr
import lmfit import lmfit
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
# Location of the DA (data-analysis) modules. If they do not live in the
# same folder as NNDy, point new_path at their folder so the imports below
# can resolve.
new_path = r'C:\Users\DyLab\PrepLab\Data_Analysis'
if new_path not in sys.path:
    sys.path.append(new_path)
from Analyser.FitAnalyser import Gaussian2dModel
from Analyser.FitAnalyser import FitAnalyser from Analyser.FitAnalyser import Gaussian2dModel
from ToolFunction.ToolFunction import get_scanAxis from Analyser.FitAnalyser import FitAnalyser
from ToolFunction.ToolFunction import get_scanAxis
# Reference values measured without the AOM over 5 minutes; they normalize
# the observables of a shot against the free-running laser.
free_std = 1.815672e-6      # THz
free_freq = 438.585992      # THz
free_intensity_max = 186

# Relative weights applied to each normalized observable when they are
# combined into a single scalar cost.
cost_scaling_factor = {
    'red_chi2': 0.01,
    'std_freq': 1,
    'peak_intensity': 10
}
def cost(hdf_outfile): 'peak_intensity': 10
}
output = analysis(hdf_outfile)
print(f'output:\n{output}') # THE COST FUNCTION
#
if output['bad']: #in every DA script for NNDy we need a cost function defined as below
return { def cost(hdf_outfile):
'cost': np.inf, #takes as argument:
'bad': True # the hdf5 file of the shot, where it can read the data of the shot and the global variables
} #return:
# dictionary with 'cost' item - and uncertainity if available- or 'bad' : True for a bad run
cost_value = ( #it would be better if you return all three items everytime, but the code can handle if you don't
(1 + abs( output['red_chi2'] - 1 ))
* cost_scaling_factor['red_chi2'] + #then the role of the cost function is to initiate the analysis and obtain the values of the observables of interest from which you want to calculate the cost
output = analysis(hdf_outfile)
output['std_freq'] / free_std print(f'output:\n{output}')
* cost_scaling_factor['std_freq'] +
# recommended way to handle bad runs
free_intensity_max / output['peak_intensity'] if output['bad']:
* cost_scaling_factor['peak_intensity'] return {
) / 3 'cost': np.inf,
'bad': True
#print('doing return now') }
return {
'cost': cost_value, cost_value = (
'bad': False (1 + abs( output['red_chi2'] - 1 ))
} * cost_scaling_factor['red_chi2'] +
# Called from cost(); performs the actual per-shot analysis. Substitute it
# with your own imports or a custom routine like this one.
def analysis(hdf_outfile):
    """Analyse one shot: wavemeter statistics plus a 2D Gaussian beam fit.

    Parameters
    ----------
    hdf_outfile : str
        Path to the shot's HDF5 file.

    Returns
    -------
    dict
        Wavelength/frequency mean and std, beam-fit observables and a
        'bad' flag — or just ``{'bad': True}`` when the wavemeter reported
        negative (over-/under-exposed) readings.
    """
    bad = False
    group_name = 'DA'

    lyse_run_obj = lyse_run(hdf_outfile)
    lyse_run_obj.set_group(group_name)

    # Wavemeter trace saved by the wlm device; loaded as a NumPy array.
    with h5py.File(hdf_outfile, 'r') as f:
        data = f['results/wlm']['wlm_df'][:]

    df = pd.DataFrame(
        data,
        columns=['timestamp [s]', 'wavelength [nm]', 'frequency [THz]'])

    # Be conservative: declare a false measurement if the device was over-
    # or under-exposed (negative readings) at any point.
    if (df < 0).any().any():
        bad = True
        return {'bad': bad}

    mean_wl = df['wavelength [nm]'].mean()
    mean_freq = df['frequency [THz]'].mean()
    std_wl = df['wavelength [nm]'].std()
    std_freq = df['frequency [THz]'].std()

    # Camera frame of the beam and its 2D Gaussian fit.
    img = lyse_run_obj.get_image('Camera', 'camera_snap', 'image')
    peak_intensity, wx, wy, red_chi2, fitted_img = img_Analyzer(img)

    # Save the fitted image back into the shot file.
    frametype = 'fitted_beam_shape'
    with h5py.File(hdf_outfile, 'r+') as f:
        group = f.require_group(group_name)
        dset = group.create_dataset(
            frametype, data=fitted_img, dtype='uint16', compression='gzip'
        )
        # Mark the dataset so HDF5 viewers render it as a grayscale image.
        dset.attrs['CLASS'] = np.bytes_('IMAGE')
        dset.attrs['IMAGE_VERSION'] = np.bytes_('1.2')
        dset.attrs['IMAGE_SUBCLASS'] = np.bytes_('IMAGE_GRAYSCALE')
        dset.attrs['IMAGE_WHITE_IS_ZERO'] = np.uint8(0)

    output = {'mean_wl': mean_wl, 'std_wl': std_wl,
              'mean_freq': mean_freq, 'std_freq': std_freq,
              'peak_intensity': peak_intensity,
              'wx': wx, 'wy': wy,
              'red_chi2': red_chi2,
              'bad': bad}

    lyse_run_obj.save_result('output', output)

    return output
# Subprocess called within the analysis routine.
def img_Analyzer(image):
    """Fit a 2D Gaussian to a camera frame and return the fit observables.

    Parameters
    ----------
    image : numpy.ndarray
        2D camera frame.

    Returns
    -------
    tuple
        ``(peak_intensity, sigma_x, sigma_y, reduced_chi2, fitted_image)``
        — the first four as np.float64, the last an xarray.DataArray with
        the model evaluated on the image grid.
    """
    dimensions = ['x', 'y']
    # Convert the NumPy array to an xarray DataArray so the fitter can
    # address the axes by name.
    image = xr.DataArray(image, dims=dimensions)

    fitModel = Gaussian2dModel()
    fitAnalyser = FitAnalyser(fitModel, fitDim=2)

    params = fitAnalyser.guess(image)
    fitResult = fitAnalyser.fit(image, params).load()

    # Evaluate the fitted model on the original grid (saved later by the
    # caller alongside the raw shot data).
    fitted_image = fitResult.item().eval(x=image.x, y=image.y)
    fitted_image = xr.DataArray(fitted_image, dims=dimensions)

    val = fitAnalyser.get_fit_value(fitResult)

    # Peak intensity comes from the raw image maximum; the widths and the
    # reduced chi^2 come from the fit result.
    return (np.float64(image.max()),
            np.float64(val['sigmax']),
            np.float64(val['sigmay']),
            np.float64(fitResult.item().redchi),
            fitted_image)