Added safeguards against out-of-memory errors by estimating the required RAM before loading data. An alternative method is still needed to read and analyse datasets that are too large to load into memory.

This commit is contained in:
Karthik 2025-08-23 21:11:42 +02:00
parent ae9af32eda
commit 2abbefb3a7
6 changed files with 134 additions and 30 deletions

View File

@ -106,6 +106,6 @@ function conductPCA(od_imgs, scan_reference_values, scan_parameter_values, doPlo
%% --- ANOVA Test ---
p = anova1(score(:,1), arrayfun(@num2str, scan_parameter_values, 'UniformOutput', false), 'off');
fprintf('ANOVA p-value for PC1 score differences between groups: %.4e\n', p);
fprintf('[INFO] ANOVA p-value for PC1 score differences between groups: %.4e\n', p);
end

View File

@ -47,7 +47,7 @@ function [results, scan_parameter_values] = performAnalysis(options)
[od_imgs, scan_parameter_values, ~] = Helper.collectODImages(options);
% Conduct spectral analysis
fprintf('\nInitiating spectral analysis...\n');
fprintf('\n[INFO] Initiating spectral analysis...\n');
spectral_analysis_results = Analyzer.conductSpectralAnalysis(od_imgs, scan_parameter_values, options);
@ -63,7 +63,7 @@ function [results, scan_parameter_values] = performAnalysis(options)
spectral_analysis_results.angular_spectral_distribution, ...
scan_parameter_values, N_shots, options.N_angular_bins);
fprintf('\nSpectral analysis complete!\n');
fprintf('\n[INFO] Spectral analysis complete!\n');
% PCA

View File

@ -9,6 +9,9 @@ function results_all = batchAnalyze(dataSources, options)
options.baseDataFolder = '//DyLabNAS/Data';
end
% ===== Estimate dataset memory and get per-run estimates =====
[options.SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, options);
results_all = {}; % one element per folder
for i = 1:numel(dataSources)
@ -37,6 +40,13 @@ function results_all = batchAnalyze(dataSources, options)
error('Unsupported type for run entry: %s', class(runItem));
end
% --- Check per-run memory before processing ---
if runMemoryGB(j) > 0.5 * getAvailableRAM()
fprintf('[WARNING] Skipping run %s/%s due to high memory requirement (%.2f GB)\n', ...
ds.sequence, runID, runMemoryGB(j));
continue; % skip this run
end
% Build folder path
folderPath = fullfile(baseFolder, runID);
options.folderPath = folderPath;
@ -75,7 +85,6 @@ function results_all = batchAnalyze(dataSources, options)
end
end
%% ---- Local function for saving results ----
function saveResultStruct(result, saveDirectory)
% Define results folder
@ -107,3 +116,13 @@ function saveResultStruct(result, saveDirectory)
nextIdx = nextIdx + 1;
save(indexFile, "nextIdx");
end
%% ---- Local function to get available RAM ----
function availableRAM = getAvailableRAM()
% GETAVAILABLERAM Report how much physical memory can currently be used.
%
%   availableRAM = getAvailableRAM() returns the "Available" entry of the
%   physical-memory section of the system view reported by MEMORY when
%   running on Windows. On every other platform a fixed fallback value of
%   16e9 is returned, because the MEMORY query is only attempted under ISPC.
%
%   NOTE(review): the value is presumably in bytes (the fallback is
%   labelled "16 GB") - confirm against the MEMORY documentation.

    % Start from the fallback and only overwrite it on Windows.
    availableRAM = 16e9; % fallback: 16 GB if not Windows
    if ~ispc
        return;
    end

    [~, systemView] = memory;
    availableRAM = systemView.PhysicalMemory.Available;
end

View File

@ -25,6 +25,12 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
reuseVarsExist = evalin('base', ...
'exist(''od_imgs'',''var'') && exist(''scan_parameter_values'',''var'') && exist(''file_list'',''var'') && exist(''prior_options'',''var'')');
% --- Respect SAVE_TO_WORKSPACE flag from batchAnalyze ---
if isfield(options, 'SAVE_TO_WORKSPACE') && ~options.SAVE_TO_WORKSPACE
% Force reprocessing: skip all workspace reuse
reuseVarsExist = false;
end
if reuseVarsExist
prior_options = evalin('base','prior_options');
@ -32,7 +38,7 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
critical_fields = {'folderPath','cam','angle','ImagingMode','PulseDuration','center','span','fraction','removeFringes','skipUnshuffling','scan_reference_values'};
if ~haveOptionsChanged(options, prior_options, critical_fields)
fprintf('\nReusing processed OD images, scan parameters, and file list from memory.\n');
fprintf('\n[INFO] Reusing processed OD images, scan parameters, and file list from memory.\n');
od_imgs = evalin('base','od_imgs');
scan_parameter_values = evalin('base','scan_parameter_values');
file_list = evalin('base','file_list');
@ -44,7 +50,7 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
return; % safe to exit now
else
fprintf('\nProcessed-data-related options changed. Reprocessing full OD image dataset...\n');
fprintf('\n[INFO] Processed-data-related options changed. Reprocessing full OD image dataset...\n');
end
end
@ -55,6 +61,11 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
evalin('base', 'exist(''raw_file_list'', ''var'')') && ...
evalin('base', 'exist(''prior_options'',''var'')');
% --- Respect SAVE_TO_WORKSPACE flag ---
if isfield(options, 'SAVE_TO_WORKSPACE') && ~options.SAVE_TO_WORKSPACE
fullDataExists = false; % force recompute even if workspace vars exist
end
if fullDataExists
% Both required datasets exist, check if raw-data options changed
prior_options = evalin('base','prior_options');
@ -63,36 +74,40 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
critical_raw_fields = {'folderPath','cam','angle','ImagingMode','PulseDuration'};
if ~haveOptionsChanged(options, prior_options, critical_raw_fields)
fprintf('\nReusing full OD image dataset and scan parameters from memory.\n');
fprintf('\n[INFO] Reusing full OD image dataset and scan parameters from memory.\n');
full_od_imgs = evalin('base', 'full_od_imgs');
full_bkg_imgs = evalin('base', 'full_bkg_imgs');
raw_scan_parameter_values = evalin('base', 'raw_scan_parameter_values');
raw_file_list = evalin('base', 'raw_file_list');
else
fprintf('\nRaw-data-related options changed. Recomputing full OD image dataset...\n');
fprintf('\n[INFO] Raw-data-related options changed. Recomputing full OD image dataset...\n');
[full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = Helper.processRawData(options);
% Save raw full dataset for reuse
if options.SAVE_TO_WORKSPACE
assignin('base', 'full_od_imgs', full_od_imgs);
assignin('base', 'full_bkg_imgs', full_bkg_imgs);
assignin('base', 'raw_scan_parameter_values', raw_scan_parameter_values);
assignin('base', 'raw_file_list', raw_file_list);
end
fprintf('\n[INFO] Completed recomputing OD images. Stored in workspace for reuse.\n');
end
else
% Either dataset is missing, process raw HDF5 files completely
fprintf('\n[INFO] Full OD image dataset or scan parameters not found in memory.\n');
[full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = Helper.processRawData(options);
% Save raw full dataset for reuse
if options.SAVE_TO_WORKSPACE
assignin('base', 'full_od_imgs', full_od_imgs);
assignin('base', 'full_bkg_imgs', full_bkg_imgs);
assignin('base', 'raw_scan_parameter_values', raw_scan_parameter_values);
assignin('base', 'raw_file_list', raw_file_list);
fprintf('\nCompleted recomputing OD images. Stored in workspace for reuse.\n');
end
else
% Either dataset is missing, process raw HDF5 files completely
fprintf('\nFull OD image dataset or scan parameters not found in memory.\n');
[full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = Helper.processRawData(options);
% Save raw full dataset for reuse
assignin('base', 'full_od_imgs', full_od_imgs);
assignin('base', 'full_bkg_imgs', full_bkg_imgs);
assignin('base', 'raw_scan_parameter_values', raw_scan_parameter_values);
assignin('base', 'raw_file_list', raw_file_list);
fprintf('\nCompleted computing OD images. Images will be stored in workspace for reuse.\n');
fprintf('\n[INFO] Completed computing OD images. Images will be stored in workspace for reuse.\n');
end
fprintf('\nCropping and subtracting background from images...\n');
fprintf('\n[INFO] Cropping and subtracting background from images...\n');
nFiles = size(full_od_imgs, 3);
% --- Preallocate arrays for processed images ---
@ -144,18 +159,18 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
% --- Optional fringe removal ---
if isfield(options, 'removeFringes') && options.removeFringes
fprintf('\nApplying fringe removal to processed images...\n');
fprintf('\n[INFO] Applying fringe removal to processed images...\n');
optrefimages = Helper.removeFringesInImage(absimages, refimages);
absimages_fringe_removed = absimages - optrefimages;
od_imgs = arrayfun(@(i) absimages_fringe_removed(:,:,i), 1:nFiles, 'UniformOutput', false);
fprintf('\nFringe removal completed.\n');
fprintf('\n[INFO] Fringe removal completed.\n');
else
od_imgs = arrayfun(@(i) absimages(:,:,i), 1:nFiles, 'UniformOutput', false);
end
% --- Optional unshuffling based on scan reference values ---
if isfield(options, 'skipUnshuffling') && ~options.skipUnshuffling
fprintf('\nReordering images according to scan parameter reference values...\n');
fprintf('\n[INFO] Reordering images according to scan parameter reference values...\n');
n_values = length(options.scan_reference_values);
n_total = length(raw_scan_parameter_values);
@ -186,7 +201,7 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
od_imgs = ordered_od_imgs;
scan_parameter_values = ordered_scan_parameter_values;
file_list = ordered_file_list;
fprintf('\nImage reordering completed.\n');
fprintf('\n[INFO] Image reordering completed.\n');
else
% No unshuffling: keep original order
scan_parameter_values = raw_scan_parameter_values;
@ -204,7 +219,7 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
saveODFigures(od_imgs, options.saveDirectory);
end
fprintf('\nOD image dataset ready for further analysis.\n');
fprintf('\n[INFO] OD image dataset ready for further analysis.\n');
end
@ -234,11 +249,11 @@ function saveODFigures(od_imgs, saveDirectory)
filesExist = all(arrayfun(@(k) isfile(fullfile(odFolder, sprintf('OD_img_%03d.fig', k))), 1:nImgs));
if filesExist
fprintf('\nOD figures already exist in %s. Skipping save.\n', odFolder);
fprintf('\n[INFO] OD figures already exist in %s. Skipping save.\n', odFolder);
return;
end
fprintf('\nSaving OD figures to %s ...\n', odFolder);
fprintf('\n[INFO] Saving OD figures to %s ...\n', odFolder);
for k = 1:nImgs
img = od_imgs{k};
@ -252,5 +267,5 @@ function saveODFigures(od_imgs, saveDirectory)
savefig(hFig, fileName);
close(hFig);
end
fprintf('OD figures saved successfully.\n');
fprintf('[INFO] OD figures saved successfully.\n');
end

View File

@ -0,0 +1,70 @@
function [SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, options)
% ESTIMATEDATASETMEMORY Estimate per-run memory and decide whether to save
% the dataset to the workspace.
%
%   [SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, options)
%
%   Inputs:
%     dataSources - cell array of structs. Each struct is read for the
%                   fields 'sequence', 'date' and 'runs', plus an optional
%                   'baseFolder'. Entries of 'runs' may be numeric, string,
%                   char or cell; numeric entries are formatted as '%04d'.
%     options     - struct; options.baseDataFolder is used as the root
%                   folder for every data source that has no non-empty
%                   'baseFolder' field.
%
%   Outputs:
%     SAVE_TO_WORKSPACE - logical. Starts as true and becomes false as soon
%                         as any single run is estimated to need more than
%                         50% of the available RAM. It is one flag for the
%                         whole batch, not a per-run flag.
%     runMemoryGB       - column vector with exactly ONE entry per run, in
%                         the order the runs are visited (data source by
%                         data source, run by run). Runs whose folder does
%                         not exist or contains no '*.h5' file are recorded
%                         as 0 so that positions in this vector always
%                         correspond to run positions. Because the vector
%                         is flat across all data sources, a caller
%                         iterating over several data sources must keep a
%                         running offset rather than index by the per-source
%                         run counter alone.
%
%   Errors:
%     Raises 'Unsupported run type' when a run entry is none of numeric,
%     string, char or cell.
%
%   The estimate is nFiles * (bytes per full image + bytes per cropped
%   image), using the fixed per-image sizes defined below.

    % --- Measured memory per image (bytes) ---
    bytesPerFullImage    = 37.75 * 1e6;  % full OD image
    bytesPerCroppedImage = 0.16 * 1e6;   % cropped OD image

    % --- Check available RAM on Windows ---
    if ispc
        [~, sys] = memory;
        availableRAM = sys.PhysicalMemory.Available;
    else
        availableRAM = 16e9;             % fallback: 16 GB if not Windows
    end

    SAVE_TO_WORKSPACE = true;            % default, cleared by any oversized run
    runMemoryGB       = [];              % one entry per run (0 = nothing to load)

    % --- Loop over all data sources and runs ---
    for i_ds = 1:numel(dataSources)
        ds = dataSources{i_ds};

        if isfield(ds, 'baseFolder') && ~isempty(ds.baseFolder)
            baseFolder = fullfile(ds.baseFolder, ds.sequence, ds.date);
        else
            baseFolder = fullfile(options.baseDataFolder, ds.sequence, ds.date);
        end

        for j_run = 1:numel(ds.runs)
            runItem = ds.runs(j_run);

            % Convert runItem to string runID
            if isnumeric(runItem)
                runID = sprintf('%04d', runItem);
            elseif isstring(runItem)
                runID = runItem;
            elseif ischar(runItem)
                runID = string(runItem);
            elseif iscell(runItem)
                runID = string(runItem{1});
            else
                error('Unsupported run type');
            end

            % Count the HDF5 files of this run; a missing folder counts as 0.
            runFolder = fullfile(baseFolder, runID);
            nFiles = 0;
            if isfolder(runFolder)
                nFiles = numel(dir(fullfile(runFolder, '*.h5')));
            end

            % Memory estimate for this run (full + cropped). Always append,
            % even when it is 0, so that the k-th entry belongs to the k-th
            % run; skipping entries here would shift every later estimate
            % onto the wrong run for callers that index by run position.
            runBytes = nFiles * (bytesPerFullImage + bytesPerCroppedImage);
            runMemoryGB(end+1,1) = runBytes/1e9; %#ok<AGROW>

            if nFiles == 0
                continue; % nothing to load, nothing to report
            end

            % Decide workspace flag by comparing with 50% of available RAM
            if runBytes > 0.5 * availableRAM
                SAVE_TO_WORKSPACE = false;
                fprintf('[INFO] Estimated size on memory of Run %s/%s too large (%.2f GB). Not saving to workspace.\n', ...
                    ds.sequence, runID, runBytes/1e9);
            else
                fprintf('[INFO] Estimated size on memory of Run %s/%s = %.2f GB. Will save to workspace.\n', ...
                    ds.sequence, runID, runBytes/1e9);
            end
        end
    end
end

View File

@ -6,7 +6,7 @@ function [full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list]
% Returns the OD images and scan parameters immediately in memory.
% This function does NOT do cropping or fringe removal.
fprintf('\nProcessing raw data files at %s ...\n', options.folderPath);
fprintf('\n[INFO] Processing raw data files at %s ...\n', options.folderPath);
% ===== Group paths in HDF5 files =====
groupList = ["/images/MOT_3D_Camera/in_situ_absorption", ...