MAJOR update to how raw data is handled - saves full od images to disk if it detects computer does not have enough RAM and allows for quick post-processing.

This commit is contained in:
Karthik 2025-08-27 19:17:09 +02:00
parent fb12be8e22
commit 4971e488c4
4 changed files with 193 additions and 171 deletions

View File

@ -17,31 +17,18 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
% .scan_reference_values: reference values for unshuffling
%
% Outputs:
% od_imgs : cell array of processed OD images
% scan_parameter_values: vector of scan parameter values
% file_list : cell array of file names
% --- Early exit if processed data already exist AND options match ---
reuseVarsExist = evalin('base', ...
% od_imgs : cell array of processed OD images
% scan_parameter_values: vector of scan parameter values
% file_list : cell array of file names
% --- Early exit if processed OD images already exist AND options match ---
reuseExistingODImages = evalin('base', ...
'exist(''od_imgs'',''var'') && exist(''scan_parameter_values'',''var'') && exist(''file_list'',''var'') && exist(''prior_options'',''var'')');
if ~isfield(options, 'SAVE_TO_WORKSPACE')
% ===== Estimate dataset memory and get per-run estimates =====
dataSource = makeDataSource(options.folderPath);
[options.SAVE_TO_WORKSPACE, ~] = Helper.estimateDatasetMemory(dataSource, options);
end
% --- Respect SAVE_TO_WORKSPACE flag ---
if ~options.SAVE_TO_WORKSPACE
% Force reprocessing: skip all workspace reuse
reuseVarsExist = false;
end
if reuseVarsExist
if reuseExistingODImages
prior_options = evalin('base','prior_options');
% Define which fields are critical for reuse
critical_fields = {'folderPath','cam','angle','ImagingMode','PulseDuration','center','span','fraction','removeFringes','skipUnshuffling','scan_reference_values'};
critical_fields = {'folderPath','cam','angle','ImagingMode','PulseDuration',...
'center','span','fraction','removeFringes','skipUnshuffling','scan_reference_values'};
if ~haveOptionsChanged(options, prior_options, critical_fields)
fprintf('\n[INFO] Reusing processed OD images, scan parameters, and file list from memory.\n');
@ -49,117 +36,110 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
scan_parameter_values = evalin('base','scan_parameter_values');
file_list = evalin('base','file_list');
% --- Ensure figures exist if requested now ---
if ~options.skipSaveOD
saveODFigures(od_imgs, options.saveDirectory);
end
return; % safe to exit now
return; % bypass cropping/background subtraction
else
fprintf('\n[INFO] Processed-data-related options changed. Reprocessing full OD image dataset...\n');
end
end
% --- Check if the full OD dataset and scan parameters exist in workspace ---
% --- Determine RawData folder ---
raw_data_folder = fullfile(options.saveDirectory,'RawData');
% --- Check if workspace full dataset exists ---
fullDataExists = evalin('base', 'exist(''full_od_imgs'', ''var'')') && ...
evalin('base', 'exist(''full_bkg_imgs'', ''var'')') && ...
evalin('base', 'exist(''raw_scan_parameter_values'', ''var'')') && ...
evalin('base', 'exist(''raw_file_list'', ''var'')') && ...
evalin('base', 'exist(''prior_options'',''var'')');
% --- Respect SAVE_TO_WORKSPACE flag ---
if ~options.SAVE_TO_WORKSPACE
fullDataExists = false; % force recompute even if workspace vars exist
if ~isfield(options,'SAVE_TO_WORKSPACE') || ~options.SAVE_TO_WORKSPACE
fullDataExists = false;
end
% --- Prepare full_od_imgs, full_bkg_imgs, scan values, file list ---
if fullDataExists
% Both required datasets exist, check if raw-data options changed
prior_options = evalin('base','prior_options');
% Define critical fields that affect raw-data computation
critical_raw_fields = {'folderPath','cam','angle','ImagingMode','PulseDuration'};
if ~haveOptionsChanged(options, prior_options, critical_raw_fields)
fprintf('\n[INFO] Reusing full OD image dataset and scan parameters from memory.\n');
full_od_imgs = evalin('base', 'full_od_imgs');
full_bkg_imgs = evalin('base', 'full_bkg_imgs');
raw_scan_parameter_values = evalin('base', 'raw_scan_parameter_values');
raw_file_list = evalin('base', 'raw_file_list');
else
fprintf('\n[INFO] Raw-data-related options changed. Recomputing full OD image dataset...\n');
[full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = Helper.processRawData(options);
% Save raw full dataset for reuse
if options.SAVE_TO_WORKSPACE
assignin('base', 'full_od_imgs', full_od_imgs);
assignin('base', 'full_bkg_imgs', full_bkg_imgs);
assignin('base', 'raw_scan_parameter_values', raw_scan_parameter_values);
assignin('base', 'raw_file_list', raw_file_list);
end
fprintf('\n[INFO] Completed recomputing OD images. Stored in workspace for reuse.\n');
end
fprintf('\n[INFO] Reusing full OD image dataset and scan parameters from memory.\n');
full_od_imgs = evalin('base', 'full_od_imgs');
full_bkg_imgs = evalin('base', 'full_bkg_imgs');
raw_scan_parameter_values = evalin('base', 'raw_scan_parameter_values');
raw_file_list = evalin('base', 'raw_file_list');
nFiles = size(full_od_imgs,3);
fprintf('\n[INFO] Cropping and subtracting background from images...\n');
elseif exist(raw_data_folder,'dir')
mat_files = dir(fullfile(raw_data_folder,'*.mat'));
nFiles = numel(mat_files);
raw_scan_parameter_values = zeros(1,nFiles);
raw_file_list = strings(1,nFiles);
fprintf('\n[INFO] Cropping and subtracting background from images in raw data folder on disk...\n');
else
% Either dataset is missing, process raw HDF5 files completely
fprintf('\n[INFO] Full OD image dataset or scan parameters not found in memory.\n');
fprintf('\n[INFO] No raw data found in either the workspace or on disk.\n');
[full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = Helper.processRawData(options);
% Save raw full dataset for reuse
if options.SAVE_TO_WORKSPACE
if isfield(options,'SAVE_TO_WORKSPACE') && options.SAVE_TO_WORKSPACE
assignin('base', 'full_od_imgs', full_od_imgs);
assignin('base', 'full_bkg_imgs', full_bkg_imgs);
assignin('base', 'raw_scan_parameter_values', raw_scan_parameter_values);
assignin('base', 'raw_file_list', raw_file_list);
fprintf('\n[INFO] Completed computing OD images. Images will be stored in workspace for reuse.\n');
else
fprintf('\n[INFO] Completed computing OD images. Images will be stored on disk for reuse.\n');
end
fprintf('\n[INFO] Completed computing OD images. Images will be stored in workspace for reuse.\n');
nFiles = size(full_od_imgs,3);
fprintf('\n[INFO] Cropping and subtracting background from images...\n');
end
fprintf('\n[INFO] Cropping and subtracting background from images...\n');
nFiles = size(full_od_imgs, 3);
% --- Preallocate arrays for processed images ---
% --- Unified cropping & background subtraction ---
absimages = zeros(options.span(1)+1, options.span(2)+1, nFiles, 'single');
refimages = zeros(options.span(1)+1, options.span(2)+1, nFiles, 'single');
% --- Progress bar ---
if isfield(options, 'showProgressBar') && options.showProgressBar
showPB = isfield(options,'showProgressBar') && options.showProgressBar;
if showPB
pb = Helper.ProgressBar();
pb.run('Progress: ');
end
% --- Process each image: crop and subtract background ---
for k = 1:nFiles
full_od_img = full_od_imgs(:,:,k); % original full OD image, never modified
full_bkg_img = full_bkg_imgs(:,:,k); % original full background image, never modified
if any(isnan(full_od_img(:)))
if fullDataExists || exist(raw_data_folder,'dir')
if fullDataExists
od_mat = full_od_imgs(:,:,k);
bkg_mat = full_bkg_imgs(:,:,k);
else
data = load(fullfile(raw_data_folder, mat_files(k).name));
od_mat = data.OD;
bkg_mat = data.BKG;
raw_scan_parameter_values(k) = data.Scan;
raw_file_list(k) = data.File;
end
else
od_mat = full_od_imgs(:,:,k);
bkg_mat = full_bkg_imgs(:,:,k);
end
% --- Crop and subtract background ---
if any(isnan(od_mat(:)))
absimages(:,:,k) = nan(options.span(1)+1, options.span(2)+1, 'single');
continue
else
cropped_od = Helper.cropODImage(od_mat, options.center, options.span);
absimages(:,:,k) = Helper.subtractBackgroundOffset(cropped_od, options.fraction)';
end
if any(isnan(full_bkg_img(:)))
if any(isnan(bkg_mat(:)))
refimages(:,:,k) = nan(options.span(1)+1, options.span(2)+1, 'single');
continue
else
cropped_bkg = Helper.cropODImage(bkg_mat, options.center, options.span);
refimages(:,:,k) = Helper.subtractBackgroundOffset(cropped_bkg, options.fraction)';
end
% Crop image around the region of interest
cropped_absimage = Helper.cropODImage(full_od_img, options.center, options.span);
cropped_refimage = Helper.cropODImage(full_bkg_img, options.center, options.span);
% Subtract background offset based on fraction
processed_absimage = Helper.subtractBackgroundOffset(cropped_absimage, options.fraction);
processed_refimage = Helper.subtractBackgroundOffset(cropped_refimage, options.fraction);
% Store processed image (transpose to match orientation)
absimages(:,:,k) = processed_absimage';
refimages(:,:,k) = processed_refimage';
% Update progress bar
if isfield(options, 'showProgressBar') && options.showProgressBar
progressPercent = round(k / nFiles * 100);
if showPB
progressPercent = round(k/nFiles*100);
pb.run(progressPercent);
end
end
% Finish progress bar
if isfield(options, 'showProgressBar') && options.showProgressBar
if showPB
pb.run(' Done!');
end
@ -168,12 +148,12 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
fprintf('\n[INFO] Applying fringe removal to processed images...\n');
optrefimages = Helper.removeFringesInImage(absimages, refimages);
absimages_fringe_removed = absimages - optrefimages;
od_imgs = arrayfun(@(i) absimages_fringe_removed(:,:,i), 1:nFiles, 'UniformOutput', false);
od_imgs = arrayfun(@(i) absimages_fringe_removed(:,:,i), 1:size(absimages,3), 'UniformOutput', false);
fprintf('\n[INFO] Fringe removal completed.\n');
else
od_imgs = arrayfun(@(i) absimages(:,:,i), 1:nFiles, 'UniformOutput', false);
od_imgs = arrayfun(@(i) absimages(:,:,i), 1:size(absimages,3), 'UniformOutput', false);
end
% --- Optional unshuffling based on scan reference values ---
if isfield(options, 'skipUnshuffling') && ~options.skipUnshuffling
fprintf('\n[INFO] Reordering images according to scan parameter reference values...\n');
@ -209,24 +189,22 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options)
file_list = ordered_file_list;
fprintf('\n[INFO] Image reordering completed.\n');
else
% No unshuffling: keep original order
scan_parameter_values = raw_scan_parameter_values;
file_list = raw_file_list;
scan_parameter_values = raw_scan_parameter_values;
file_list = raw_file_list;
end
% --- Save processed dataset and options to workspace for reuse ---
assignin('base', 'od_imgs', od_imgs);
assignin('base', 'scan_parameter_values', scan_parameter_values);
assignin('base', 'file_list', file_list);
assignin('base', 'prior_options', options);
% --- Save OD images as figures to disk if requested ---
if ~options.skipSaveOD
saveODFigures(od_imgs, options.saveDirectory);
end
fprintf('\n[INFO] OD image dataset ready for further analysis.\n');
end
%% --- Local helper functions ---

View File

@ -58,10 +58,10 @@ function [SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, o
% Decide workspace flag per run by comparing with 50% of available RAM
if runBytes > 0.75 * availableRAM
SAVE_TO_WORKSPACE = false;
fprintf('\n[INFO] Estimated size on memory of Run %s/%s too large (%.2f GB). Will save partially to workspace if not done so already.\n', ...
fprintf('\n[INFO] Estimated size on memory of Run %s/%s too large (%.2f GB). Will save raw data to disk and partially to workspace if not done so already.\n', ...
ds.sequence, runID, runBytes/1e9);
else
fprintf('\n[INFO] Estimated size on memory of Run %s/%s = %.2f GB. Will save completely to workspace if not done so already.\n', ...
fprintf('\n[INFO] Estimated size on memory of Run %s/%s = %.2f GB. Will save raw data completely to workspace if not done so already.\n', ...
ds.sequence, runID, runBytes/1e9);
end
end

View File

@ -1,90 +1,132 @@
function [full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list] = processRawData(options)
%% Reads HDF5 files, computes OD images
%
% Inputs: options.folderPath, options.cam, options.angle, ImagingMode, PulseDuration, scan_parameter, etc.
%
% Returns the OD images and scan parameters immediately in memory.
% This function does NOT do cropping or fringe removal.
%% Reads HDF5 files, computes OD images, supports disk-backed storage in blocks
fprintf('\n[INFO] Processing raw data files at %s ...\n', options.folderPath);
% ===== Group paths in HDF5 files =====
groupList = ["/images/MOT_3D_Camera/in_situ_absorption", ...
"/images/ODT_1_Axis_Camera/in_situ_absorption", ...
"/images/ODT_2_Axis_Camera/in_situ_absorption", ...
"/images/Horizontal_Axis_Camera/in_situ_absorption", ...
"/images/Vertical_Axis_Camera/in_situ_absorption"];
% ===== Find files =====
files = dir(fullfile(options.folderPath, '*.h5'));
nFiles = length(files);
% --- Validate camera index ---
if options.cam < 1 || options.cam > numel(groupList)
error('Invalid camera index: %d', options.cam);
end
files = dir(fullfile(options.folderPath, '*.h5'));
nFiles = numel(files);
if nFiles == 0
error('\nNo HDF5 files found in %s', options.folderPath);
error('No HDF5 files found in %s', options.folderPath);
end
% Determine image size from first file
testFile = fullfile(files(1).folder, files(1).name);
atm_test = double(imrotate(h5read(testFile, append(groupList(options.cam), "/atoms")), options.angle));
[ny, nx] = size(atm_test);
% Determine image size
testFile = fullfile(files(1).folder, files(1).name);
atm_test = double(imrotate(h5read(testFile, append(groupList(options.cam), "/atoms")), options.angle, 'bilinear', 'crop'));
[ny, nx] = size(atm_test);
% --- Preallocate in-memory arrays ---
full_od_imgs = nan(ny, nx, nFiles, 'single');
full_bkg_imgs = nan(ny, nx, nFiles, 'single');
raw_scan_parameter_values = zeros(1, nFiles);
% --- Progress bar ---
if isfield(options, 'showProgressBar') && options.showProgressBar
pb = Helper.ProgressBar();
pb.run('Progress: ');
raw_file_list = string(zeros(1,nFiles)); % always string array
if options.SAVE_TO_WORKSPACE
fprintf('\n[INFO] Creating in-memory arrays of raw data...\n');
full_od_imgs = nan(ny, nx, nFiles, 'single');
full_bkg_imgs = nan(ny, nx, nFiles, 'single');
else
fprintf('\n[INFO] Creating folder of raw data on disk (per-image MAT files)...\n');
rawFolder = fullfile(options.saveDirectory, 'RawData');
if ~exist(rawFolder,'dir'), mkdir(rawFolder); end
full_od_imgs = rawFolder;
full_bkg_imgs = rawFolder;
end
raw_file_list = strings(1, nFiles); % store full file paths
% ===== Loop over files =====
for k = 1:nFiles
fullFileName = fullfile(files(k).folder, files(k).name);
raw_file_list(k) = fullFileName; % track original file
if ~isfield(options, 'showProgressBar') || ~options.showProgressBar
fprintf('Now reading %s\n', fullFileName);
% --- Prepare file names ---
fullFileNames = string(arrayfun(@(f) fullfile(f.folder, f.name), files, 'UniformOutput', false));
% --- Check for Parallel Computing Toolbox ---
useParallel = license('test','Distrib_Computing_Toolbox') && ~options.SAVE_TO_WORKSPACE;
if useParallel
fprintf('\n[INFO] Parallel Computing Toolbox detected. Using parallelization for raw data processing...\n');
% --- Preallocate scan parameters and file list for parallel mode ---
raw_scan_parameter_values = nan(1, nFiles, 'single');
raw_file_list = fullFileNames;
% --- Parallel loop without progress bar ---
parfor k = 1:nFiles
[od_img, bkg_img, val] = readAndComputeOD(fullFileNames(k), options, groupList, ny, nx);
writeRawImagesToDisk(rawFolder, od_img, bkg_img, val, fullFileNames(k), k);
raw_scan_parameter_values(k) = val;
end
else
% --- Standard for-loop with your progress bar ---
showPB = isfield(options,'showProgressBar') && options.showProgressBar;
if showPB && options.SAVE_TO_WORKSPACE
pb = Helper.ProgressBar();
pb.run('Progress: ');
end
try
atm_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/atoms")), options.angle));
bkg_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/background")), options.angle));
dark_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/dark")), options.angle));
od_img = Helper.calculateODImage(atm_img, bkg_img, dark_img, options.ImagingMode, options.PulseDuration);
full_od_imgs(:, :, k) = single(od_img);
full_bkg_imgs(:, :, k) = single(bkg_img);
catch
warning('Missing data in %s, storing NaNs.', fullFileName);
full_od_imgs(:, :, k) = nan(ny, nx, 1, 'single');
full_bkg_imgs(:, :, k) = nan(ny, nx, 1, 'single');
continue;
end
for k = 1:nFiles
[od_img, bkg_img, val] = readAndComputeOD(fullFileNames(k), options, groupList, ny, nx);
% Extract scan parameter
info = h5info(fullFileName, '/globals');
for i = 1:length(info.Attributes)
if strcmp(info.Attributes(i).Name, options.scan_parameter)
if strcmp(options.scan_parameter, 'ps_rot_mag_fin_pol_angle')
raw_scan_parameter_values(k) = 180 - info.Attributes(i).Value;
else
raw_scan_parameter_values(k) = info.Attributes(i).Value;
end
if options.SAVE_TO_WORKSPACE
full_od_imgs(:,:,k) = single(od_img);
full_bkg_imgs(:,:,k) = single(bkg_img);
else
writeRawImagesToDisk(rawFolder, od_img, bkg_img, val, fullFileNames(k), k);
end
raw_scan_parameter_values(k) = val;
raw_file_list(k) = fullFileNames(k);
if showPB && options.SAVE_TO_WORKSPACE
progressPercent = round(k/nFiles*100);
pb.run(progressPercent);
end
end
% Update progress bar
if isfield(options, 'showProgressBar') && options.showProgressBar
progressPercent = round(k / nFiles * 100);
pb.run(progressPercent);
if showPB && options.SAVE_TO_WORKSPACE
pb.run(' Done!');
end
end
end
% Finish progress bar
if isfield(options, 'showProgressBar') && options.showProgressBar
pb.run(' Done!');
function [od_img, bkg_img, val] = readAndComputeOD(fullFileName, options, groupList, ny, nx)
try
atm_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/atoms")), options.angle, 'bilinear', 'crop'));
bkg_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/background")), options.angle, 'bilinear', 'crop'));
dark_img = double(imrotate(h5read(fullFileName, append(groupList(options.cam), "/dark")), options.angle, 'bilinear', 'crop'));
od_img = Helper.calculateODImage(atm_img, bkg_img, dark_img, options.ImagingMode, options.PulseDuration);
catch
warning('\nMissing data in %s, storing NaNs.', fullFileName);
od_img = nan(ny, nx);
bkg_img = nan(ny, nx);
end
% --- Extract scan parameter safely ---
try
info = h5info(fullFileName, '/globals');
attrNames = string({info.Attributes.Name});
if ismember(options.scan_parameter, attrNames)
val = h5readatt(fullFileName, '/globals', options.scan_parameter);
if strcmp(options.scan_parameter,'ps_rot_mag_fin_pol_angle')
val = 180 - val;
end
else
val = NaN;
end
catch
val = NaN;
end
end
function writeRawImagesToDisk(rawFolder, od_img, bkg_img, scan_val, file_name, idx)
% Writes a single OD/BKG image + scan parameter to a MAT file
matFilePath = fullfile(rawFolder, sprintf('Image_%04d.mat', idx));
OD = single(od_img);
BKG = single(bkg_img);
Scan = single(scan_val);
File = string(file_name);
save(matFilePath, 'OD','BKG','Scan','File','-v7.3');
end

View File

@ -2,9 +2,9 @@
% Specify data location to run analysis on
dataSources = {
struct('sequence', 'StructuralPhaseTransition', ...
'date', '2025/08/16', ...
'runs', [8]) % specify run numbers as a string in "" or just as a numeric value
struct('sequence', 'TwoDGas', ...
'date', '2025/06/23', ...
'runs', [300]) % specify run numbers as a string in "" or just as a numeric value
};
options = struct();
@ -18,7 +18,7 @@ options.saveDirectory = fileparts(scriptFullPath);
% Camera / imaging
options.cam = 5;
options.angle = 0;
options.center = [1420, 2050];
options.center = [1410, 2030];
options.span = [200, 200];
options.fraction = [0.1, 0.1];
options.pixel_size = 5.86e-6; % in meters
@ -53,21 +53,23 @@ switch options.measurementName
options.scan_reference_values = [2.45, 2.44, 2.43, 2.42, 2.4, 2.39, 2.38, 2.37, 2.36, 2.35, 2.34, 2.32, 2.3, 2.28, 2.25, 2.2, 2.15, 2.10, 2.0, 1.90, 1.8];
options.titleString = 'BEC to Stripes';
case 'DropletsToStripes'
options.scan_reference_values = [0, 5, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 35, 40];
options.scan_reference_values = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45];
options.titleString = 'Droplets to Stripes';
case 'StripesToDroplets'
options.scan_reference_values = fliplr([0, 5, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 35, 40]);
options.scan_reference_values = fliplr([0, 5, 10, 15, 20, 25, 30, 35, 40, 45]);
options.titleString = 'Stripes to Droplets';
end
% Flags
options.skipNormalization = false;
options.skipUnshuffling = false;
options.skipUnshuffling = true;
options.skipPreprocessing = true;
options.skipMasking = true;
options.skipIntensityThresholding = true;
options.skipBinarization = true;
options.skipSaveFigures = true;
options.skipSaveData = false;
options.skipSaveOD = true;
options.skipLivePlot = false;
options.showProgressBar = true;