function [full_od_imgs, full_bkg_imgs, raw_scan_parameter_values, raw_file_list, scan_parameter_names, scan_reference_values] = processRawData(options)
%% processRawData
% Author:       Karthik
% Date:         2025-09-12
% Version:      1.0
%
% Description:
%   Reads every *.h5 file in options.folderPath, computes one optical-density
%   (OD) image per file through Helper.calculateODImage, and either keeps the
%   images in memory or writes one MAT file per input file to disk.
%
% Input (fields of the struct "options" that this function reads):
%   folderPath             - folder that contains the *.h5 files
%   cam                    - index (1..4) into the camera group list below
%   angle                  - rotation angle handed to imrotate
%   ImagingMode            - forwarded to Helper.calculateODImage
%   PulseDuration          - forwarded to Helper.calculateODImage
%   SAVE_TO_WORKSPACE      - true: return image stacks; false: write to disk
%   saveDirectory          - parent folder for disk output (fallback)
%   FullODImagesFolder     - optional; preferred parent folder if it exists
%   scan_parameter         - optional; char or cell array of attribute names
%   ignore_scan_parameter  - optional; names to drop from auto-detection
%   scan_reference_values  - optional; returned (as double) if supplied
%   showProgressBar        - optional; only honoured with SAVE_TO_WORKSPACE
%
% Outputs:
%   full_od_imgs, full_bkg_imgs - ny-by-nx-by-nFiles single arrays when
%                                 SAVE_TO_WORKSPACE is true, otherwise []
%   raw_scan_parameter_values   - 1-by-nFiles row for zero or one scan
%                                 parameter, nFiles-by-nParams otherwise
%   raw_file_list               - 1-by-nFiles string array of full paths
%   scan_parameter_names        - name(s) used to read the scan values
%                                 (NaN when nothing could be detected)
%   scan_reference_values       - unique scan values in order of appearance,
%                                 or options.scan_reference_values as double
%
% Notes:
%   Parallel processing is only used when images are written to disk.

    fprintf('\n[INFO] Processing raw data files at %s ...\n', options.folderPath);

    groupList = ["/images/ODT_1_Axis_Camera/in_situ_absorption", ...
                 "/images/ODT_2_Axis_Camera/in_situ_absorption", ...
                 "/images/Horizontal_Axis_Camera/in_situ_absorption", ...
                 "/images/Vertical_Axis_Camera/in_situ_absorption"];

    % --- Validate camera index ---
    if options.cam < 1 || options.cam > numel(groupList)
        error('Invalid camera index: %d', options.cam);
    end

    files  = dir(fullfile(options.folderPath, '*.h5'));
    nFiles = numel(files);
    if nFiles == 0
        error('No HDF5 files found in %s', options.folderPath);
    end

    % --- Prepare file names (built once, reused for metadata and loops) ---
    fullFileNames = string(arrayfun(@(f) fullfile(f.folder, f.name), files, 'UniformOutput', false));

    % --- Determine image size from the first file ---
    testFile    = fullfile(files(1).folder, files(1).name);
    cameraGroup = groupList(options.cam);

    try
        info = h5info(testFile, cameraGroup); % fails if the group is absent
    catch
        error('Group "%s" not found in file "%s". Aborting.', cameraGroup, testFile);
    end

    datasetNames = {info.Datasets.Name};
    if ~ismember('atoms', datasetNames)
        error('Dataset "%s/atoms" not found in file "%s". Aborting.', cameraGroup, testFile);
    end

    % If we reach here, the dataset exists
    atm_test = double(imrotate(h5read(testFile, append(cameraGroup, "/atoms")), ...
                               options.angle, 'bilinear', 'crop'));
    [ny, nx] = size(atm_test);

    full_od_imgs  = [];
    full_bkg_imgs = [];

    % Always a 1-by-nFiles string array, regardless of the processing path.
    raw_file_list = reshape(fullFileNames, 1, []);

    if options.SAVE_TO_WORKSPACE
        fprintf('\n[INFO] Creating in-memory arrays of raw data...\n');
        full_od_imgs  = nan(ny, nx, nFiles, 'single');
        full_bkg_imgs = nan(ny, nx, nFiles, 'single');
    else
        % --- Create uniquely identified full OD image folder ---
        dataSource = makeDataSourceStruct(options.folderPath);
        runID = sprintf('%s_%s_Run%04d', ...
                        dataSource{1}.sequence, ...
                        strrep(dataSource{1}.date, '/', '-'), ...
                        dataSource{1}.runs);

        % Check emptiness before isfolder so an empty value is never probed.
        hasCustomFolder = isfield(options, 'FullODImagesFolder') && ...
                          ~isempty(options.FullODImagesFolder) && ...
                          isfolder(options.FullODImagesFolder);
        if hasCustomFolder
            parentFolder = options.FullODImagesFolder;
        else
            parentFolder = options.saveDirectory;
        end

        fullODImageFolder = fullfile(parentFolder, ['FullODImages_' runID]);
        if ~exist(fullODImageFolder, 'dir')
            mkdir(fullODImageFolder);
        end
        fprintf('\n[INFO] Creating folder of full OD images on disk: %s\n', fullODImageFolder);

        % --- Save metadata for this run ---
        metadata.options   = options;
        metadata.timestamp = datetime;
        metadata.runID     = runID;
        metadata.imageSize = [ny, nx];
        metadata.fileList  = fullFileNames;
        save(fullfile(fullODImageFolder, 'metadata.mat'), 'metadata', '-v7.3');
    end

    % --- Use specified scan parameter, auto-detect if not specified ---
    if isfield(options, 'scan_parameter') && ~isempty(options.scan_parameter)
        fprintf('\n[INFO] Using user-specified scan parameter(s): ');
        if iscell(options.scan_parameter)
            fprintf('%s\n', strjoin(options.scan_parameter, ', '));
            scan_parameter_names = options.scan_parameter;
            nParams = numel(scan_parameter_names);
        else
            fprintf('%s\n', options.scan_parameter);
            scan_parameter_names = options.scan_parameter;
            nParams = 1;
        end
    else
        [scan_parameter_names, nParams] = detectScanParametersFromFiles(fullFileNames);

        if isfield(options, 'ignore_scan_parameter') && ~isempty(options.ignore_scan_parameter)
            % cellstr accepts char, string and cellstr input alike.
            ignoreList = cellstr(options.ignore_scan_parameter);
            fprintf('\n[INFO] Ignoring the following scan parameter(s): %s\n', strjoin(ignoreList, ', '));
            if nParams > 0
                [scan_parameter_names, nParams] = filterScanParameters(scan_parameter_names, ignoreList);
            end
        end
    end

    % --- Preallocate temp scan values for parfor ---
    temp_scan_vals = cell(nFiles, 1);

    % --- Check for Parallel Computing Toolbox ---
    useParallel = license('test', 'Distrib_Computing_Toolbox') && ~options.SAVE_TO_WORKSPACE;

    if useParallel
        fprintf('\n[INFO] Parallel Computing Toolbox detected. Using parallelization for raw data processing...\n');
        % This path is only reached when images go to disk, so the loop body
        % never touches the in-memory image stacks.
        parfor k = 1:nFiles
            [od_img, bkg_img, val] = readAndComputeOD(fullFileNames(k), options, groupList, ny, nx, scan_parameter_names);
            temp_scan_vals{k} = val(:).';
            writeFullODImagesToDisk(fullODImageFolder, od_img, bkg_img, scan_parameter_names, val, fullFileNames(k), k);
        end
    else
        showPB = isfield(options, 'showProgressBar') && options.showProgressBar;
        showPB = showPB && options.SAVE_TO_WORKSPACE;
        if showPB
            pb = Helper.ProgressBar();
            pb.run('Progress: ');
        end

        for k = 1:nFiles
            [od_img, bkg_img, val] = readAndComputeOD(fullFileNames(k), options, groupList, ny, nx, scan_parameter_names);
            temp_scan_vals{k} = val(:).';

            if options.SAVE_TO_WORKSPACE
                full_od_imgs(:,:,k)  = single(od_img);
                full_bkg_imgs(:,:,k) = single(bkg_img);
            else
                writeFullODImagesToDisk(fullODImageFolder, od_img, bkg_img, scan_parameter_names, val, fullFileNames(k), k);
            end

            if showPB
                progressPercent = round(k / nFiles * 100);
                pb.run(progressPercent);
            end
        end

        if showPB
            pb.run(' Done!');
        end
    end

    % --- Convert cell array to numeric matrix after the loop ---
    % readAndComputeOD always returns one value per requested name, so every
    % cell has the same width and the concatenations below cannot mismatch.
    if nParams <= 1
        raw_scan_parameter_values = cellfun(@double, temp_scan_vals); % enforce double here
        raw_scan_parameter_values = reshape(raw_scan_parameter_values, 1, []); % row vector
    else
        raw_scan_parameter_values = cellfun(@double, temp_scan_vals, 'UniformOutput', false);
        raw_scan_parameter_values = vertcat(raw_scan_parameter_values{:}); % rows = files, cols = parameters
    end

    % --- Determine scan reference values ---
    if ~isfield(options, 'scan_reference_values') || isempty(options.scan_reference_values)
        if isvector(raw_scan_parameter_values)
            % Single parameter case
            scan_reference_values = unique(raw_scan_parameter_values(:), 'stable');
            scan_reference_values = scan_reference_values(:).'; % row vector
        else
            % Multi-parameter case: unique rows
            scan_reference_values = unique(raw_scan_parameter_values, 'rows', 'stable');
        end
    else
        % Ensure anything coming from options is also double, once, here
        scan_reference_values = double(options.scan_reference_values);
    end
end

%% --- Local helper functions ---

function [names, nParams] = filterScanParameters(names, ignoreList)
% Removes the entries of ignoreList from the detected scan parameter names.
%
% Inputs:
%   names      - char vector (one name) or cell array of char vectors
%   ignoreList - cell array of char vectors
%
% Outputs:
%   names   - same container type as the input; NaN when nothing is left
%   nParams - number of names that remain
%
% The names are compared as whole strings. Indexing a char vector with the
% scalar result of ismember would instead select single characters.
    wasCell    = iscell(names);
    candidates = cellstr(names);
    kept       = candidates(~ismember(candidates, ignoreList));
    nParams    = numel(kept);

    if nParams == 0
        names = NaN; % same "nothing found" marker used by the detector
    elseif wasCell
        names = kept;
    else
        names = kept{1};
    end
end

function [od_img, bkg_img, val] = readAndComputeOD(fullFileName, options, groupList, ny, nx, scanParamNames)
% Reads the atoms/background/dark images of one file, computes the OD image
% and reads the scan parameter value(s) from the /globals attributes.
%
% Outputs:
%   od_img, bkg_img - images; ny-by-nx NaN arrays if anything failed
%   val             - 1-by-N double, one entry per requested name, with NaN
%                     for every attribute that could not be read

    try
        groupName = groupList(options.cam);
        atm_img  = double(imrotate(h5read(fullFileName, append(groupName, "/atoms")), options.angle, 'bilinear', 'crop'));
        bkg_img  = double(imrotate(h5read(fullFileName, append(groupName, "/background")), options.angle, 'bilinear', 'crop'));
        dark_img = double(imrotate(h5read(fullFileName, append(groupName, "/dark")), options.angle, 'bilinear', 'crop'));
        od_img   = Helper.calculateODImage(atm_img, bkg_img, dark_img, options.ImagingMode, options.PulseDuration);
    catch
        warning('\nMissing data in %s, storing NaNs.', fullFileName);
        od_img  = nan(ny, nx);
        bkg_img = nan(ny, nx);
    end

    % --- Read scan parameter(s) for this file ---
    if iscell(scanParamNames)
        names = scanParamNames;
    else
        names = {scanParamNames};
    end

    % Each attribute gets its own fallback so that one unreadable attribute
    % neither discards the others nor changes the width of val.
    val = NaN(1, numel(names));
    for j = 1:numel(names)
        try
            attr = h5readatt(fullFileName, '/globals', names{j});
            if (isnumeric(attr) || islogical(attr)) && ~isempty(attr)
                val(j) = double(attr(1)); % first element, as in the detector
            end
        catch
            % leave NaN for this parameter
        end
    end
end

function [scanParamNames, nParams] = detectScanParametersFromFiles(fileNames, minFilesToCheck)
% Detect scan parameter(s) by checking which numeric attributes vary across multiple files
%
% Inputs:
%   fileNames       - string array or cell array of full HDF5 file paths
%   minFilesToCheck - number of leading files to compare (default: 3)
%
% Outputs:
%   scanParamNames  - char array (single), cell array of char arrays
%                     (multiple), or NaN when nothing was detected
%   nParams         - number of detected scan parameters
%
% Only the first min(minFilesToCheck, numel(fileNames)) files are compared.
% The attribute named "runs" is never reported.

    if nargin < 2
        minFilesToCheck = 3;
    end

    % Ensure fileNames is a string array
    if iscell(fileNames)
        fileNames = string(fileNames);
    end

    nFiles = numel(fileNames);
    nCheck = min(minFilesToCheck, nFiles);

    fprintf('\n[INFO] Detecting scan parameter(s)...\n');

    % Read attribute names from first file
    info = h5info(fileNames(1));
    isGlobals = strcmp({info.Groups.Name}, '/globals');
    if any(isGlobals)
        globalsGroup = info.Groups(isGlobals);
    else
        warning('\n[WARNING] /globals group not found in first file.');
        scanParamNames = NaN;
        nParams = 0;
        return;
    end

    attrNames = string({globalsGroup.Attributes.Name});
    numericAttrNames = strings(0,1);

    % Identify numeric attributes
    for j = 1:numel(attrNames)
        if attrNames(j) == "runs"
            continue;
        end
        val = h5readatt(fileNames(1), '/globals', attrNames(j));
        if isnumeric(val)
            numericAttrNames(end+1) = attrNames(j); %#ok
        end
    end

    if isempty(numericAttrNames)
        fprintf('\n[WARNING] No numeric attributes in /globals.\n');
        scanParamNames = NaN;
        nParams = 0;
        return;
    end

    % --- Check which numeric attributes vary across first few files ---
    varyingParams = strings(0,1);
    for j = 1:numel(numericAttrNames)
        attrName = numericAttrNames(j);
        values = NaN(1, nCheck);
        for k = 1:nCheck
            try
                val = h5readatt(fileNames(k), '/globals', attrName);
                if isnumeric(val)
                    values(k) = val(1); % use first element if array
                end
            catch
                values(k) = NaN;
            end
        end

        % Ignore NaNs when checking for variation
        if numel(unique(values(~isnan(values)))) > 1
            varyingParams(end+1) = attrName; %#ok
        end
    end

    % --- Return result ---
    nParams = numel(varyingParams);
    if nParams == 0
        fprintf('\n[INFO] No varying scan parameters detected.\n');
        scanParamNames = NaN;
    elseif nParams == 1
        scanParamNames = char(varyingParams); % single char array
        fprintf('\n[INFO] Single scan parameter detected: %s\n', scanParamNames);
    else
        scanParamNames = cellstr(varyingParams); % cell array of char arrays
        fprintf('\n[INFO] Multiple scan parameters detected: %s\n', strjoin(varyingParams, ', '));
    end
end

function writeFullODImagesToDisk(fullODImageFolder, od_img, bkg_img, param, val, file_name, idx)
% Writes OD/BKG image + scan parameter(s) to the MAT file
% <fullODImageFolder>/Image_<idx, 4 digits>.mat with the variables
% OD, BKG, Scan_Param, Scan_Val and File. Images and scan values are
% stored as single.

    matFilePath = fullfile(fullODImageFolder, sprintf('Image_%04d.mat', idx));

    OD         = single(od_img);
    BKG        = single(bkg_img);
    File       = string(file_name);
    Scan_Param = param;
    Scan_Val   = single(val);

    save(matFilePath, 'OD', 'BKG', 'Scan_Param', 'Scan_Val', 'File', '-v7.3');
end

function dataSource = makeDataSourceStruct(folderPath)
% Derives sequence name, date and run number from the folder path.
%
% The path is split at / and \ and empty pieces are dropped; components
% 3 to 7 are then read as sequence, year, month, day and run, e.g.
%   {'DyLabNAS', 'Data', 'StructuralPhaseTransition', '2025', '08', '13', '0062'}
%
% Output:
%   dataSource - 1-by-1 cell holding a struct with the fields
%                sequence (char), date ('yyyy/mm/dd' char), runs (double)

    folderPath = char(folderPath); % accept string scalars as well

    % Split by file separators (handles / or \)
    parts = regexp(folderPath, '[\\/]', 'split');

    % Remove empty parts caused by leading or doubled separators
    parts = parts(~cellfun('isempty', parts));

    if numel(parts) < 7
        error('processRawData:unexpectedFolderLayout', ...
              'Cannot derive sequence/date/run from "%s": expected at least 7 path components, found %d.', ...
              folderPath, numel(parts));
    end

    sequence = parts{3}; % "StructuralPhaseTransition"
    year     = parts{4}; % "2025"
    month    = parts{5}; % "08"
    day      = parts{6}; % "13"
    runStr   = parts{7}; % "0062"

    % Build date string
    dateStr = sprintf('%s/%s/%s', year, month, day);

    % Convert run string to number
    runNum = str2double(runStr);

    % Construct struct inside a cell array
    dataSource = { struct('sequence', sequence, ...
                          'date', dateStr, ...
                          'runs', runNum) };
end