function results_all = batchAnalyze(dataSources, options) %% batchAnalyze % Author: Karthik % Date: 2025-09-12 % Version: 1.0 % % Description: % Brief description of the script functionality. % % Notes: % Optional notes, references. arguments dataSources (1,:) cell options struct end % Default base folder if not specified if ~isfield(options, 'baseDataFolder') options.baseDataFolder = '//DyLabNAS/Data'; end % Default skip flag if ~isfield(options, 'skipFullODImagesFolderUse') options.skipFullODImagesFolderUse = false; end % Determine whether to use FullODImagesFolder or raw baseDataFolder useFullOD = false; if isfield(options, 'FullODImagesFolder') && isfolder(options.FullODImagesFolder) if ~isfolder(options.baseDataFolder) if ~options.skipFullODImagesFolderUse % Case 1a: raw folder missing, fallback to FullODImagesFolder useFullOD = true; fprintf('\n[INFO] Raw data folder not found but found full OD Images folder.\n'); fprintf('\n[INFO] Using full OD Images folder: %s\n', options.FullODImagesFolder); else % Case 1b: raw folder missing, fallback to % FullODImagesFolder but user overrides error('Raw data folder not found, found full OD Images folder which cannot be used (set skipFullODImagesFolderUse=false to override). Aborting.\n'); end elseif ~options.skipFullODImagesFolderUse % Case 2: both exist, prioritize FullODImagesFolder unless skipped useFullOD = true; fprintf('\n[INFO] Both raw data folder (%s) and full OD Images folder (%s) found.\n', ... options.baseDataFolder, options.FullODImagesFolder); fprintf('\n[INFO] Prioritizing full OD Images folder (set skipFullODImagesFolderUse=true to override).\n'); else % Case 3: both exist, prioritize raw data folder because of overrride fprintf('\n[INFO] Both raw data folder (%s) and full OD Images folder (%s) found.\n', ... options.baseDataFolder, options.FullODImagesFolder); fprintf('\n[INFO] Prioritizing raw data folder (set skipFullODImagesFolderUse=false to override).\n'); end elseif isfolder(options.baseDataFolder) % 🚨 Raw data folder exists, full OD images does not fprintf('\n[INFO] Full OD Images folder not found but found raw data folder.\n'); fprintf('\n[INFO] Using raw data folder: %s\n', options.baseDataFolder); end % 🚨 Sanity check if neither folder exists if ~useFullOD && ~isfolder(options.baseDataFolder) warning('\n[ERROR] Neither raw data folder (%s) nor a valid full OD Images folder were found. Exiting.', ... options.baseDataFolder); results_all = {}; return; end % ===== Estimate dataset memory and get per-run estimates ===== [options.SAVE_TO_WORKSPACE, ~] = Helper.estimateDatasetMemory(dataSources, options); results_all = {}; % one element per folder % --- FULL OD MODE --- if useFullOD % --- List available FullODImages_* folders --- fullODFolders = dir(fullfile(options.FullODImagesFolder, 'FullODImages_*')); fullODFolders = fullODFolders([fullODFolders.isdir]); ds = dataSources{1}; % only one dataSources struct % Ensure sequences, dates, runs are cell arrays for uniform processing sequences = ds.sequence; if ischar(sequences), sequences = {sequences}; end dates = ds.date; if ischar(dates), dates = {dates}; end runs = ds.runs; if isnumeric(runs), runs = num2cell(runs); end if isstring(runs), runs = cellstr(runs); end % Loop over all combinations of sequence × date × run for seqIdx = 1:numel(sequences) for dateIdx = 1:numel(dates) for runIdx = 1:numel(runs) targetSequence = sequences{seqIdx}; targetDate = dates{dateIdx}; targetRun = runs{runIdx}; matched = false; for i = 1:numel(fullODFolders) selectedPath = fullfile(fullODFolders(i).folder, fullODFolders(i).name); % Load metadata for run info metaFile = fullfile(selectedPath, 'metadata.mat'); if ~isfile(metaFile) warning('No metadata.mat in %s, skipping.', selectedPath); continue; end S = load(metaFile, 'metadata'); % Reconstruct sequence/date/run from stored folderPath dataSourceMeta = makeDataSourceStruct(S.metadata.options.folderPath); % Check for match: measurementName and run info if isfield(S.metadata.options, 'measurementName') && ... isfield(options, 'measurementName') && ... strcmp(S.metadata.options.measurementName, options.measurementName) && ... strcmp(dataSourceMeta{1}.sequence, targetSequence) && ... strcmp(dataSourceMeta{1}.date, targetDate) && ... dataSourceMeta{1}.runs == targetRun fprintf('\n[INFO] Found matching full OD images subfolder: %s\n', fullODFolders(i).name); options.selectedPath = selectedPath; options.folderPath = S.metadata.options.folderPath; matched = true; break; end end if ~matched warning('No matching full OD images subfolder found for sequence %s, date %s, run %s, measurementName %s.', ... targetSequence, targetDate, targetRun, options.measurementName); continue; % skip this run but continue to next combination end % ✅ Proceed to analysis for this combination try args = [fieldnames(options), struct2cell(options)]'; args = args(:)'; [analysisResults, scan_parameter_values, scan_reference_values] = Analyzer.performAnalysis(args{:}); result = struct(); result.sequence = targetSequence; result.date = targetDate; result.run = targetRun; result.path = options.folderPath; result.options = options; result.results = analysisResults; result.scan_parameter_values = scan_parameter_values; result.scan_reference_values = scan_reference_values; % Save dataset as MAT if ~isfield(options, 'skipSaveData') || ~options.skipSaveData saveResultStruct(result, options.saveDirectory); end results_all{end+1,1} = result; catch ME warning("Error processing %s: %s", options.folderPath, ME.message); end end end end return; % ✅ handled all in FullOD mode end % --- RAW MODE (default) --- ds = dataSources{1}; % single struct % Ensure sequences, dates, and runs are cell arrays sequences = ds.sequence; if ischar(sequences), sequences = {sequences}; end dates = ds.date; if ischar(dates), dates = {dates}; end runs = ds.runs; if isnumeric(runs), runs = num2cell(runs); end if isstring(runs), runs = cellstr(runs); end % Loop over all combinations of sequence × date × run for seqIdx = 1:numel(sequences) for dateIdx = 1:numel(dates) for runIdx = 1:numel(runs) targetSequence = sequences{seqIdx}; targetDate = dates{dateIdx}; runItem = runs{runIdx}; % Convert runItem to string with leading zeros if numeric if isnumeric(runItem) runID = sprintf('%04d', runItem); elseif isstring(runItem) runID = runItem; elseif ischar(runItem) runID = string(runItem); elseif iscell(runItem) runID = string(runItem{1}); else error('Unsupported type for run entry: %s', class(runItem)); end % Determine base folder if isfield(ds, 'baseFolder') && ~isempty(ds.baseFolder) baseFolder = fullfile(ds.baseFolder, targetSequence, targetDate); else baseFolder = fullfile(options.baseDataFolder, targetSequence, targetDate); end % Build folder path folderPath = fullfile(baseFolder, runID); options.folderPath = folderPath; try % Convert struct -> name-value args args = [fieldnames(options), struct2cell(options)]'; args = args(:)'; % Perform analysis [analysisResults, scan_parameter_values] = Analyzer.performAnalysis(args{:}); % Store results result = struct(); result.sequence = targetSequence; result.date = targetDate; result.run = runID; result.path = folderPath; result.options = options; result.results = analysisResults; result.scan_parameter_values = scan_parameter_values; % Save dataset if ~isfield(options, 'skipSaveData') || ~options.skipSaveData saveResultStruct(result, options.saveDirectory); end % Append to output results_all{end+1,1} = result; catch ME warning("Error processing %s/%s/%s: %s", ... targetSequence, targetDate, runID, ME.message); end end end end end %% --- Local helper functions --- function saveResultStruct(result, saveDirectory) % Define results folder resultsFolder = fullfile(saveDirectory, "Results", "SavedData"); if ~exist(resultsFolder, 'dir') mkdir(resultsFolder); end % Path to index file indexFile = fullfile(resultsFolder, "datasetsIndex.mat"); % Load or initialize index if isfile(indexFile) S = load(indexFile, "nextIdx"); nextIdx = S.nextIdx; else nextIdx = 1; end % Variable name and file path varName = sprintf('Dataset_%d', nextIdx); savePath = fullfile(resultsFolder, varName + ".mat"); % Save dataset as struct inside MAT file S.(varName) = result; save(savePath, '-struct', 'S'); % Update index nextIdx = nextIdx + 1; save(indexFile, "nextIdx"); end function dataSource = makeDataSourceStruct(folderPath) % Split by file separators (handles / or \) parts = regexp(folderPath, '[\\/]', 'split'); % Remove empty parts caused by leading slashes parts = parts(~cellfun('isempty', parts)); % Extract sequence, date, and run number % Now the indices are correct: % parts = {'DyLabNAS', 'Data', 'StructuralPhaseTransition', '2025', '08', '13', '0062'} sequence = parts{3}; % "StructuralPhaseTransition" year = parts{4}; % "2025" month = parts{5}; % "08" day = parts{6}; % "13" runStr = parts{7}; % "0062" % Build date string dateStr = sprintf('%s/%s/%s', year, month, day); % Convert run string to number runNum = str2double(runStr); % Construct struct inside a cell array dataSource = { struct('sequence', sequence, ... 'date', dateStr, ... 'runs', runNum) }; end