Calculations/Data-Analyzer/+Helper/batchAnalyze.m

303 lines
12 KiB
Matlab
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

function results_all = batchAnalyze(dataSources, options)
%% batchAnalyze
% Author:   Karthik
% Date:     2025-09-12
% Version:  1.0
%
% Description:
%   Runs Analyzer.performAnalysis once for every sequence x date x run
%   combination described by dataSources{1} and returns the collected
%   results as a column cell array of structs (one per successful run).
%
%   Two modes are supported:
%     * Full OD mode - used when options.FullODImagesFolder exists and
%       options.skipFullODImagesFolderUse is false. Each requested run is
%       matched against the metadata.mat stored in the FullODImages_*
%       subfolders (measurement name, sequence, date and run must agree).
%     * Raw mode     - otherwise; the run folder is built as
%       <base>/<sequence>/<date>/<run>, where <base> is ds.baseFolder if
%       present and non-empty, else options.baseDataFolder.
%
% Inputs:
%   dataSources - 1xN cell; only dataSources{1} is analysed. It must be a
%                 struct with fields 'sequence', 'date' and 'runs'
%                 (optionally 'baseFolder').
%   options     - struct of options. Every field is forwarded to
%                 Analyzer.performAnalysis as a name-value pair. Fields
%                 read here: baseDataFolder (default '//DyLabNAS/Data'),
%                 skipFullODImagesFolderUse (default false),
%                 FullODImagesFolder, measurementName, skipSaveData,
%                 saveDirectory.
%
% Output:
%   results_all - Mx1 cell of result structs; empty if no data folder is
%                 available or nothing could be analysed.
%
% Notes:
%   A failure while analysing a single run is reported as a warning and
%   the batch continues with the next run.
    arguments
        dataSources (1,:) cell
        options struct
    end

    results_all = {};   % one element per analysed folder

    % ----- Defaults -----------------------------------------------------
    if ~isfield(options, 'baseDataFolder')
        options.baseDataFolder = '//DyLabNAS/Data';
    end
    if ~isfield(options, 'skipFullODImagesFolderUse')
        options.skipFullODImagesFolderUse = false;
    end

    % ----- Decide between the full OD images folder and raw data ---------
    useFullOD  = false;
    haveRaw    = isfolder(options.baseDataFolder);
    haveFullOD = isfield(options, 'FullODImagesFolder') && isfolder(options.FullODImagesFolder);

    if haveFullOD
        if ~haveRaw
            if ~options.skipFullODImagesFolderUse
                % Case 1a: raw folder missing, fall back to FullODImagesFolder
                useFullOD = true;
                fprintf('\n[INFO] Raw data folder not found but found full OD Images folder.\n');
                fprintf('\n[INFO] Using full OD Images folder: %s\n', options.FullODImagesFolder);
            else
                % Case 1b: raw folder missing and the user forbade the fallback.
                % No trailing "\n": error() with a single input does not
                % translate escape sequences, so it would be printed literally.
                error('Raw data folder not found, found full OD Images folder which cannot be used (set skipFullODImagesFolderUse=false to override). Aborting.');
            end
        elseif ~options.skipFullODImagesFolderUse
            % Case 2: both exist, prioritize FullODImagesFolder unless skipped
            useFullOD = true;
            fprintf('\n[INFO] Both raw data folder (%s) and full OD Images folder (%s) found.\n', ...
                options.baseDataFolder, options.FullODImagesFolder);
            fprintf('\n[INFO] Prioritizing full OD Images folder (set skipFullODImagesFolderUse=true to override).\n');
        else
            % Case 3: both exist, prioritize raw data folder because of override
            fprintf('\n[INFO] Both raw data folder (%s) and full OD Images folder (%s) found.\n', ...
                options.baseDataFolder, options.FullODImagesFolder);
            fprintf('\n[INFO] Prioritizing raw data folder (set skipFullODImagesFolderUse=false to override).\n');
        end
    elseif haveRaw
        % Raw data folder exists, full OD images folder does not
        fprintf('\n[INFO] Full OD Images folder not found but found raw data folder.\n');
        fprintf('\n[INFO] Using raw data folder: %s\n', options.baseDataFolder);
    end

    % Sanity check: neither folder is usable
    if ~useFullOD && ~haveRaw
        warning('\n[ERROR] Neither raw data folder (%s) nor a valid full OD Images folder were found. Exiting.', ...
            options.baseDataFolder);
        return;
    end

    % ----- Validate the data source list ---------------------------------
    if isempty(dataSources)
        warning('batchAnalyze received an empty dataSources list; nothing to analyze.');
        return;
    end
    if numel(dataSources) > 1
        % Only the first entry is used below; make that visible to the caller.
        warning('batchAnalyze only processes dataSources{1}; %d additional entries are ignored.', ...
            numel(dataSources) - 1);
    end

    % ===== Estimate dataset memory and get per-run estimates =====
    [options.SAVE_TO_WORKSPACE, ~] = Helper.estimateDatasetMemory(dataSources, options);

    % Normalize sequence / date / run specifications to cell arrays
    ds        = dataSources{1};
    sequences = normalizeToCell(ds.sequence);
    dates     = normalizeToCell(ds.date);
    runs      = normalizeToCell(ds.runs);

    % --- FULL OD MODE ---
    if useFullOD
        % Read every metadata.mat once instead of once per combination.
        candidates         = collectFullODCandidates(options.FullODImagesFolder);
        hasMeasurementName = isfield(options, 'measurementName');

        for seqIdx = 1:numel(sequences)
            for dateIdx = 1:numel(dates)
                for runIdx = 1:numel(runs)
                    targetSequence = sequences{seqIdx};
                    targetDate     = dates{dateIdx};
                    targetRun      = runs{runIdx};
                    % Stored runs are numeric; compare on a numeric scalar so
                    % that runs given as text ('0062') also match.
                    targetRunNum   = runToNumber(targetRun);

                    matched = false;
                    if hasMeasurementName
                        for c = 1:numel(candidates)
                            cand = candidates(c);
                            if cand.hasMeasurementName && ...
                                    strcmp(cand.measurementName, options.measurementName) && ...
                                    strcmp(cand.sequence, targetSequence) && ...
                                    strcmp(cand.date, targetDate) && ...
                                    isequal(cand.run, targetRunNum)
                                fprintf('\n[INFO] Found matching full OD images subfolder: %s\n', cand.name);
                                options.selectedPath = cand.path;
                                options.folderPath   = cand.folderPath;
                                matched = true;
                                break;
                            end
                        end
                    end

                    if ~matched
                        if hasMeasurementName
                            measurementLabel = char(string(options.measurementName));
                        else
                            measurementLabel = '<not specified>';
                        end
                        warning('No matching full OD images subfolder found for sequence %s, date %s, run %s, measurementName %s.', ...
                            targetSequence, targetDate, runToLabel(targetRun), measurementLabel);
                        continue; % skip this run but continue to next combination
                    end

                    % Proceed to analysis for this combination
                    try
                        % Convert struct -> name-value args
                        args = [fieldnames(options), struct2cell(options)]';
                        args = args(:)';
                        [analysisResults, scan_parameter_values, scan_reference_values] = Analyzer.performAnalysis(args{:});

                        result = struct();
                        result.sequence = targetSequence;
                        result.date = targetDate;
                        result.run = targetRun;
                        result.path = options.folderPath;
                        result.options = options;
                        result.results = analysisResults;
                        result.scan_parameter_values = scan_parameter_values;
                        result.scan_reference_values = scan_reference_values;

                        % Save dataset as MAT
                        if ~isfield(options, 'skipSaveData') || ~options.skipSaveData
                            saveResultStruct(result, options.saveDirectory);
                        end
                        results_all{end+1,1} = result; %#ok<AGROW>
                    catch ME
                        warning("Error processing %s: %s", options.folderPath, ME.message);
                    end
                end
            end
        end
        return; % handled everything in full OD mode
    end

    % --- RAW MODE (default) ---
    for seqIdx = 1:numel(sequences)
        for dateIdx = 1:numel(dates)
            for runIdx = 1:numel(runs)
                targetSequence = sequences{seqIdx};
                targetDate     = dates{dateIdx};
                runItem        = runs{runIdx};

                % Convert runItem to a folder name (zero-padded if numeric)
                if isnumeric(runItem)
                    runID = sprintf('%04d', runItem);
                elseif isstring(runItem)
                    runID = runItem;
                elseif ischar(runItem)
                    runID = string(runItem);
                elseif iscell(runItem)
                    runID = string(runItem{1});
                else
                    error('Unsupported type for run entry: %s', class(runItem));
                end

                % Determine base folder
                if isfield(ds, 'baseFolder') && ~isempty(ds.baseFolder)
                    baseFolder = fullfile(ds.baseFolder, targetSequence, targetDate);
                else
                    baseFolder = fullfile(options.baseDataFolder, targetSequence, targetDate);
                end

                % Build folder path
                folderPath = fullfile(baseFolder, runID);
                options.folderPath = folderPath;

                try
                    % Convert struct -> name-value args
                    args = [fieldnames(options), struct2cell(options)]';
                    args = args(:)';

                    % Perform analysis
                    [analysisResults, scan_parameter_values] = Analyzer.performAnalysis(args{:});

                    % Store results
                    result = struct();
                    result.sequence = targetSequence;
                    result.date = targetDate;
                    result.run = runID;
                    result.path = folderPath;
                    result.options = options;
                    result.results = analysisResults;
                    result.scan_parameter_values = scan_parameter_values;

                    % Save dataset
                    if ~isfield(options, 'skipSaveData') || ~options.skipSaveData
                        saveResultStruct(result, options.saveDirectory);
                    end

                    % Append to output
                    results_all{end+1,1} = result; %#ok<AGROW>
                catch ME
                    warning("Error processing %s/%s/%s: %s", ...
                        targetSequence, targetDate, runID, ME.message);
                end
            end
        end
    end
end

function list = normalizeToCell(value)
% Wrap a char, string array or numeric array into a cell array; other
% inputs (e.g. an existing cell array) are returned unchanged.
    if ischar(value)
        list = {value};
    elseif isstring(value)
        list = cellstr(value);
    elseif isnumeric(value)
        list = num2cell(value);
    else
        list = value;
    end
end

function num = runToNumber(runItem)
% Convert a run specification (numeric, char, string, or a cell wrapping
% one of those) to a double; NaN when it cannot be interpreted.
    if iscell(runItem) && ~isempty(runItem)
        runItem = runItem{1};
    end
    if isnumeric(runItem)
        num = double(runItem);
    elseif ischar(runItem) || isstring(runItem)
        num = str2double(runItem);
    else
        num = NaN;
    end
end

function label = runToLabel(runItem)
% Render a run specification as text for use in '%s' message formats.
    if iscell(runItem) && ~isempty(runItem)
        runItem = runItem{1};
    end
    if isnumeric(runItem) || ischar(runItem) || isstring(runItem)
        label = char(string(runItem));
    else
        label = sprintf('<%s>', class(runItem));
    end
end

function candidates = collectFullODCandidates(fullODRoot)
% Scan the FullODImages_* subfolders of fullODRoot and return one struct per
% subfolder whose metadata.mat could be read and parsed.
    candidates = struct('name', {}, 'path', {}, 'folderPath', {}, ...
        'hasMeasurementName', {}, 'measurementName', {}, ...
        'sequence', {}, 'date', {}, 'run', {});

    listing = dir(fullfile(fullODRoot, 'FullODImages_*'));
    listing = listing([listing.isdir]);

    for k = 1:numel(listing)
        subfolder = fullfile(listing(k).folder, listing(k).name);
        metaFile  = fullfile(subfolder, 'metadata.mat');
        if ~isfile(metaFile)
            warning('No metadata.mat in %s, skipping.', subfolder);
            continue;
        end

        % A malformed metadata file must not abort the whole batch.
        try
            S = load(metaFile, 'metadata');
            storedOptions = S.metadata.options;
            % Reconstruct sequence/date/run from the stored folderPath
            sourceMeta = makeDataSourceStruct(storedOptions.folderPath);
        catch ME
            warning('Could not read run information from %s (%s), skipping.', metaFile, ME.message);
            continue;
        end

        entry = struct();
        entry.name               = listing(k).name;
        entry.path               = subfolder;
        entry.folderPath         = storedOptions.folderPath;
        entry.hasMeasurementName = isfield(storedOptions, 'measurementName');
        if entry.hasMeasurementName
            entry.measurementName = storedOptions.measurementName;
        else
            entry.measurementName = '';
        end
        entry.sequence           = sourceMeta{1}.sequence;
        entry.date               = sourceMeta{1}.date;
        entry.run                = sourceMeta{1}.runs;

        candidates(end+1) = entry; %#ok<AGROW>
    end
end
%% --- Local helper functions ---
function saveResultStruct(result, saveDirectory)
% saveResultStruct  Store one result struct as Dataset_<N>.mat.
%
%   Files are written to <saveDirectory>/Results/SavedData. The running
%   index N is kept in datasetsIndex.mat (variable 'nextIdx') in the same
%   folder; it starts at 1 when the index file does not exist and is
%   incremented after each save.
%
% Inputs:
%   result        - value to store; saved under the variable name Dataset_<N>.
%   saveDirectory - root directory under which Results/SavedData is created.

    % Define results folder
    resultsFolder = fullfile(saveDirectory, "Results", "SavedData");
    if ~isfolder(resultsFolder)
        mkdir(resultsFolder);
    end

    % Load or initialize the running index
    indexFile = fullfile(resultsFolder, "datasetsIndex.mat");
    nextIdx = 1;
    if isfile(indexFile)
        indexData = load(indexFile, "nextIdx");
        if isfield(indexData, 'nextIdx')
            nextIdx = indexData.nextIdx;
        end
    end

    % Variable name and file path
    varName  = sprintf('Dataset_%d', nextIdx);
    savePath = fullfile(resultsFolder, varName + ".mat");

    % Use a dedicated payload struct so that the dataset file contains only
    % Dataset_<N> and not the 'nextIdx' variable read from the index file.
    payload = struct();
    payload.(varName) = result;
    save(savePath, '-struct', 'payload');

    % Update index
    nextIdx = nextIdx + 1;
    save(indexFile, "nextIdx");
end
function dataSource = makeDataSourceStruct(folderPath)
% makeDataSourceStruct  Recover sequence, date and run from a run folder path.
%
%   The path is expected to END with <sequence>/<year>/<month>/<day>/<run>,
%   e.g. '//DyLabNAS/Data/StructuralPhaseTransition/2025/08/13/0062'.
%   Components are taken from the end of the path, so any base folder in
%   front of the sequence name is accepted. Both '/' and '\' separators are
%   handled, and folderPath may be a char vector or a string scalar.
%
% Input:
%   folderPath - path of the run folder.
%
% Output:
%   dataSource - 1x1 cell holding a struct with fields
%                  sequence (char), date ('YYYY/MM/DD' char) and
%                  runs (double; NaN if the run folder name is not numeric).
%
% Raises:
%   makeDataSourceStruct:invalidPath if the path has fewer than five
%   non-empty components.

    pathText = char(folderPath);

    % Split by file separators (handles / or \) and drop the empty parts
    % produced by leading, trailing or doubled separators.
    parts = regexp(pathText, '[\\/]', 'split');
    parts = parts(~cellfun('isempty', parts));

    if numel(parts) < 5
        error('makeDataSourceStruct:invalidPath', ...
            'Cannot extract sequence/date/run from folder path "%s".', pathText);
    end

    % Last five components: sequence, year, month, day, run
    tail = parts(end-4:end);
    [sequence, year, month, day, runStr] = tail{:};

    % Build date string
    dateStr = sprintf('%s/%s/%s', year, month, day);

    % Convert run string to number
    runNum = str2double(runStr);

    % Construct struct inside a cell array
    dataSource = {
        struct('sequence', sequence, ...
               'date', dateStr, ...
               'runs', runNum)
        };
end