From cd96f83fb6a538d96821f1f6314a3290fb091de0 Mon Sep 17 00:00:00 2001 From: Karthik Chandrashekara Date: Sun, 24 Aug 2025 20:41:17 +0200 Subject: [PATCH] Latest fully functional script - added complete PCA functionality - computes the PCA results and packs in to results struct, included plotting functionality of the PCA results in addition. --- Data-Analyzer/+Analyzer/conductPCA.m | 117 +++--------------- Data-Analyzer/+Analyzer/performAnalysis.m | 16 ++- Data-Analyzer/+Helper/collectODImages.m | 46 ++++++- Data-Analyzer/+Helper/estimateDatasetMemory.m | 6 +- Data-Analyzer/+Plotter/plotPDF.m | 14 +-- .../BECToDroplets/plotAnalysisResults.m | 13 +- .../+Scripts/BECToDroplets/runFullAnalysis.m | 40 +++--- .../plotAnalysisResults.m | 4 +- .../BECToStripes/plotAnalysisResults.m | 4 +- .../plotAnalysisResults.m | 4 +- 10 files changed, 115 insertions(+), 149 deletions(-) diff --git a/Data-Analyzer/+Analyzer/conductPCA.m b/Data-Analyzer/+Analyzer/conductPCA.m index 1b2516e..a8f1a71 100644 --- a/Data-Analyzer/+Analyzer/conductPCA.m +++ b/Data-Analyzer/+Analyzer/conductPCA.m @@ -1,111 +1,26 @@ -function conductPCA(od_imgs, scan_reference_values, scan_parameter_values, doPlot, doSave, saveDir) - -%% Performs PCA on optical density images, visualizes and optionally saves results. +function results = conductPCA(od_imgs) +%% computePCAfromImages: Performs PCA on optical density images and returns results in a struct % % Inputs: -% od_imgs - cell array of OD images -% scan_reference_values - array of unique control parameter values -% scan_parameter_values - array mapping each image to a control parameter -% doPlot - logical, true to plot figures -% doSave - logical, true to save figures -% saveDir - directory to save figures if doSave is true +% od_imgs - cell array of OD images % -% Requires: -% +Calculator/computeCumulants.m +% Outputs: +% pcaResults - struct containing PCA outputs: +% .coeff - PCA coefficients (principal components) +% .score - PCA scores for each image +% .explained - variance explained by each PC +% .Nx, .Ny - dimensions of individual images - if nargin < 4, doPlot = true; end - if nargin < 5, doSave = false; end - if nargin < 6, saveDir = pwd; end - - %% PCA computation allImgs3D = cat(3, od_imgs{:}); [Nx, Ny] = size(allImgs3D(:,:,1)); Xall = reshape(allImgs3D, [], numel(od_imgs))'; [coeff, score, ~, ~, explained] = pca(Xall); - - figCount = 1; - - %% --- Figure 1: PC1 Image --- - if doPlot - pc1_vector = coeff(:,1); - pc1_image = reshape(pc1_vector, Nx, Ny); - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); - imagesc(pc1_image); axis image off; colormap(Colormaps.coolwarm()); colorbar; - title(sprintf('First Principal Component (PC1) Image - Explains %.2f%% Variance', explained(1))); - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_Image.png')); end - figCount = figCount + 1; - end - - %% --- Figure 2: PC1 Scores Scatter --- - if doPlot - numGroups = numel(scan_reference_values); - colors = lines(numGroups); - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); hold on; - for g = 1:numGroups - idx = scan_parameter_values == scan_reference_values(g); - scatter(repmat(scan_reference_values(g), sum(idx),1), score(idx,1), 36, colors(g,:), 'filled'); - end - xlabel('Control Parameter'); ylabel('PC1 Score'); - title('Evolution of PC1 Scores'); grid on; - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_Scatter.png')); end - figCount = figCount + 1; - end - - %% --- Figure 3: PC1 Distributions --- - if doPlot - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); - hold on; - for g = 1:numGroups - idx = scan_parameter_values == scan_reference_values(g); - data = score(idx,1); - histogram(data, 'Normalization', 'pdf', 'FaceColor', colors(g,:), 'FaceAlpha', 0.3); - [f, xi] = ksdensity(data); - plot(xi, f, 'Color', colors(g,:), 'LineWidth', 2); - end - xlabel('PC1 Score'); ylabel('Probability Density'); - title('PC1 Score Distributions by Group'); - legend(arrayfun(@num2str, scan_reference_values, 'UniformOutput', false), 'Location', 'Best'); - grid on; - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_Distributions.png')); end - figCount = figCount + 1; - end - - %% --- Figure 4: Boxplot of PC1 Scores --- - if doPlot - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); - boxplot(score(:,1), scan_parameter_values); - xlabel('Control Parameter'); ylabel('PC1 Score'); - title('PC1 Score Boxplots by Group'); grid on; - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_Boxplot.png')); end - figCount = figCount + 1; - end - - %% --- Figure 5: Mean ± SEM of PC1 Scores --- - if doPlot - meanScores = arrayfun(@(g) mean(score(scan_parameter_values == g,1)), scan_reference_values); - semScores = arrayfun(@(g) std(score(scan_parameter_values == g,1))/sqrt(sum(scan_parameter_values == g)), scan_reference_values); - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); - errorbar(scan_reference_values, meanScores, semScores, '-o', 'LineWidth', 2); - xlabel('Control Parameter'); ylabel('Mean PC1 Score ± SEM'); - title('Mean ± SEM of PC1 Scores'); grid on; - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_MeanSEM.png')); end - figCount = figCount + 1; - end - - %% --- Figure 6: Binder Cumulant --- - if doPlot - binderVals = arrayfun(@(g) ... - Calculator.computeCumulants(score(scan_parameter_values == g,1)), ... - scan_reference_values); - figure(figCount); clf; set(gcf, 'Color', 'w', 'Position', [100 100 950 750]); - plot(scan_reference_values, binderVals, '-o', 'LineWidth', 2); - xlabel('Control Parameter'); ylabel('Binder Cumulant (PC1)'); - title('Binder Cumulant of PC1 Scores'); grid on; - if doSave, saveas(gcf, fullfile(saveDir, 'PC1_BinderCumulant.png')); end - end - - %% --- ANOVA Test --- - p = anova1(score(:,1), arrayfun(@num2str, scan_parameter_values, 'UniformOutput', false), 'off'); - fprintf('[INFO] ANOVA p-value for PC1 score differences between groups: %.4e\n', p); + results = struct( ... + 'coeff', coeff, ... + 'score', score, ... + 'explained', explained, ... + 'Nx', Nx, ... + 'Ny', Ny ... + ); end diff --git a/Data-Analyzer/+Analyzer/performAnalysis.m b/Data-Analyzer/+Analyzer/performAnalysis.m index f499179..a73e1cb 100644 --- a/Data-Analyzer/+Analyzer/performAnalysis.m +++ b/Data-Analyzer/+Analyzer/performAnalysis.m @@ -52,21 +52,26 @@ function [results, scan_parameter_values] = performAnalysis(options) spectral_analysis_results = Analyzer.conductSpectralAnalysis(od_imgs, scan_parameter_values, options); - N_shots = length(od_imgs); + N_shots = length(od_imgs); % Extract angular correlations - full_g2_results = Analyzer.extractAutocorrelation(... + full_g2_results = Analyzer.extractAutocorrelation(... spectral_analysis_results.theta_vals, ... spectral_analysis_results.angular_spectral_distribution, ... scan_parameter_values, N_shots, options.N_angular_bins); - custom_g_results = Analyzer.extractCustomCorrelation(... + custom_g_results = Analyzer.extractCustomCorrelation(... spectral_analysis_results.angular_spectral_distribution, ... scan_parameter_values, N_shots, options.N_angular_bins); fprintf('\n[INFO] Spectral analysis complete!\n'); - - % PCA + + % Conduct PCA + fprintf('\n[INFO] Initiating Principal Component Analysis...\n'); + + pca_results = Analyzer.conductPCA(od_imgs); + + fprintf('\n[INFO] Principal Component Analysis complete!\n'); % Lattice Reconstruction @@ -75,4 +80,5 @@ function [results, scan_parameter_values] = performAnalysis(options) results.spectral_analysis_results = spectral_analysis_results; results.full_g2_results = full_g2_results; results.custom_g_results = custom_g_results; + results.pca_results = pca_results; end \ No newline at end of file diff --git a/Data-Analyzer/+Helper/collectODImages.m b/Data-Analyzer/+Helper/collectODImages.m index baed8ca..2af5f3e 100644 --- a/Data-Analyzer/+Helper/collectODImages.m +++ b/Data-Analyzer/+Helper/collectODImages.m @@ -24,9 +24,15 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options) % --- Early exit if processed data already exist AND options match --- reuseVarsExist = evalin('base', ... 'exist(''od_imgs'',''var'') && exist(''scan_parameter_values'',''var'') && exist(''file_list'',''var'') && exist(''prior_options'',''var'')'); + + if ~isfield(options, 'SAVE_TO_WORKSPACE') + % ===== Estimate dataset memory and get per-run estimates ===== + dataSource = makeDataSource(options.folderPath); + [options.SAVE_TO_WORKSPACE, ~] = Helper.estimateDatasetMemory(dataSource, options); + end - % --- Respect SAVE_TO_WORKSPACE flag from batchAnalyze --- - if isfield(options, 'SAVE_TO_WORKSPACE') && ~options.SAVE_TO_WORKSPACE + % --- Respect SAVE_TO_WORKSPACE flag --- + if ~options.SAVE_TO_WORKSPACE % Force reprocessing: skip all workspace reuse reuseVarsExist = false; end @@ -62,7 +68,7 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options) evalin('base', 'exist(''prior_options'',''var'')'); % --- Respect SAVE_TO_WORKSPACE flag --- - if isfield(options, 'SAVE_TO_WORKSPACE') && ~options.SAVE_TO_WORKSPACE + if ~options.SAVE_TO_WORKSPACE fullDataExists = false; % force recompute even if workspace vars exist end @@ -208,13 +214,13 @@ function [od_imgs, scan_parameter_values, file_list] = collectODImages(options) file_list = raw_file_list; end - % --- Save processed dataset and options for reuse --- + % --- Save processed dataset and options to workspace for reuse --- assignin('base', 'od_imgs', od_imgs); assignin('base', 'scan_parameter_values', scan_parameter_values); assignin('base', 'file_list', file_list); assignin('base', 'prior_options', options); - % --- Save OD images as figures if requested --- + % --- Save OD images as figures to disk if requested --- if ~options.skipSaveOD saveODFigures(od_imgs, options.saveDirectory); end @@ -269,3 +275,33 @@ function saveODFigures(od_imgs, saveDirectory) end fprintf('[INFO] OD figures saved successfully.\n'); end + +function dataSources = makeDataSource(folderPath) + % Split by file separators (handles / or \) + parts = regexp(folderPath, '[\\/]', 'split'); + + % Remove empty parts caused by leading slashes + parts = parts(~cellfun('isempty', parts)); + + % Extract sequence, date, and run number + % Now the indices are correct: + % parts = {'DyLabNAS', 'Data', 'StructuralPhaseTransition', '2025', '08', '13', '0062'} + sequence = parts{3}; % "StructuralPhaseTransition" + year = parts{4}; % "2025" + month = parts{5}; % "08" + day = parts{6}; % "13" + runStr = parts{7}; % "0062" + + % Build date string + dateStr = sprintf('%s/%s/%s', year, month, day); + + % Convert run string to number + runNum = str2double(runStr); + + % Construct struct inside a cell array + dataSources = { + struct('sequence', sequence, ... + 'date', dateStr, ... + 'runs', runNum) + }; +end diff --git a/Data-Analyzer/+Helper/estimateDatasetMemory.m b/Data-Analyzer/+Helper/estimateDatasetMemory.m index 9188d2f..37b9066 100644 --- a/Data-Analyzer/+Helper/estimateDatasetMemory.m +++ b/Data-Analyzer/+Helper/estimateDatasetMemory.m @@ -10,7 +10,7 @@ function [SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, o [~, sys] = memory; availableRAM = sys.PhysicalMemory.Available; else - availableRAM = 16e9; % fallback: 16 GB if not Windows + availableRAM = 8e9; % fallback: 8 GB if not Windows end SAVE_TO_WORKSPACE = true; % default, may change per run @@ -58,10 +58,10 @@ function [SAVE_TO_WORKSPACE, runMemoryGB] = estimateDatasetMemory(dataSources, o % Decide workspace flag per run by comparing with 50% of available RAM if runBytes > 0.75 * availableRAM SAVE_TO_WORKSPACE = false; - fprintf('[INFO] Estimated size on memory of Run %s/%s too large (%.2f GB). Not saving to workspace.\n', ... + fprintf('\n[INFO] Estimated size on memory of Run %s/%s too large (%.2f GB). Will save partially to workspace if not done so already.\n', ... ds.sequence, runID, runBytes/1e9); else - fprintf('[INFO] Estimated size on memory of Run %s/%s = %.2f GB. Will save to workspace.\n', ... + fprintf('\n[INFO] Estimated size on memory of Run %s/%s = %.2f GB. Will save completely to workspace if not done so already.\n', ... ds.sequence, runID, runBytes/1e9); end end diff --git a/Data-Analyzer/+Plotter/plotPDF.m b/Data-Analyzer/+Plotter/plotPDF.m index d960511..7616177 100644 --- a/Data-Analyzer/+Plotter/plotPDF.m +++ b/Data-Analyzer/+Plotter/plotPDF.m @@ -4,8 +4,8 @@ function plotPDF(dataCell, referenceValues, varargin) % Usage: % Plotter.plotPDF(dataCell, referenceValues, ... % 'PlotType', 'histogram', ... % 'histogram' (default) or 'kde' -% 'NumBins', 50, ... % number of histogram bins -% 'NormalizeHist', true, ... % normalize hist counts to probability density +% 'NumberOfBins', 50, ... % number of histogram bins +% 'NormalizeHistogram', true, ... % normalize hist counts to probability density % 'Title', 'My Title', ... % 'XLabel', 'Scan Parameter', ... % 'YLabel', 'Data Values', ... @@ -35,8 +35,8 @@ function plotPDF(dataCell, referenceValues, varargin) addParameter(p, 'XLim', [], @(x) isempty(x) || numel(x)==2); addParameter(p, 'Colormap', @jet); addParameter(p, 'PlotType', 'histogram', @(x) any(validatestring(x,{'kde','histogram'}))); - addParameter(p, 'NumBins', 50, @isscalar); - addParameter(p, 'NormalizeHist', true, @islogical); + addParameter(p, 'NumberOfBins', 50, @isscalar); + addParameter(p, 'NormalizeHistogram', true, @islogical); parse(p, varargin{:}); opts = p.Results; @@ -56,7 +56,7 @@ function plotPDF(dataCell, referenceValues, varargin) y_grid = linspace(y_min, y_max, opts.NumPoints); pdf_matrix = zeros(numel(y_grid), N_params); else % Histogram - edges = linspace(y_min, y_max, opts.NumBins+1); + edges = linspace(y_min, y_max, opts.NumberOfBins+1); binCenters = (edges(1:end-1) + edges(2:end)) / 2; pdf_matrix = zeros(numel(binCenters), N_params); end @@ -72,7 +72,7 @@ function plotPDF(dataCell, referenceValues, varargin) pdf_matrix(:, i) = f; else % Histogram counts = histcounts(data, edges); - if opts.NormalizeHist + if opts.NormalizeHistogram binWidth = edges(2) - edges(1); counts = counts / (sum(counts) * binWidth); % probability density end @@ -99,7 +99,7 @@ function plotPDF(dataCell, referenceValues, varargin) if strcmpi(opts.PlotType,'kde') ylabel(c, 'PDF', 'Interpreter', 'latex', 'FontSize', opts.FontSize); else - if opts.NormalizeHist + if opts.NormalizeHistogram ylabel(c, 'Probability Density', 'Interpreter', 'latex', 'FontSize', opts.FontSize); else ylabel(c, 'Counts', 'Interpreter', 'latex', 'FontSize', opts.FontSize); diff --git a/Data-Analyzer/+Scripts/BECToDroplets/plotAnalysisResults.m b/Data-Analyzer/+Scripts/BECToDroplets/plotAnalysisResults.m index c046569..feb7daf 100644 --- a/Data-Analyzer/+Scripts/BECToDroplets/plotAnalysisResults.m +++ b/Data-Analyzer/+Scripts/BECToDroplets/plotAnalysisResults.m @@ -89,8 +89,8 @@ Plotter.plotPDF(compiled_results.custom_g_results.max_g2_all_per_scan_parameter_ 'SkipSaveFigures', options.skipSaveFigures, ... 'SaveFileName', 'PDF_MaxG2AcrossTransition.fig', ... 'SaveDirectory', figSaveDir, ... - 'NumBins', 20, ... - 'NormalizeHist', true, ... + 'NumberOfBins', 20, ... + 'NormalizeHistogram', true, ... 'DataRange', [0 1.5], ... 'Colormap', @Colormaps.coolwarm, ... 'XLim', [min(options.scan_reference_values) max(options.scan_reference_values)]); @@ -108,6 +108,15 @@ Plotter.plotCumulants(options.scan_reference_values, ... 'SkipSaveFigures', options.skipSaveFigures, ... 'SaveFileName', 'CumulantOfPeakOffsetAngularCorrelation.fig', ... 'SaveDirectory', figSaveDir); + +%% ------------------ 5. PCA ------------------ +Plotter.plotPCAResults(compiled_results.pca_results, scan_parameter_values, options.scan_reference_values, ... + 'FigNumRange', [7,8,9,10,11,12], ... + 'FontName', options.font, ... + 'SkipSaveFigures', options.skipSaveFigures, ... + 'SaveDirectory', figSaveDir); + +%% %{ %% ------------------ 6. Average of Spectra Plots ------------------ diff --git a/Data-Analyzer/+Scripts/BECToDroplets/runFullAnalysis.m b/Data-Analyzer/+Scripts/BECToDroplets/runFullAnalysis.m index 2d10bc0..0e40e90 100644 --- a/Data-Analyzer/+Scripts/BECToDroplets/runFullAnalysis.m +++ b/Data-Analyzer/+Scripts/BECToDroplets/runFullAnalysis.m @@ -10,29 +10,29 @@ dataSources = { options = struct(); % File / paths -options.baseDataFolder = '//DyLabNAS/Data'; -options.measurementName = 'BECToDroplets'; -scriptFullPath = mfilename('fullpath'); -options.saveDirectory = fileparts(scriptFullPath); +options.baseDataFolder = '//DyLabNAS/Data'; +options.measurementName = 'BECToDroplets'; +scriptFullPath = mfilename('fullpath'); +options.saveDirectory = fileparts(scriptFullPath); % Camera / imaging -options.cam = 5; -options.angle = 0; -options.center = [1420, 2050]; -options.span = [200, 200]; -options.fraction = [0.1, 0.1]; -options.pixel_size = 5.86e-6; % in meters -options.magnification = 24.6; -options.removeFringes = false; -options.ImagingMode = 'HighIntensity'; -options.PulseDuration = 5e-6; % in s +options.cam = 5; +options.angle = 0; +options.center = [1420, 2050]; +options.span = [200, 200]; +options.fraction = [0.1, 0.1]; +options.pixel_size = 5.86e-6; % in meters +options.magnification = 24.6; +options.removeFringes = false; +options.ImagingMode = 'HighIntensity'; +options.PulseDuration = 5e-6; % in s % Fourier analysis settings -options.theta_min = deg2rad(0); -options.theta_max = deg2rad(180); -options.N_radial_bins = 500; -options.Radial_Sigma = 2; -options.Radial_WindowSize = 5; % odd number +options.theta_min = deg2rad(0); +options.theta_max = deg2rad(180); +options.N_radial_bins = 500; +options.Radial_Sigma = 2; +options.Radial_WindowSize = 5; % odd number options.k_min = 1.2771; % μm⁻¹ options.k_max = 2.5541; % μm⁻¹ @@ -43,7 +43,7 @@ options.Angular_WindowSize = 5; options.zoom_size = 50; % Scan parameter -options.scan_parameter = 'rot_mag_field'; +options.scan_parameter = 'rot_mag_field'; switch options.measurementName case 'BECToDroplets' diff --git a/Data-Analyzer/+Scripts/BECToDropletsToStripes/plotAnalysisResults.m b/Data-Analyzer/+Scripts/BECToDropletsToStripes/plotAnalysisResults.m index e8ba6c7..34b93e8 100644 --- a/Data-Analyzer/+Scripts/BECToDropletsToStripes/plotAnalysisResults.m +++ b/Data-Analyzer/+Scripts/BECToDropletsToStripes/plotAnalysisResults.m @@ -89,8 +89,8 @@ Plotter.plotPDF(compiled_results.custom_g_results.max_g2_all_per_scan_parameter_ 'SkipSaveFigures', options.skipSaveFigures, ... 'SaveFileName', 'PDF_MaxG2AcrossTransition.fig', ... 'SaveDirectory', figSaveDir, ... - 'NumBins', 20, ... - 'NormalizeHist', true, ... + 'NumberOfBins', 20, ... + 'NormalizeHistogram', true, ... 'DataRange', [0 1.5], ... 'Colormap', @Colormaps.coolwarm, ... 'XLim', [min(options.scan_reference_values) max(options.scan_reference_values)]); diff --git a/Data-Analyzer/+Scripts/BECToStripes/plotAnalysisResults.m b/Data-Analyzer/+Scripts/BECToStripes/plotAnalysisResults.m index c046569..ab49a79 100644 --- a/Data-Analyzer/+Scripts/BECToStripes/plotAnalysisResults.m +++ b/Data-Analyzer/+Scripts/BECToStripes/plotAnalysisResults.m @@ -89,8 +89,8 @@ Plotter.plotPDF(compiled_results.custom_g_results.max_g2_all_per_scan_parameter_ 'SkipSaveFigures', options.skipSaveFigures, ... 'SaveFileName', 'PDF_MaxG2AcrossTransition.fig', ... 'SaveDirectory', figSaveDir, ... - 'NumBins', 20, ... - 'NormalizeHist', true, ... + 'NumberOfBins', 20, ... + 'NormalizeHistogram', true, ... 'DataRange', [0 1.5], ... 'Colormap', @Colormaps.coolwarm, ... 'XLim', [min(options.scan_reference_values) max(options.scan_reference_values)]); diff --git a/Data-Analyzer/+Scripts/BECToStripesToDroplets/plotAnalysisResults.m b/Data-Analyzer/+Scripts/BECToStripesToDroplets/plotAnalysisResults.m index c046569..ab49a79 100644 --- a/Data-Analyzer/+Scripts/BECToStripesToDroplets/plotAnalysisResults.m +++ b/Data-Analyzer/+Scripts/BECToStripesToDroplets/plotAnalysisResults.m @@ -89,8 +89,8 @@ Plotter.plotPDF(compiled_results.custom_g_results.max_g2_all_per_scan_parameter_ 'SkipSaveFigures', options.skipSaveFigures, ... 'SaveFileName', 'PDF_MaxG2AcrossTransition.fig', ... 'SaveDirectory', figSaveDir, ... - 'NumBins', 20, ... - 'NormalizeHist', true, ... + 'NumberOfBins', 20, ... + 'NormalizeHistogram', true, ... 'DataRange', [0 1.5], ... 'Colormap', @Colormaps.coolwarm, ... 'XLim', [min(options.scan_reference_values) max(options.scan_reference_values)]);