Calculations/Time Series Analyzer/Analyzer.py

334 lines
17 KiB
Python

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy.signal import find_peaks, resample, detrend
import csv
"""
NOTES
When you compute the Fourier transform of a signal, you obtain a set of complex-valued coefficients corresponding to different frequency components present in the signal. These coefficients represent the amplitude and phase of sinusoidal components at specific frequencies.
The frequency range covered by the Fourier transform output is divided into discrete frequency bins, each representing a specific frequency component. The width of these bins depends on the sampling rate and the length of the input signal. In a typical implementation, the frequency bins are evenly spaced.
In practical terms, the power spectrum bins correspond to the frequency components at which the power spectral density (PSD) or magnitude squared of the Fourier coefficients are evaluated. These bins are used to represent the distribution of signal power across different frequency components, providing insights into the frequency content of the signal.
The bin width, also called the resolution bandwidth, is simply sampling rate / Total number of samples = (1/dt)/N
N can be rewritten in terms of T and dt (N = T/dt). Substituting this into the expression Sxx = 2 * |fft|^2 / (Resolution Bandwidth * Noise Power Bandwidth), we get the expression used here. Note that the FFT computed in compute_psd is scaled by N (the raw coefficients are divided by N to obtain amplitudes), which eventually results in the factor dt^2/T.
The noise power bandwidth is typically 1 Hz if no windowing/tapering function is used.
Compute the broadband noise level in Vrms^2/Hz by summing all the power spectrum bins, excluding any peaks and the DC component, and dividing the sum by the equivalent noise bandwidth of the window.
The equivalent noise bandwidth (ENBW) of a window in the context of a power spectrum refers to a measure of the effective bandwidth of the window function applied to the signal before taking the Fourier transform.
When you compute the power spectrum of a signal using a windowing function (e.g., Hamming window, Hann window, etc.), the window modifies the original signal by tapering its edges. This tapering reduces the spectral leakage and improves frequency resolution but also introduces a smoothing effect, which can affect the estimation of the signal's power at different frequencies.
The equivalent noise bandwidth provides a way to quantify the effective bandwidth of the window function in terms of its impact on noise power. It represents the width of a rectangular filter that would have the same noise power as the windowed signal.
In practical terms, when calculating the power spectrum of a signal using a window, the ENBW is used to adjust the power spectrum to account for the smoothing effect of the window. Dividing the sum of the power spectrum bins by the ENBW yields an estimate of the noise power per unit frequency bandwidth.
ENBW is often used in the context of noise measurements or signal processing applications where accurate estimation of noise power is important. It helps ensure that the power spectrum accurately reflects the true power distribution of the signal, accounting for the effects of windowing.
The noise floor refers to the minimum level of signal that can be reliably distinguished from the background noise. It represents the lowest amplitude of a signal that can be detected or measured with reasonable accuracy.
The noise floor is often defined as the RMS (Root Mean Square) value of the background noise in a given frequency band.
To compute the noise floor value from the power spectral density (PSD) values, you typically need to analyze the portion of the PSD that corresponds to the background noise.
"""
def compute_autocorrelation(data):
    """
    Return the autocorrelation of a 1-D signal, normalised by its zero-lag energy.
    The mean of the signal is removed first. The correlation is evaluated in
    "same" mode, so the output has the same length as the input.
    """
    print("Calculating autocorrelation...")
    centered = data - np.mean(data)
    # Sum of squares of the mean-removed signal, used as the normalisation.
    energy = np.square(centered).sum()
    return np.correlate(centered, centered, "same") / energy
def pre_process_data(data, new_sampling_rate):
    """
    Resample a two-column time series to a new sampling rate and detrend it.
    Parameters:
    data: 2-D array
    Column 0 holds equally spaced sample times, column 1 the signal values.
    new_sampling_rate: float
    Target number of samples per unit of the time column.
    Returns:
    2-D array: resampled times in column 0, mean-removed and linearly
    detrended signal in column 1.
    Raises:
    ValueError: if the requested rate yields no samples at all.
    """
    times, values = data[:, 0], data[:, 1]
    # Number of output samples = covered duration (N * dt) * new rate.
    n_points = int(len(times) * (times[1] - times[0]) * new_sampling_rate)
    if n_points < 1:
        raise ValueError("new_sampling_rate is too low: resampling would produce no samples.")
    # Let resample() build the time axis that matches its output. The FFT-based
    # resampling spreads the new samples over the full period N * dt, so a
    # linspace up to the last original sample would give a slightly wrong step.
    x_array, t = resample(values, n_points, t=times)
    x = detrend(x_array - x_array.mean())
    return np.column_stack((t, x))
def smoothen_data(data, window_size):
    """
    Smooth a 1-D array with a moving average of width window_size.
    Only positions where the window fully overlaps the data are kept
    ('valid' mode), so the result is window_size - 1 samples shorter.
    """
    # Uniform kernel: every sample in the window gets the same weight.
    weights = np.ones(window_size) / window_size
    return np.convolve(data, weights, mode='valid')
def compute_psd(data, new_sampling_rate, window_size = 21):
    """
    Compute a smoothed one-sided power spectral density of a time series.
    The data are first passed through pre_process_data. The PSD is the FFT
    multiplied by its complex conjugate, scaled by 2 * dt^2 / T, truncated to
    the first half of the bins and smoothed with smoothen_data.
    Returns:
    tuple: (pre-processed data array, smoothed PSD values).
    """
    processed_data = pre_process_data(data, new_sampling_rate)
    sample_times = processed_data[:, 0]
    samples = processed_data[:, 1]
    step = sample_times[1] - sample_times[0]  # Sampling interval
    count = samples.shape[0]  # Total number of data points
    duration = count * step  # Total duration of the data
    print("Calculating power spectrum...")
    spectrum = np.fft.fft(samples)
    # The factor of 2 folds the discarded negative-frequency half into the kept bins.
    scale = 2 * (step ** 2 / duration)
    power = scale * (spectrum * spectrum.conj())
    one_sided = power[:count // 2]
    return processed_data, smoothen_data(one_sided.real, window_size)
def compute_RIN(time, voltages, Sxx_smooth):
    """
    Compute the RIN in dB from a smoothed power spectral density.
    Parameters:
    time: array-like
    Equally spaced sample times; only the first two entries are used to get dt.
    voltages: array-like
    Sampled voltages; their mean square is used as the average power.
    Sxx_smooth: array-like
    Smoothed power spectral density values.
    Returns:
    array: 10 * log10(Sxx_smooth / (mean(voltages^2) * df)), with df = 1 / (N * dt).
    """
    voltages = np.asarray(voltages)
    dt = time[1] - time[0]  # Sampling interval
    N = voltages.shape[0]  # Total number of data points
    T = N * dt  # Total duration of the data
    df = 1 / T  # Frequency resolution (bin width)
    # Average power; np.square is the NumPy function (np.squared does not exist).
    average_P = np.mean(np.square(voltages))
    return 10 * np.log10(np.asarray(Sxx_smooth) / (average_P * df))
def find_noise_peaks(psd, faxis, freq_range, threshold):
    """
    Find the peaks of a PSD inside a frequency range.
    Parameters:
    psd: array-like
    Power spectral density values.
    faxis: array-like
    Frequencies corresponding to the PSD values.
    freq_range: tuple
    (start, end) frequencies of the range of interest; the bin at the end
    index is excluded.
    threshold: scalar
    Minimum peak height, in the same units as psd.
    Returns:
    tuple: (peak powers as 10*log10 of the PSD value, peak frequencies rounded
    to 2 decimals).
    """
    psd = np.asarray(psd)
    faxis = np.asarray(faxis)
    start_freq, end_freq = freq_range
    n_bins = len(faxis)
    # Index of the first bin at or above each bound. If no bin reaches the
    # bound, fall back to the end of the axis; a bare argmax would return 0
    # in that case and silently select the wrong range.
    at_or_above_start = np.flatnonzero(faxis >= start_freq)
    at_or_above_end = np.flatnonzero(faxis >= end_freq)
    idx_start = at_or_above_start[0] if at_or_above_start.size else n_bins
    idx_end = at_or_above_end[0] if at_or_above_end.size else n_bins
    sliced_psd = psd[idx_start:idx_end]
    sliced_faxis = faxis[idx_start:idx_end]
    peak_indices, _ = find_peaks(sliced_psd, height=threshold)
    peak_powers = 10 * np.log10(sliced_psd[peak_indices])
    peak_frequencies = np.around(sliced_faxis[peak_indices], 2)
    return peak_powers, peak_frequencies
def compute_noise_level(psd, resolution_bandwidth, exclude_peaks=False, faxis=None, freq_range=None, threshold=None):
    """
    Compute the noise level from a power spectral density (PSD).
    Parameters:
    psd: array-like
    One-sided power spectral density.
    resolution_bandwidth: float
    Bin width
    exclude_peaks: bool
    If True (and threshold is given), bins detected as peaks are left out of the sum.
    faxis: array-like, optional
    Frequencies corresponding to the PSD values; required when freq_range is given.
    freq_range: tuple, optional
    (start, end) frequencies restricting the sum; the bin at the end index is excluded.
    threshold: scalar, optional
    Peak-height threshold in dB; it is converted to linear units before peak finding.
    Returns:
    float: Noise level (Vrms^2).
    Raises:
    ValueError: if freq_range is given without faxis.
    """
    psd = np.asarray(psd)
    if freq_range is not None:
        if faxis is None:
            raise ValueError("faxis is required when freq_range is given.")
        faxis = np.asarray(faxis)
        start_freq, end_freq = freq_range
        n_bins = len(faxis)
        # First bin at or above each bound. If no bin reaches the bound, use the
        # end of the axis; a bare argmax would return 0 in that case.
        at_or_above_start = np.flatnonzero(faxis >= start_freq)
        at_or_above_end = np.flatnonzero(faxis >= end_freq)
        idx_start = at_or_above_start[0] if at_or_above_start.size else n_bins
        idx_end = at_or_above_end[0] if at_or_above_end.size else n_bins
        psd = psd[idx_start:idx_end]
    # Exclude peaks from the sum
    if exclude_peaks and threshold is not None:
        linear_threshold = 10 ** (threshold / 10)
        peak_indices, _ = find_peaks(psd, height=linear_threshold)
        keep = np.ones(len(psd), dtype=bool)
        keep[peak_indices] = False
        psd = psd[keep]
    return resolution_bandwidth * np.sum(psd)
def extract_data(filepath):
    """
    Read a two-column CSV recording and return it as a (time, voltage) array.
    The first line of the file is skipped. The last field of the second line is
    parsed as the time step. Every following row must have at least two
    numeric fields; the second field is taken as the voltage. The time column of
    the result is rebuilt as sample index * time step.
    Parameters:
    filepath: str
    Path of the CSV file.
    Returns:
    2-D array: column 0 is the time axis, column 1 the voltages.
    Raises:
    ValueError: if the time step or a data field is not a valid number.
    """
    with open(filepath, newline='') as csvfile:
        # Skip the first line (header)
        next(csvfile)
        reader = csv.reader(csvfile)
        # The second line carries the time step in its last field
        next_header = next(reader)
        string_number = next_header[-1]
        try:
            time_step = float(string_number)
        except ValueError as err:
            # Fail here with a clear message instead of continuing without a time step.
            raise ValueError(
                f"The string does not represent a valid number: {string_number!r}"
            ) from err
        voltages = []
        for row in reader:
            if not row:
                # Ignore blank lines (e.g. a trailing newline at the end of the file).
                continue
            # The first field is still parsed so that malformed rows are rejected.
            float(row[0])
            voltages.append(float(row[1]))
    voltage_array = np.array(voltages)
    # Index * step always yields exactly one time per sample. np.arange with a
    # float step can return one element too many because of rounding.
    time_array = np.arange(len(voltages)) * time_step
    return np.column_stack((time_array, voltage_array))
def _draw_psd_panel(ax, faxis, Sxx, Sxx_bkg, data_str, bkg_str, peak_frequencies, peak_powers):
    """Draw the signal and background PSD in dB, with annotated peaks, on the axes ax."""
    # Power spectra in dB on a logarithmic frequency axis
    ax.semilogx(faxis, 10 * np.log10(Sxx_bkg), color='orange', linewidth=0.5, label=bkg_str)
    ax.semilogx(faxis, 10 * np.log10(Sxx), color='green', linewidth=2, label=data_str)
    # Mark each detected peak with a hollow circle and label it with its frequency
    ax.plot(peak_frequencies, peak_powers, 'o', markerfacecolor='none', markeredgecolor='r', markersize=10)
    for freq, power in zip(peak_frequencies, peak_powers):
        ax.text(freq, power, str(freq)+' Hz', verticalalignment='bottom', horizontalalignment='right')
    ax.grid(True, which="both", linestyle='-', linewidth=0.5, color='gray')
    ax.grid(True, which="both", linestyle=':', linewidth=1, color='gray', axis='x')
    # Thick vertical lines at the powers of 10 spanned by the positive frequencies
    positive = faxis[faxis > 0]
    for exponent in range(int(np.log10(min(positive))), int(np.log10(max(positive))) + 1):
        ax.axvline(x=10**exponent, color='black', linestyle='-', linewidth=2)
    ax.set_xlim([min(faxis), max(faxis)])
    ax.legend(loc=3, fontsize=12)
    ax.set_xlabel('Frequency [Hz]', fontsize=14)
    ax.set_ylabel('Power Spectral Density [dBV/Hz]', fontsize=14)


def plot_analysis(data, data_bkg, Sxx, Sxx_bkg, data_str, bkg_str, peak_find_threshold, window_size = 21, plot_only_psd = True):
    """
    Plot the PSD of a signal and of a background recording, with peaks marked.
    Parameters:
    data, data_bkg: 2-D arrays
    Signal and background series; column 0 is time, column 1 is voltage.
    Sxx, Sxx_bkg: array-like
    Smoothed PSD values of the signal and the background. They are plotted
    against a frequency axis smoothed with the same window_size.
    data_str, bkg_str: str
    Legend labels for the signal and the background.
    peak_find_threshold: scalar
    Peak-height threshold in dB; converted to linear units for peak finding.
    window_size: int
    Moving-average width applied to the frequency axis.
    plot_only_psd: bool
    If True only the PSD is drawn; otherwise the time traces are drawn in a
    panel above the PSD.
    """
    time, voltages = data[:, 0], data[:, 1]
    time_bkg, voltages_bkg = data_bkg[:, 0], data_bkg[:, 1]
    dt = time[1] - time[0]  # Sampling interval
    N = voltages.shape[0]  # Total number of data points
    # Frequency of FFT bin k is k / (N * dt). A linspace that ends exactly at the
    # Nyquist frequency would stretch the axis by a factor (N/2) / (N/2 - 1).
    faxis = smoothen_data(np.fft.fftfreq(N, d=dt)[:N // 2], window_size)
    # Peaks are searched from 50 Hz up to the top of the frequency axis
    freq_range = (50, max(faxis))
    threshold = 10**(peak_find_threshold/10)
    peak_powers, peak_frequencies = find_noise_peaks(Sxx, faxis, freq_range, threshold)
    # NOTE: the broadband-noise-level / noise-floor overlays and the SNR title
    # that were commented out in earlier revisions are not drawn.
    plt.figure(figsize=(12, 8))
    if plot_only_psd:
        psd_ax = plt.gca()
    else:
        gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 1], height_ratios=[1, 1])
        # Plot 1: Time vs Voltage
        axs1 = plt.subplot(gs[0, 0:])
        axs1.plot(time_bkg, voltages_bkg, marker='o', color='orange', linewidth=0.5, ms=1, label=bkg_str)
        axs1.plot(time, voltages, marker='o', color='green', linewidth=0.5, ms=1, label=data_str)
        axs1.set_ylim([-0.5, 0.5])
        axs1.set_xlabel('Time (s)', fontsize=14)
        axs1.set_ylabel('Voltage (V)', fontsize=14)
        axs1.legend(loc=1, fontsize=12)
        axs1.autoscale(tight=True)
        axs1.grid(True)
        # Plot 2: Power Spectrum in dB
        psd_ax = plt.subplot(gs[1, 0:])
    _draw_psd_panel(psd_ax, faxis, Sxx, Sxx_bkg, data_str, bkg_str, peak_frequencies, peak_powers)
    plt.tight_layout()
    plt.show()