# 1. Basic statistical features
import pandas as pd
from scipy.stats import skew, kurtosis
import numpy as np

# Input/output locations and group size used when this file is run as a script.
INPUT_CSV = '扩充结果.csv'
OUTPUT_CSV = '特征值文件-统计.csv'
CHUNK_SIZE = 300  # number of consecutive rows that form one feature group


def ae_feature(raw, fs=None):
    """Compute basic statistical features for one group of samples.

    Args:
        raw: 1-D array-like of numeric samples (list, ndarray, Series values).
        fs: Unused. Kept (now optional) so existing ``ae_feature(data, fs)``
            calls keep working.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``mean``, ``std_dev``,
        ``max_value``, ``min_value``, ``variance``, ``skewness``, ``kurt``,
        ``range`` and ``rms``, in that order.

    Raises:
        ValueError: If ``raw`` contains no samples.
    """
    samples = np.asarray(raw)
    if samples.size == 0:
        raise ValueError("ae_feature() requires at least one sample")

    max_value = np.max(samples)
    min_value = np.min(samples)
    # Square in float64 so integer input cannot overflow silently.
    as_float = samples.astype(np.float64)

    return pd.DataFrame({
        'mean': [np.mean(samples)],
        'std_dev': [np.std(samples)],
        'max_value': [max_value],
        'min_value': [min_value],
        'variance': [np.var(samples)],
        'skewness': [skew(samples)],
        'kurt': [kurtosis(samples)],
        'range': [max_value - min_value],
        # NOTE: earlier revisions stored the mean square here (the square
        # root was missing); this is now the actual root mean square.
        'rms': [np.sqrt(np.mean(as_float ** 2))],
    })


def extract_chunk_features(df, chunk_size, feature_func=ae_feature):
    """Apply ``feature_func`` to every ``chunk_size``-row group of each column.

    Columns are processed left to right; within a column the groups are taken
    top to bottom. The last group of a column may be shorter than
    ``chunk_size``.

    Args:
        df: ``pandas.DataFrame`` whose columns hold the signals.
        chunk_size: Positive number of rows per group.
        feature_func: Callable ``(samples, chunk_size) -> one-row DataFrame``.

    Returns:
        A DataFrame with one row per (column, group); an empty DataFrame when
        ``df`` has no rows or no columns.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    n_rows = len(df)
    rows = []
    for col in range(df.shape[1]):
        column = df.iloc[:, col].to_numpy()
        for start in range(0, n_rows, chunk_size):
            # Slicing clamps at the end of the column, so the final group is
            # simply shorter when n_rows is not a multiple of chunk_size.
            rows.append(feature_func(column[start:start + chunk_size], chunk_size))

    if not rows:
        return pd.DataFrame()
    # Concatenate once; growing a DataFrame inside the loop is quadratic.
    return pd.concat(rows, ignore_index=True)


def main():
    """Read the input CSV, extract the statistical features and save them."""
    df = pd.read_csv(INPUT_CSV)
    all_features_df = extract_chunk_features(df, CHUNK_SIZE)
    all_features_df.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Sections 2-5 below are alternative feature extractors that are kept
# disabled (commented out), exactly as in the original script.
# ---------------------------------------------------------------------------

# # 2. Time-domain features
# import numpy as np
# import pandas as pd
# import math
# from scipy import fftpack
#
# def time_features(raw, fs):
#     n = len(raw)
#     fft_raw = abs(fftpack.fft(raw, n) * 2 / n)
#     main_freq_index = np.where(fft_raw[0:math.floor(n / 2)] == max(fft_raw[0:math.floor(n / 2)]))
#     f = np.linspace(0, fs, n) * fs / n
#     peak_freq = f[main_freq_index[0][0]]  # peak frequency, in Hz
#
#     xm = np.mean(raw)  # mean
#     xstd = np.std(raw)  # standard deviation
#     kur = ((np.sum((raw - xm) ** 4)) / len(raw)) / (xstd ** 4)  # kurtosis
#     cal_ave_amp = np.sum(np.abs(raw)) / n
#     power_2 = np.power(raw, 2)
#     sum_power = np.sum(power_2)
#     cal_rms = np.sqrt(sum_power / n)
#     cal_form = cal_rms / cal_ave_amp  # stored as '波峰因子': RMS / mean absolute amplitude
#     cal_peak = np.max(np.abs(raw))  # peak value
#     cal_crest = cal_peak / cal_rms  # stored as '峰值因子': peak / RMS
#
#     feature_df = pd.DataFrame()
#     feature_df['peak_freq_khz'] = [peak_freq / 1000]  # convert to kHz
#     feature_df['均值v'] = [xm]
#     feature_df['标准差'] = [xstd]
#     feature_df['峭度'] = [kur]
#     feature_df['平均幅值'] = [cal_ave_amp]
#     feature_df['均方根'] = [cal_rms]
#     feature_df['波峰因子'] = [cal_form]
#     feature_df['峰值'] = [cal_peak]
#     feature_df['峰值因子'] = [cal_crest]
#
#     return feature_df
# # Read the CSV file
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\02扩充\源代码\扩充后的数据-随机采样.csv')
# s=df.shape[1]
# p=df.shape[0]
# fs=300  # number of samples per group
# # Start with an empty DataFrame that collects all features
# all_features_df = pd.DataFrame()
# # Walk every column and compute the features for each group of 300 samples
# for i in range(int(df.shape[1])):
#     for j in range(0, len(df), fs):
#         end_index = min(j + fs, len(df))
#         data_test = np.array(df.iloc[j:end_index, i])
#         # Compute the time-domain features
#         sa=time_features(data_test,fs)
#         all_features_df = pd.concat([all_features_df, sa], ignore_index=True)
# all_features_df.to_csv('特征值文件-时域.csv', index=False)

# # 3. Frequency-domain features
# import pandas as pd
# from scipy.fft import fft, fftfreq
# from scipy.stats import skew, kurtosis
# import numpy as np
#
# def fe_feature(signal, fs):
#     n = len(signal)
#     freqs = fftfreq(n, d=1/fs)  # build the frequency axis
#     signal_fft = fft(signal)  # compute the FFT
#     signal_fft_shifted = np.fft.fftshift(signal_fft)  # centre the FFT result
#
#     # Compute the frequency-domain features
#     total_energy = np.sum(np.abs(signal_fft)**2)
#     total_power = total_energy / n
#     rms = np.sqrt(total_power)
#     freq_centroid = np.sum(np.abs(freqs) * np.abs(signal_fft_shifted)**2) / total_energy
#     freq_bandwidth = np.sum(np.abs(freqs[1:]**2 * signal_fft_shifted[1:]**2) - np.abs(freqs[:-1]**2 * signal_fft_shifted[:-1]**2)) / (2 * np.sum(np.abs(signal_fft_shifted)**2))
#     peak_freq = (np.argmax(np.abs(signal_fft_shifted)) + 1) * fs / n  # peak frequency
#     freq_skewness = skew(freqs[1:n//2] * (np.abs(signal_fft[1:n//2]) / np.max(np.abs(signal_fft[1:n//2]))))
#     freq_kurtosis = kurtosis(freqs[1:n//2] * (np.abs(signal_fft[1:n//2]) / np.max(np.abs(signal_fft[1:n//2]))))
#
#     feature_df = pd.DataFrame({
#         '总能量': [total_energy],
#         '总功率': [total_power],
#         '均方根': [rms],
#         '频率中心': [freq_centroid],
#         '频率带宽': [freq_bandwidth],
#         '峰值频率': [peak_freq],
#         '频率偏度': [freq_skewness],
#         '频率峰度': [freq_kurtosis]
#     })
#
#     return feature_df
#
# # Read the CSV file
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\02扩充\源代码\扩充后的数据-随机采样.csv')
#
# fs = 300  # sampling frequency, assumed to be 300 Hz here; adjust to the real value
# all_features_df = pd.DataFrame()
#
# # Walk every column and compute the frequency-domain features for each group of 300 samples
# for i in range(int(df.shape[1])):
#     for j in range(0, len(df), fs):
#         end_index = min(j + fs, len(df))
#         data_test = df.iloc[j:end_index, i].to_numpy()
#         # Compute the frequency-domain features
#         fa = fe_feature(data_test, fs)
#         all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
#
# # Save as a CSV file
# all_features_df.to_csv('特征值文件-频域.csv', index=False)

# # 4. Time-frequency features
# import pandas as pd
# from scipy.stats import skew, kurtosis
# import numpy as np
# from scipy.fft import fft, fftfreq
#
# def tf_features(raw, fs):
#     # Compute the fast Fourier transform
#     fft_values = fft(raw)
#     fft_magnitude = np.abs(fft_values)
#
#     # Build the frequency vector
#     n = len(raw)
#     freq = fftfreq(n, 1 / fs)
#
#     # Time-domain features
#     mean = np.mean(raw)
#     std_dev = np.std(raw)
#     max_value = np.max(raw)
#     min_value = np.min(raw)
#     variance = np.var(raw)
#     skewness = skew(raw)
#     kurt = kurtosis(raw)
#
#     # Frequency-domain features
#     power_spectrum = np.square(fft_magnitude) / n
#     mean_freq = np.mean(freq[power_spectrum > 0])
#     std_freq = np.std(freq[power_spectrum > 0])
#     freq_skewness = skew(freq[power_spectrum > 0])
#     freq_kurt = kurtosis(freq[power_spectrum > 0])
#     peak_freq_index = np.argmax(power_spectrum)
#     peak_freq = freq[peak_freq_index]
#     bandwidth = np.max(freq[power_spectrum > 0]) - np.min(freq[power_spectrum > 0])
#     energy = np.sum(power_spectrum)
#
#     tf_feature = pd.DataFrame()
#     tf_feature['mean'] = [mean]
#     tf_feature['std_dev'] = [std_dev]
#     tf_feature['max_value'] = [max_value]
#     tf_feature['min_value'] = [min_value]
#     tf_feature['variance'] = [variance]
#     tf_feature['skewness'] = [skewness]
#     tf_feature['kurt'] = [kurt]
#     tf_feature['mean_freq'] = [mean_freq]
#     tf_feature['std_freq'] = [std_freq]
#     tf_feature['freq_skewness'] = [freq_skewness]
#     tf_feature['freq_kurt'] = [freq_kurt]
#     tf_feature['peak_freq'] = [peak_freq]
#     tf_feature['bandwidth'] = [bandwidth]
#     tf_feature['energy'] = [energy]
#     return tf_feature
#
#
# # Read the CSV file
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\02扩充\源代码\扩充后的数据-随机采样.csv')
# fs = 300  # assumed sampling frequency; adjust to the real value
# # Start with an empty DataFrame that collects all features
# all_features_df = pd.DataFrame()
#
# # Walk every column and compute the features for each group of 300 samples
# for i in range(int(df.shape[1])):
#     for j in range(0, len(df), fs):
#         end_index = min(j + fs, len(df))
#         data_test = df.iloc[j:end_index, i].to_numpy()
#         # Compute the time-frequency features
#         fa = tf_features(data_test, fs)
#         all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
#
# # Save as a CSV file
# all_features_df.to_csv('特征值文件-时频域.csv', index=False)

# # 5. Morphological feature extraction
# import pandas as pd
# from scipy.signal import find_peaks
# import numpy as np
#
# def morphological_features(raw):
#     nonzero_elements = np.count_nonzero(raw)
#     mean_slope = np.mean(np.diff(raw) / np.diff(np.arange(len(raw))))
#
#     peaks, _ = find_peaks(raw)
#     num_peaks = len(peaks)
#     troughs, _ = find_peaks(-raw)  # locate the troughs
#     num_troughs = len(troughs)
#
#     # Peak/trough width is taken to be the distance between neighbouring peaks/troughs
#     if num_peaks > 1:
#         peak_widths = np.diff(np.sort(peaks)[1:] - np.sort(peaks)[:-1])
#     else:
#         peak_widths = np.array([])  # a single peak has no width
#
#     if num_troughs > 1:
#         trough_widths = np.diff(np.sort(troughs)[1:] - np.sort(troughs)[:-1])
#     else:
#         trough_widths = np.array([])  # a single trough has no width
#
#     # Curvature and acceleration change need the first and second derivatives
#     first_derivative = np.diff(raw) / np.diff(np.arange(len(raw)))
#     second_derivative = np.diff(first_derivative) / np.diff(np.arange(len(raw) - 1))
#     curvature = np.sum(second_derivative ** 2)  # simple curvature estimate
#     acceleration_change = np.sum(np.abs(np.diff(first_derivative)))  # simple acceleration-change estimate
#
#     morph_feature = pd.DataFrame()
#     morph_feature['nonzero_elements'] = [nonzero_elements]
#     morph_feature['mean_slope'] = [mean_slope]
#     morph_feature['num_peaks'] = [num_peaks]
#     morph_feature['num_troughs'] = [num_troughs]
#     morph_feature['mean_peak_width'] = [np.mean(peak_widths)] if peak_widths.size else [0]
#     morph_feature['mean_trough_width'] = [np.mean(trough_widths)] if trough_widths.size else [0]
#     morph_feature['curvature'] = [curvature]
#     morph_feature['acceleration_change'] = [acceleration_change]
#     return morph_feature
#
# # Read the CSV file
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\02扩充\源代码\扩充后的数据-随机采样.csv')
# fs = 300  # assumed sampling frequency; adjust to the real value
# # Start with an empty DataFrame that collects all features
# all_features_df = pd.DataFrame()
#
# # Walk every column and compute the features for each group of 300 samples
# for i in range(int(df.shape[1])):
#     for j in range(0, len(df), fs):
#         end_index = min(j + fs, len(df))
#         data_test = df.iloc[j:end_index, i].to_numpy()
#         # Compute the morphological features
#         fa = morphological_features(data_test)
#         all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
#
# # Save as a CSV file
# all_features_df.to_csv('特征值文件-形态学.csv', index=False)