## 1. Clustering-based feature analysis: evaluate clustering quality and check for degeneration.
# The silhouette coefficient lies in [-1, 1]; values close to 1 indicate good clustering.
# The elbow method varies the number of clusters, computes the total within-cluster sum of
# squares (WCSS) for each k, and picks the point after which WCSS decreases much more slowly.
# The Calinski-Harabasz index measures between-cluster separation vs. within-cluster
# compactness; larger is better.
# The Davies-Bouldin index measures within-cluster similarity vs. between-cluster
# dissimilarity; smaller is better.
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt

# Read the feature-value file.
df = pd.read_csv(r'D:\验收材料\空工大-装备性能退化评估和故障预测健康管理软件\里程碑最终算法\03特征提取\源代码\特征值文件-统计.csv')

# BUG FIX: keep a pristine copy of the feature matrix. The original script appended the
# 'cluster' label column to df and then reused df — label included — for the elbow loop
# and every quality metric, leaking the cluster assignment into the feature space and
# distorting WCSS and all three scores.
features = df.copy()

# Fit the intended 2-cluster model on the raw features. n_init is pinned to the classic
# default (10) so behaviour stays stable across scikit-learn versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(features)
df['cluster'] = kmeans.labels_

# Elbow method: if WCSS drops sharply and then barely changes past some k, a larger k
# over-segments the data and the clustering starts to "degenerate".
wcss = []
for k in range(1, 11):  # try 1 to 10 clusters
    # BUG FIX: use a separate name for the elbow models — the original rebound `kmeans`
    # here, so the metrics below accidentally scored the 10-cluster model.
    elbow_model = KMeans(n_clusters=k, init='k-means++', max_iter=300, n_init=10,
                         random_state=0)
    elbow_model.fit(features)  # BUG FIX: fit on features, not df with the label column
    wcss.append(elbow_model.inertia_)

plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.savefig('kmeans.jpg')
plt.show()

# Quality metrics for the 2-cluster model, computed on the uncontaminated features.
# Silhouette coefficient.
silhouette_avg = silhouette_score(features, kmeans.labels_)
# Davies-Bouldin index.
db_index = davies_bouldin_score(features, kmeans.labels_)
# Calinski-Harabasz index.
ch_index = calinski_harabasz_score(features, kmeans.labels_)

# Collect the computed metrics in a DataFrame.
results_df = pd.DataFrame({
    'Metric': ['Silhouette Coefficient', 'Davies-Bouldin Index', 'Calinski-Harabasz Index'],
    'Value': [silhouette_avg, db_index, ch_index]
})
# Save the results to a CSV file.
results_df.to_csv('kmeans_results.csv', index=False)

# ## 2. Statistics-based degradation assessment: compute the mean and standard deviation
# ## of every DataFrame column and plot a histogram of each feature.
# import numpy as np
# import pandas as pd
# import matplotlib
# matplotlib.use('Qt5Agg')
# import matplotlib.pyplot as plt
#
# def statistical_degradation(df):
#     means = df.mean()
#     std_devs = df.std()
#     return means, std_devs
#
# def save_to_csv(means, std_devs, file_path):
#     # Build a DataFrame holding the means and standard deviations.
#     stats_df = pd.DataFrame({'Mean': means, 'Standard Deviation': std_devs})
#     # Store the DataFrame as a CSV file.
#     stats_df.to_csv(file_path, index=False)
#
# def plot_feature_distribution(df, means, std_devs):
#     fig, axes = plt.subplots(nrows=len(df.columns), ncols=1, figsize=(10, 5 * len(df.columns)))
#     for i, (column, ax) in enumerate(zip(df.columns, axes)):
#         df[column].plot(kind='hist', ax=ax, bins=20, alpha=0.5)
#         ax.axvline(means[i], color='r', linestyle='--', label=f'Mean: {means[i]:.2f}')
#         ax.axvline(means[i] - std_devs[i], color='g', linestyle='-', label=f'-1 Std Dev')
#         ax.axvline(means[i] + std_devs[i], color='g', linestyle='-')
#         ax.set_title(f'Distribution of {column}')
#         ax.legend(loc='upper left')
#         if i != len(df.columns) - 1:
#             ax.xaxis.set_visible(False)  # hide x-axis labels except on the bottom subplot
#         if i == 0:
#             ax.set_ylabel('Frequency')  # show the y-axis label only on the first subplot
#         else:
#             ax.yaxis.set_visible(False)  # hide the y-axis on all other subplots
#     fig.tight_layout()
#     plt.savefig('distribution.jpg')
#     plt.show()
#
# # Read the CSV file.
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
# # Compute the statistics.
# means, std_devs = statistical_degradation(df)
# save_to_csv(means, std_devs, 'statistics.csv')
# # Plot the feature-value distributions.
# plot_feature_distribution(df, means, std_devs)

# ## 3. Trend analysis: fit each feature's trend over time; a slope significantly
# ## different from zero may indicate degradation.
# from sklearn.linear_model import LinearRegression
# import numpy as np
# import pandas as pd
# import matplotlib
# matplotlib.use('Qt5Agg')
# import matplotlib.pyplot as plt
#
# # Trend-analysis function for a DataFrame with multiple feature columns.
# def trend_analysis(df, some_threshold):
#     results_list = []
#     for column in df.columns:
#         X = np.arange(len(df[column])).reshape(-1, 1)
#         y = df[column].values
#         model = LinearRegression().fit(X, y)
#         slope = model.coef_[0]
#         # print(f"Slope for {column}:", slope)
#         results_list.append({'Feature': column, 'Slope': slope, 'Significant': abs(slope) > some_threshold})
#     results_df = pd.DataFrame(results_list)
#     return results_df
#
# # Read the CSV file.
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
#
# # Threshold for slope significance.
# some_threshold = 0.01
#
# # Run the trend analysis and collect the results.
# slopes = trend_analysis(df, some_threshold)
#
# # Save the results to a CSV file.
# slopes.to_csv('trend_analysis_results.csv', index=False)
#
# # Keep only the features with a significant slope.
# significant_columns = slopes[slopes['Significant'] == True]['Feature']
# num_features = len(significant_columns)
#
# # Size the figure dynamically: 4 inches of height per subplot, 15 inches wide.
# plt.figure(figsize=(15, 4 * num_features))
# for i, column in enumerate(significant_columns):
#     plt.subplot(num_features, 1, i+1)  # create the subplot
#     plt.scatter(range(len(df)), df[column], label='Data')
#     significant_slope = slopes[slopes['Feature'] == column]['Slope'].values[0]
#     plt.plot(range(len(df)), significant_slope * np.arange(len(df)) + df[column].iloc[0],
#              color='red', label=f'Trend line with slope {significant_slope:.4f}')
#     plt.xlabel('Time')
#     plt.ylabel(column)
#     plt.title(f'Trend Analysis for {column}')
#     plt.legend()
#
# plt.tight_layout()  # adjust the subplot layout to avoid overlap
# plt.savefig('trend_analysis.jpg')
# plt.show()

# ## 4. Time-series analysis: detect periodic changes or abnormal patterns in the features.
# import numpy as np
# import pandas as pd
# import matplotlib
# matplotlib.use('Qt5Agg')
# import matplotlib.pyplot as plt
# from statsmodels.tsa.arima.model import ARIMA
# import warnings
# warnings.filterwarnings('ignore', category=FutureWarning)
#
# # Multivariate time-series degradation assessment.
# def time_series_degradation_multicolumn(df):
#     aic_values = pd.Series()
#     for column in df.columns:
#         data = df[column].values
#         model = ARIMA(data, order=(1, 1, 1))
#         results = model.fit()
#         aic_value = results.aic
#         print(f"AIC for {column}:", aic_value)
#         aic_values[column] = aic_value
#     return aic_values
#
# # Read the CSV file.
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
#
# # Run the multivariate time-series degradation assessment.
# aic_values = time_series_degradation_multicolumn(df)
#
# # Save the AIC values to a CSV file.
# aic_values.to_csv('aic_values.csv', index=True)
#
# # Pick the N features with the highest AIC.
# N = 10
# top_features = aic_values.sort_values(ascending=False).index[:N]
#
# # Lay out the figure and subplots.
# num_features = len(top_features)
# fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features), sharex=True)
#
# for i, column in enumerate(top_features):
#     df[column].plot(ax=axes[i], label=column)
#     axes[i].set_title(f'Time Series Plot for {column}')
#     axes[i].set_xlabel('Time')
#     axes[i].set_ylabel(column)
#     axes[i].legend()
#
# # With a single feature, axes may not be an array; adjust accordingly.
# if num_features == 1:
#     axes.legend()
#
# plt.tight_layout()
# plt.savefig('time_series.jpg')
# plt.show()

# # 5. Frequency-domain analysis: use the Fourier transform to analyse the signal's
# # frequency content; abnormal frequency components may indicate degradation.
# import numpy as np
# import pandas as pd
# import matplotlib
# matplotlib.use('Qt5Agg')
# import matplotlib.pyplot as plt
#
# # df is assumed to be a DataFrame of multiple time-series feature columns.
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
#
# # Apply the Fourier transform to and analyse each feature column.
# n_columns = df.shape[1]
# fig, axes = plt.subplots(n_columns, 1, figsize=(10, 4 * n_columns))
#
# fft_results = []
# for i, column in enumerate(df.columns):
#     # FFT of this feature column.
#     data = df[column].values
#     fft = np.fft.fft(data)
#     frequencies = np.fft.fftfreq(len(data), d=1)
#
#     # Find the peak frequency and its amplitude.
#     peak_frequency_index = np.argmax(np.abs(fft))
#     peak_frequency = frequencies[peak_frequency_index]
#     peak_amplitude = np.abs(fft[peak_frequency_index])
#
#     # Plot the spectrum.
#     axes[i].plot(frequencies, np.abs(fft))
#     axes[i].set_title(f'Frequency Spectrum of {column}')
#     axes[i].set_xlabel('Frequency (Hz)')
#     axes[i].set_ylabel('Amplitude')
#     axes[i].grid(True)
#
#     # Store the FFT results.
#     fft_results.append({
#         'Feature': column,
#         'Peak Frequency (Hz)': peak_frequency,
#         'Peak Amplitude': peak_amplitude
#     })
#
# # Adjust the subplot layout to avoid overlap.
# plt.tight_layout()
# plt.subplots_adjust(hspace=0.5)
#
# # Save the figure.
# plt.savefig('fft_spectrum.jpg')
# plt.show()
#
# # Save the FFT results to CSV.
# fft_df = pd.DataFrame(fft_results)
# fft_df.to_csv('fft_degradation_parameters.csv', index=False)

# ## 6. Moving average and standard deviation for degradation assessment.
# import matplotlib
# matplotlib.use('Qt5Agg')
# import matplotlib.pyplot as plt
# import numpy as np
# import pandas as pd
#
# # Compute a moving average and moving standard deviation for degradation assessment.
# def simple_degradation_assessment(data, window_size=5):
#     moving_average = data.rolling(window=window_size).mean()
#     std_deviation = data.rolling(window=window_size).std()
#     return moving_average, std_deviation
#
# # Compute descriptive statistics.
# def descriptive_statistics(data):
#     return {
#         'Mean': data.mean(),
#         'Min': data.min(),
#         'Max': data.max(),
#         'Std': data.std()
#     }
#
# # Read the CSV file.
# df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
#
# # Assess every column.
# degradation_stats = {column: descriptive_statistics(df[column]) for column in df.columns}
#
# # Convert the descriptive statistics to a DataFrame and save to CSV.
# degradation_df = pd.DataFrame(degradation_stats).T
# degradation_df.to_csv('degradation_statistics.csv', index=False)
#
# # Set the figure size and layout.
# num_features = len(df.columns)
# fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features))
#
# # Assess each column and draw it on its own subplot.
# feature_data = []  # collects moving average / std-dev data for every feature
# for i, column in enumerate(df.columns, 1):
#     ma, std = simple_degradation_assessment(df[column], window_size=5)
#     axes[i-1].plot(df[column], label='Original Data', color='blue')
#     axes[i-1].plot(ma, label='Moving Average', linestyle='--', color='red')
#     axes[i-1].plot(std, label='Standard Deviation', linestyle='-.', color='green')
#     axes[i-1].set_title(f'Degradation Assessment for {column}')
#     axes[i-1].set_xlabel('Time')
#     axes[i-1].set_ylabel('Value')
#     axes[i-1].legend()
#
#     # Collect the results.
#     feature_data.append({
#         'Feature': column,
#         'Moving Average': ma.to_list(),
#         'Standard Deviation': std.to_list()
#     })
#
# # Adjust the subplot layout to avoid overlap.
# plt.tight_layout()
# plt.subplots_adjust(hspace=0.5)  # vertical spacing between subplots
#
# # Save the figure.
# plt.savefig('degradation_assessment.jpg')
# plt.show()