# 退化评估.py — equipment degradation assessment
#
# ## 1. Feature-value clustering analysis: assess clustering quality and possible degradation.
# - Silhouette coefficient ranges over [-1, 1]; values close to 1 indicate good clustering.
# - Elbow method: vary the number of clusters, compute the total within-cluster sum of
#   squares (WCSS) for each, and pick the point after which WCSS falls off much more
#   slowly — that point is the suitable number of clusters.
# - Calinski-Harabasz index measures between-cluster separation versus within-cluster
#   tightness; a larger CH index indicates better clustering.
# - Davies-Bouldin index measures within-cluster similarity versus between-cluster
#   dissimilarity; a smaller DB index indicates better clustering.
  6. import warnings
  7. warnings.filterwarnings('ignore', category=FutureWarning)
  8. from sklearn.cluster import KMeans
  9. import pandas as pd
  10. from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
  11. import matplotlib
  12. matplotlib.use('Qt5Agg')
  13. import matplotlib.pyplot as plt
  14. # 读取特征值文件
  15. df = pd.read_csv(r'D:\验收材料\空工大-装备性能退化评估和故障预测健康管理软件\里程碑最终算法\03特征提取\源代码\特征值文件-统计.csv')
  16. # 应用KMeans聚类
  17. kmeans = KMeans(n_clusters=2, random_state=0).fit(df)
  18. df['cluster'] = kmeans.labels_
  19. # 肘部法则
  20. wcss = []
  21. for i in range(1, 11): # 尝试1到10个聚类
  22. kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
  23. kmeans.fit(df)
  24. wcss.append(kmeans.inertia_)
  25. plt.plot(range(1, 11), wcss)
  26. plt.title('Elbow Method') #如果WCSS随聚类数的增加而急剧下降,然后在某个点之后下降非常缓慢或几乎不变,这可能表明聚类数过多,导致模型开始“退化”
  27. plt.xlabel('Number of clusters')
  28. plt.ylabel('WCSS')
  29. plt.savefig('kmeans.jpg')
  30. plt.show()
  31. # 计算轮廓系数
  32. silhouette_avg = silhouette_score(df, kmeans.labels_)
  33. # 计算戴维森堡丁指数
  34. db_index = davies_bouldin_score(df, kmeans.labels_)
  35. # 计算Calinski-Harabasz指数
  36. ch_index = calinski_harabasz_score(df, kmeans.labels_)
  37. # 创建一个包含计算结果的DataFrame
  38. results_df = pd.DataFrame({
  39. 'Metric': ['Silhouette Coefficient', 'Davies-Bouldin Index', 'Calinski-Harabasz Index'],
  40. 'Value': [silhouette_avg, db_index, ch_index]
  41. })
  42. # 保存结果到CSV文件
  43. results_df.to_csv('kmeans_results.csv', index=False)
  44. # ## 2.基于统计的方法来评估退化:计算DataFrame中每一列的均值和标准差。并绘制每一列特征值的直方图
  45. # import numpy as np
  46. # import pandas as pd
  47. # import matplotlib
  48. # matplotlib.use('Qt5Agg')
  49. # import matplotlib.pyplot as plt
  50. #
  51. # def statistical_degradation(df):
  52. # means = df.mean()
  53. # std_devs = df.std()
  54. # return means, std_devs
  55. # def save_to_csv(means, std_devs, file_path):
  56. # # 创建一个包含均值和标准差的DataFrame
  57. # stats_df = pd.DataFrame({'Mean': means, 'Standard Deviation': std_devs})
  58. # # 将DataFrame存储到CSV文件
  59. # stats_df.to_csv(file_path, index=False)
  60. #
  61. # def plot_feature_distribution(df, means, std_devs):
  62. # fig, axes = plt.subplots(nrows=len(df.columns), ncols=1, figsize=(10, 5 * len(df.columns)))
  63. # for i, (column, ax) in enumerate(zip(df.columns, axes)):
  64. # df[column].plot(kind='hist', ax=ax, bins=20, alpha=0.5)
  65. # ax.axvline(means[i], color='r', linestyle='--', label=f'Mean: {means[i]:.2f}')
  66. # ax.axvline(means[i] - std_devs[i], color='g', linestyle='-', label=f'-1 Std Dev')
  67. # ax.axvline(means[i] + std_devs[i], color='g', linestyle='-')
  68. # ax.set_title(f'Distribution of {column}')
  69. # ax.legend(loc='upper left')
  70. # if i != len(df.columns) - 1:
  71. # ax.xaxis.set_visible(False) # 隐藏除底部子图外的所有x轴标签
  72. # if i == 0:
  73. # ax.set_ylabel('Frequency') # 只在第一个子图上显示y轴标签
  74. # else:
  75. # ax.yaxis.set_visible(False) # 隐藏除左侧子图外的所有y轴标签
  76. # fig.tight_layout()
  77. # plt.savefig('distribution.jpg')
  78. # plt.show()
  79. # # 读取CSV文件
  80. # df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
  81. # # 计算统计数据
  82. # means, std_devs = statistical_degradation(df)
  83. # save_to_csv(means, std_devs, 'statistics.csv')
  84. # # 绘制特征值分布图
  85. # plot_feature_distribution(df, means, std_devs)
  86. # ## 3.基于趋势分析法:分析特征值随时间的趋势,如果斜率显著不为零,则可能表明退化。
  87. # from sklearn.linear_model import LinearRegression
  88. # import numpy as np
  89. # import pandas as pd
  90. # import matplotlib
  91. # matplotlib.use('Qt5Agg')
  92. # import matplotlib.pyplot as plt
  93. #
  94. # # 趋势分析函数,适用于多列特征值
  95. # def trend_analysis(df, some_threshold):
  96. # results_list = []
  97. # for column in df.columns:
  98. # X = np.arange(len(df[column])).reshape(-1, 1)
  99. # y = df[column].values
  100. # model = LinearRegression().fit(X, y)
  101. # slope = model.coef_[0]
  102. # # print(f"Slope for {column}:", slope)
  103. # results_list.append({'Feature': column, 'Slope': slope, 'Significant': abs(slope) > some_threshold})
  104. # results_df = pd.DataFrame(results_list)
  105. # return results_df
  106. #
  107. # # 读取CSV文件
  108. # df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
  109. #
  110. # # 设置斜率显著性的阈值
  111. # some_threshold = 0.01
  112. #
  113. # # 进行趋势分析并获取结果
  114. # slopes = trend_analysis(df, some_threshold)
  115. #
  116. # # 保存结果到CSV文件
  117. # slopes.to_csv('trend_analysis_results.csv', index=False)
  118. #
  119. # # 筛选出显著的特征
  120. # significant_columns = slopes[slopes['Significant'] == True]['Feature']
  121. # num_features = len(significant_columns)
  122. #
  123. # # 动态设置图形的高度,每个子图的高度为4英寸
  124. # plt.figure(figsize=(15, 4 * num_features)) # 总宽度15英寸,高度根据特征数量自适应
  125. # for i, column in enumerate(significant_columns):
  126. # plt.subplot(num_features, 1, i+1) # 创建子图
  127. # plt.scatter(range(len(df)), df[column], label='Data')
  128. # significant_slope = slopes[slopes['Feature'] == column]['Slope'].values[0]
  129. # plt.plot(range(len(df)), significant_slope * np.arange(len(df)) + df[column].iloc[0],
  130. # color='red', label=f'Trend line with slope {significant_slope:.4f}')
  131. # plt.xlabel('Time')
  132. # plt.ylabel(column)
  133. # plt.title(f'Trend Analysis for {column}')
  134. # plt.legend()
  135. #
  136. # plt.tight_layout() # 调整子图布局以避免重叠
  137. # plt.savefig('trend_analysis.jpg')
  138. # plt.show()
  139. # # ## 4.基于时间序列分析的方法:识别特征值的周期性变化或异常模式。
  140. # import numpy as np
  141. # import pandas as pd
  142. # import matplotlib
  143. # matplotlib.use('Qt5Agg')
  144. # import matplotlib.pyplot as plt
  145. # from statsmodels.tsa.arima.model import ARIMA
  146. # import warnings
  147. # warnings.filterwarnings('ignore', category=FutureWarning)
  148. #
  149. # # 多变量时间序列退化评估函数
  150. # def time_series_degradation_multicolumn(df):
  151. # aic_values = pd.Series()
  152. # for column in df.columns:
  153. # data = df[column].values
  154. # model = ARIMA(data, order=(1, 1, 1))
  155. # results = model.fit()
  156. # aic_value = results.aic
  157. # print(f"AIC for {column}:", aic_value)
  158. # aic_values[column] = aic_value
  159. # return aic_values
  160. #
  161. # # 读取CSV文件
  162. # df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
  163. #
  164. # # 进行多变量时间序列退化评估
  165. # aic_values = time_series_degradation_multicolumn(df)
  166. #
  167. # # 将AIC值保存到CSV文件
  168. # aic_values.to_csv('aic_values.csv', index=True)
  169. #
  170. # # 选择AIC值最高的前N个特征
  171. # N = 10
  172. # top_features = aic_values.sort_values(ascending=False).index[:N]
  173. #
  174. # # 设置图形和子图的布局
  175. # num_features = len(top_features)
  176. # fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features), sharex=True)
  177. #
  178. # for i, column in enumerate(top_features):
  179. # df[column].plot(ax=axes[i], label=column)
  180. # axes[i].set_title(f'Time Series Plot for {column}')
  181. # axes[i].set_xlabel('Time')
  182. # axes[i].set_ylabel(column)
  183. # axes[i].legend()
  184. #
  185. # # 如果只有一个特征,axes可能不是数组,需要检查并相应地调整
  186. # if num_features == 1:
  187. # axes.legend()
  188. #
  189. # plt.tight_layout()
  190. # plt.savefig('time_series.jpg')
  191. # plt.show()
  192. # #5.频域分析:通过傅里叶变换分析信号的频率成分,识别异常频率成分可能表明的退化。
  193. #
  194. #
  195. # import numpy as np
  196. # import pandas as pd
  197. # import matplotlib
  198. # matplotlib.use('Qt5Agg')
  199. # import matplotlib.pyplot as plt
  200. #
  201. # # 假设df是包含多列时间序列特征的DataFrame
  202. # df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
  203. #
  204. # # 进行傅里叶变换并分析每列特征
  205. # n_columns = df.shape[1]
  206. # fig, axes = plt.subplots(n_columns, 1, figsize=(10, 4 * n_columns))
  207. #
  208. # fft_results = []
  209. # for i, column in enumerate(df.columns):
  210. # # 对每列特征进行FFT
  211. # data = df[column].values
  212. # fft = np.fft.fft(data)
  213. # frequencies = np.fft.fftfreq(len(data), d=1)
  214. #
  215. # # 找到峰值频率及其幅度
  216. # peak_frequency_index = np.argmax(np.abs(fft))
  217. # peak_frequency = frequencies[peak_frequency_index]
  218. # peak_amplitude = np.abs(fft[peak_frequency_index])
  219. #
  220. # # 绘制频谱图
  221. # axes[i].plot(frequencies, np.abs(fft))
  222. # axes[i].set_title(f'Frequency Spectrum of {column}')
  223. # axes[i].set_xlabel('Frequency (Hz)')
  224. # axes[i].set_ylabel('Amplitude')
  225. # axes[i].grid(True)
  226. #
  227. # # 存储FFT结果
  228. # fft_results.append({
  229. # 'Feature': column,
  230. # 'Peak Frequency (Hz)': peak_frequency,
  231. # 'Peak Amplitude': peak_amplitude
  232. # })
  233. #
  234. # # 调整子图布局以避免重叠
  235. # plt.tight_layout()
  236. # plt.subplots_adjust(hspace=0.5)
  237. #
  238. # # 保存图形
  239. # plt.savefig('fft_spectrum.jpg')
  240. # plt.show()
  241. #
  242. # # 将FFT结果保存到CSV
  243. # fft_df = pd.DataFrame(fft_results)
  244. # fft_df.to_csv('fft_degradation_parameters.csv', index=False)
  245. # ## 6.移动平均和标准差来评估退化
  246. # import matplotlib
  247. # matplotlib.use('Qt5Agg')
  248. # import matplotlib.pyplot as plt
  249. # import numpy as np
  250. # import pandas as pd
  251. #
  252. # # 计算移动平均和标准差,用于退化评估
  253. # def simple_degradation_assessment(data, window_size=5):
  254. # moving_average = data.rolling(window=window_size).mean()
  255. # std_deviation = data.rolling(window=window_size).std()
  256. # return moving_average, std_deviation
  257. # # 计算描述性统计量
  258. # def descriptive_statistics(data):
  259. # return {
  260. # 'Mean': data.mean(),
  261. # 'Min': data.min(),
  262. # 'Max': data.max(),
  263. # 'Std': data.std()
  264. # }
  265. # # 读取CSV文件
  266. # df = pd.read_csv(r'E:\BaiduSyncdisk\zhiguan\01最近做的\算法调试\自己调试--Y\里程碑最终算法\03特征提取\源代码\特征值文件-频域.csv')
  267. # # 对每一列进行退化评估
  268. # degradation_stats = {column: descriptive_statistics(df[column]) for column in df.columns}
  269. #
  270. # # 将描述性统计量转换为DataFrame并保存到CSV
  271. # degradation_df = pd.DataFrame(degradation_stats).T
  272. # degradation_df.to_csv('degradation_statistics.csv', index=False)
  273. # # 设置图形的大小和布局
  274. # num_features = len(df.columns)
  275. # fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features))
  276. #
  277. # # 对每一列进行退化评估并绘制到子图上
  278. # feature_data = [] # 用于存储所有特征的移动平均和标准差数据
  279. # for i, column in enumerate(df.columns, 1):
  280. # ma, std = simple_degradation_assessment(df[column], window_size=5)
  281. # axes[i-1].plot(df[column], label='Original Data', color='blue')
  282. # axes[i-1].plot(ma, label='Moving Average', linestyle='--', color='red')
  283. # axes[i-1].plot(std, label='Standard Deviation', linestyle='-.', color='green')
  284. # axes[i-1].set_title(f'Degradation Assessment for {column}')
  285. # axes[i-1].set_xlabel('Time')
  286. # axes[i-1].set_ylabel('Value')
  287. # axes[i-1].legend()
  288. #
  289. # # 将结果收集到列表中
  290. # feature_data.append({
  291. # 'Feature': column,
  292. # 'Moving Average': ma.to_list(),
  293. # 'Standard Deviation': std.to_list()
  294. # })
  295. #
  296. # # 调整子图布局以避免重叠
  297. # plt.tight_layout()
  298. # plt.subplots_adjust(hspace=0.5) # 调整子图之间的垂直间距
  299. #
  300. # # 保存图形
  301. # plt.savefig('degradation_assessment.jpg')
  302. # plt.show()