退化评估.py 15 KB


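# 1. Cluster analysis of feature values: assess the quality of the clustering
#    result and whether degradation is present.
#    - Silhouette coefficient: ranges over [-1, 1]; values close to 1 indicate
#      a good clustering.
#    - Elbow method: vary the number of clusters, compute the within-cluster
#      sum of squares (WCSS) for each, and pick the point after which the WCSS
#      decreases markedly more slowly; that point is a suitable cluster count.
#    - Calinski-Harabasz index: measures between-cluster separation against
#      within-cluster compactness; a larger value means a better clustering.
#    - Davies-Bouldin index: measures within-cluster similarity against
#      between-cluster dissimilarity; a smaller value means a better clustering.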
import io
import os

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import requests
from flask import Flask, request, jsonify
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
  14. app = Flask(__name__)
  15. @app.route('/cluster_analysis', methods=['POST'])
  16. def cluster_analysis():
  17. try:
  18. # 检查请求体中是否包含文件地址
  19. data = request.json
  20. if 'file_url' not in data:
  21. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  22. file_url = data['file_url']
  23. # 使用requests获取文件内容
  24. response = requests.get(file_url)
  25. if response.status_code != 200:
  26. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  27. # 读取数据集
  28. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  29. # 应用 KMeans 聚类
  30. kmeans = KMeans(n_clusters=2, random_state=0).fit(df)
  31. df['cluster'] = kmeans.labels_
  32. # 肘部法则
  33. wcss = []
  34. for i in range(1, 11): # 尝试1到10个聚类
  35. kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
  36. kmeans.fit(df)
  37. wcss.append(kmeans.inertia_)
  38. # 可视化肘部法则
  39. plt.figure()
  40. plt.plot(range(1, 11), wcss)
  41. plt.title('Elbow Method')
  42. plt.xlabel('Number of clusters')
  43. plt.ylabel('WCSS')
  44. plt.savefig('kmeans.jpg') # 保存图片
  45. # 计算轮廓系数
  46. silhouette_avg = silhouette_score(df, kmeans.labels_)
  47. # 计算戴维森堡丁指数
  48. db_index = davies_bouldin_score(df, kmeans.labels_)
  49. # 计算Calinski-Harabasz指数
  50. ch_index = calinski_harabasz_score(df, kmeans.labels_)
  51. # 创建一个包含计算结果的 DataFrame
  52. results_df = pd.DataFrame({
  53. 'Metric': ['Silhouette Coefficient', 'Davies-Bouldin Index', 'Calinski-Harabasz Index'],
  54. 'Value': [silhouette_avg, db_index, ch_index]
  55. })
  56. # 保存结果到 CSV 文件
  57. results_file_path = 'kmeans_results.csv' # 保存结果的路径
  58. results_df.to_csv(results_file_path, index=False)
  59. # 返回结果文件路径给客户端
  60. return jsonify({
  61. 'code': 200,
  62. 'msg': 'Cluster analysis completed',
  63. 'resultsFilePath': results_file_path,
  64. 'plotImagePath': 'kmeans.jpg'
  65. })
  66. except Exception as e:
  67. return jsonify({
  68. 'code': 500,
  69. 'msg': str(e)
  70. })
  71. if __name__ == '__main__':
  72. app.run(debug=True, port=8081, host='0.0.0.0')
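# 2. Statistics-based degradation assessment: compute the mean and standard
#    deviation of every DataFrame column and plot a histogram of each column's
#    feature values.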
  74. from flask import Flask, request, jsonify
  75. import numpy as np
  76. import pandas as pd
  77. import matplotlib.pyplot as plt
  78. import os
  79. import requests
  80. app = Flask(__name__)
  81. def statistical_degradation(df):
  82. means = df.mean()
  83. std_devs = df.std()
  84. return means, std_devs
  85. def save_to_csv(means, std_devs, file_path):
  86. stats_df = pd.DataFrame({'Mean': means, 'Standard Deviation': std_devs})
  87. stats_df.to_csv(file_path, index=False)
  88. def plot_feature_distribution(df, means, std_devs):
  89. fig, axes = plt.subplots(nrows=len(df.columns), ncols=1, figsize=(10, 5 * len(df.columns)))
  90. for i, (column, ax) in enumerate(zip(df.columns, axes)):
  91. df[column].plot(kind='hist', ax=ax, bins=20, alpha=0.5)
  92. ax.axvline(means[i], color='r', linestyle='--', label=f'Mean: {means[i]:.2f}')
  93. ax.axvline(means[i] - std_devs[i], color='g', linestyle='-', label=f'-1 Std Dev')
  94. ax.axvline(means[i] + std_devs[i], color='g', linestyle='-')
  95. ax.set_title(f'Distribution of {column}')
  96. ax.legend(loc='upper left')
  97. if i != len(df.columns) - 1:
  98. ax.xaxis.set_visible(False)
  99. if i == 0:
  100. ax.set_ylabel('Frequency')
  101. else:
  102. ax.yaxis.set_visible(False)
  103. fig.tight_layout()
  104. plt.savefig('distribution.jpg')
  105. plt.show()
  106. @app.route('/analyze_features', methods=['POST'])
  107. def analyze_features():
  108. try:
  109. # 检查请求体中是否包含文件地址
  110. data = request.json
  111. if 'file_url' not in data:
  112. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  113. file_url = data['file_url']
  114. # 使用requests获取文件内容
  115. response = requests.get(file_url)
  116. if response.status_code != 200:
  117. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  118. # 读取数据集
  119. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  120. # 计算统计数据
  121. means, std_devs = statistical_degradation(df)
  122. stats_file_path = 'statistics.csv' # 保存统计数据的路径
  123. save_to_csv(means, std_devs, stats_file_path)
  124. # 绘制特征值分布图
  125. plot_feature_distribution(df, means, std_devs)
  126. # 返回结果文件路径给客户端
  127. return jsonify({
  128. 'code': 200,
  129. 'msg': 'Feature analysis completed',
  130. 'statsFilePath': stats_file_path,
  131. 'plotImagePath': 'distribution.jpg'
  132. })
  133. except Exception as e:
  134. return jsonify({
  135. 'code': 500,
  136. 'msg': str(e)
  137. })
  138. if __name__ == '__main__':
  139. app.run(debug=True, port=8081, host='0.0.0.0')
# 3. Trend analysis: analyse how each feature value trends over time; a slope
#    that is significantly different from zero may indicate degradation.
  141. from flask import Flask, request, jsonify
  142. from sklearn.linear_model import LinearRegression
  143. import numpy as np
  144. import pandas as pd
  145. import matplotlib.pyplot as plt
  146. import os
  147. import requests
  148. app = Flask(__name__)
  149. def trend_analysis(df, some_threshold):
  150. results_list = []
  151. for column in df.columns:
  152. X = np.arange(len(df[column])).reshape(-1, 1)
  153. y = df[column].values
  154. model = LinearRegression().fit(X, y)
  155. slope = model.coef_[0]
  156. results_list.append({'Feature': column, 'Slope': slope, 'Significant': abs(slope) > some_threshold})
  157. results_df = pd.DataFrame(results_list)
  158. return results_df
  159. @app.route('/trend_analysis', methods=['POST'])
  160. def trend_analysis_endpoint():
  161. try:
  162. # 检查请求体中是否包含文件地址
  163. data = request.json
  164. if 'file_url' not in data:
  165. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  166. file_url = data['file_url']
  167. # 使用requests获取文件内容
  168. response = requests.get(file_url)
  169. if response.status_code != 200:
  170. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  171. # 读取数据集
  172. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  173. # 设置斜率显著性的阈值
  174. some_threshold = 0.01
  175. # 进行趋势分析并获取结果
  176. slopes = trend_analysis(df, some_threshold)
  177. # 保存结果到CSV文件
  178. results_file_path = 'trend_analysis_results.csv' # 保存结果的路径
  179. slopes.to_csv(results_file_path, index=False)
  180. # 筛选出显著的特征
  181. significant_columns = slopes[slopes['Significant'] == True]['Feature']
  182. num_features = len(significant_columns)
  183. # 动态设置图形的高度,每个子图的高度为4英寸
  184. plt.figure(figsize=(15, 4 * num_features)) # 总宽度15英寸,高度根据特征数量自适应
  185. for i, column in enumerate(significant_columns):
  186. plt.subplot(num_features, 1, i+1) # 创建子图
  187. plt.scatter(range(len(df)), df[column], label='Data')
  188. significant_slope = slopes[slopes['Feature'] == column]['Slope'].values[0]
  189. plt.plot(range(len(df)), significant_slope * np.arange(len(df)) + df[column].iloc[0],
  190. color='red', label=f'Trend line with slope {significant_slope:.4f}')
  191. plt.xlabel('Time')
  192. plt.ylabel(column)
  193. plt.title(f'Trend Analysis for {column}')
  194. plt.legend()
  195. plt.tight_layout() # 调整子图布局以避免重叠
  196. plot_file_path = 'trend_analysis.jpg' # 保存趋势图的路径
  197. plt.savefig(plot_file_path)
  198. plt.close() # 关闭图形以释放资源
  199. # 返回结果文件路径和趋势图路径给客户端
  200. return jsonify({
  201. 'code': 200,
  202. 'msg': 'Trend analysis completed',
  203. 'resultsFilePath': results_file_path,
  204. 'plotImagePath': plot_file_path
  205. })
  206. except Exception as e:
  207. return jsonify({
  208. 'code': 500,
  209. 'msg': str(e)
  210. })
  211. if __name__ == '__main__':
  212. app.run(debug=True, port=8081, host='0.0.0.0')
# 4. Time-series analysis: identify periodic changes or anomalous patterns in
#    the feature values.
  214. from flask import Flask, request, jsonify
  215. import numpy as np
  216. import pandas as pd
  217. import matplotlib.pyplot as plt
  218. from statsmodels.tsa.arima.model import ARIMA
  219. import warnings
  220. import requests
  221. app = Flask(__name__)
  222. def time_series_degradation_multicolumn(df):
  223. aic_values = pd.Series()
  224. for column in df.columns:
  225. data = df[column].values
  226. model = ARIMA(data, order=(1, 1, 1))
  227. with warnings.catch_warnings():
  228. warnings.filterwarnings('ignore', category=FutureWarning)
  229. results = model.fit()
  230. aic_value = results.aic
  231. # print(f"AIC for {column}:", aic_value) # 可在Flask日志中输出此信息
  232. aic_values[column] = aic_value
  233. return aic_values
  234. @app.route('/time_series_analysis', methods=['POST'])
  235. def time_series_analysis():
  236. try:
  237. # 检查请求体中是否包含文件地址
  238. data = request.json
  239. if 'file_url' not in data:
  240. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  241. file_url = data['file_url']
  242. # 使用requests获取文件内容
  243. response = requests.get(file_url)
  244. if response.status_code != 200:
  245. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  246. # 读取数据集
  247. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  248. # 进行多变量时间序列退化评估
  249. aic_values = time_series_degradation_multicolumn(df)
  250. # 将AIC值保存到CSV文件
  251. aic_file_path = 'aic_values.csv' # 保存AIC值的路径
  252. aic_values.to_csv(aic_file_path, index=True)
  253. # 选择AIC值最高的前N个特征
  254. N = 10
  255. top_features = aic_values.sort_values(ascending=False).index[:N]
  256. # 设置图形和子图的布局
  257. num_features = len(top_features)
  258. fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features), sharex=True)
  259. for i, column in enumerate(top_features):
  260. df[column].plot(ax=axes[i], label=column)
  261. axes[i].set_title(f'Time Series Plot for {column}')
  262. axes[i].set_xlabel('Time')
  263. axes[i].set_ylabel(column)
  264. axes[i].legend()
  265. # 如果只有一个特征,axes可能不是数组,需要检查并相应地调整
  266. if num_features == 1:
  267. axes.legend()
  268. plt.tight_layout()
  269. plot_file_path = 'time_series.jpg' # 保存时间序列图的路径
  270. plt.savefig(plot_file_path)
  271. plt.close() # 关闭图形以释放资源
  272. # 返回结果文件路径和时间序列图路径给客户端
  273. return jsonify({
  274. 'code': 200,
  275. 'msg': 'Time series analysis completed',
  276. 'aicFilePath': aic_file_path,
  277. 'plotImagePath': plot_file_path
  278. })
  279. except Exception as e:
  280. return jsonify({
  281. 'code': 500,
  282. 'msg': str(e)
  283. })
  284. if __name__ == '__main__':
  285. app.run(debug=True, port=8081, host='0.0.0.0')
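# 5. Frequency-domain analysis: use the Fourier transform to analyse the
#    frequency components of the signal; anomalous frequency components may
#    indicate degradation.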
  287. from flask import Flask, request, jsonify
  288. import numpy as np
  289. import pandas as pd
  290. import matplotlib.pyplot as plt
  291. import requests
  292. app = Flask(__name__)
  293. def perform_fft_analysis(df):
  294. n_columns = df.shape[1]
  295. fig, axes = plt.subplots(n_columns, 1, figsize=(10, 4 * n_columns))
  296. fft_results = []
  297. for i, column in enumerate(df.columns):
  298. data = df[column].values
  299. fft = np.fft.fft(data)
  300. frequencies = np.fft.fftfreq(len(data), d=1)
  301. peak_frequency_index = np.argmax(np.abs(fft))
  302. peak_frequency = frequencies[peak_frequency_index]
  303. peak_amplitude = np.abs(fft[peak_frequency_index])
  304. axes[i].plot(frequencies, np.abs(fft))
  305. axes[i].set_title(f'Frequency Spectrum of {column}')
  306. axes[i].set_xlabel('Frequency (Hz)')
  307. axes[i].set_ylabel('Amplitude')
  308. axes[i].grid(True)
  309. fft_results.append({
  310. 'Feature': column,
  311. 'Peak Frequency (Hz)': peak_frequency,
  312. 'Peak Amplitude': peak_amplitude
  313. })
  314. plt.tight_layout()
  315. plt.subplots_adjust(hspace=0.5)
  316. return fig, fft_results
  317. @app.route('/fft_analysis', methods=['POST'])
  318. def fft_analysis():
  319. try:
  320. # 检查请求体中是否包含文件地址
  321. data = request.json
  322. if 'file_url' not in data:
  323. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  324. file_url = data['file_url']
  325. # 使用requests获取文件内容
  326. response = requests.get(file_url)
  327. if response.status_code != 200:
  328. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  329. # 读取数据集
  330. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  331. # 执行FFT分析
  332. fig, fft_results = perform_fft_analysis(df)
  333. # 保存图形
  334. plt_file_path = 'fft_spectrum.jpg' # 保存FFT图的路径
  335. plt.savefig(plt_file_path)
  336. plt.close() # 关闭图形以释放资源
  337. # 将FFT结果保存到CSV
  338. fft_df = pd.DataFrame(fft_results)
  339. fft_results_file_path = 'fft_degradation_parameters.csv' # 保存FFT结果的路径
  340. fft_df.to_csv(fft_results_file_path, index=False)
  341. # 返回结果文件路径和FFT图路径给客户端
  342. return jsonify({
  343. 'code': 200,
  344. 'msg': 'FFT analysis completed',
  345. 'fftResultsFilePath': fft_results_file_path,
  346. 'plotImagePath': plt_file_path
  347. })
  348. except Exception as e:
  349. return jsonify({
  350. 'code': 500,
  351. 'msg': str(e)
  352. })
  353. if __name__ == '__main__':
  354. app.run(debug=True, port=8081, host='0.0.0.0')