# 退化评估.py (degradation assessment Flask service)

# ## 1. Feature-value clustering analysis: assess clustering quality and check for degradation.
# The silhouette coefficient ranges over [-1, 1]; values close to 1 indicate good clustering.
# The elbow method varies the number of clusters, computes the within-cluster sum of squares (WCSS)
# for each count, and picks the point after which WCSS falls off markedly more slowly; that point
# is a suitable cluster count (a knee-picking sketch follows the endpoint below).
# The Calinski-Harabasz index measures between-cluster separation and within-cluster compactness; a larger CH index means better clustering.
# The Davies-Bouldin index measures within-cluster similarity against between-cluster dissimilarity; a smaller DB index means better clustering.
from flask import Flask, request, jsonify
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # non-interactive backend: figures are only rendered server-side
import matplotlib.pyplot as plt

app = Flask(__name__)
@app.route('/cluster_analysis', methods=['POST'])
def cluster_analysis():
    try:
        # Make sure the request body carries a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        df = pd.read_csv(file_url)
        # Keep the raw features aside: the cluster label must not feed back into the metrics
        features = df.copy()
        # Apply KMeans clustering
        kmeans = KMeans(n_clusters=2, random_state=0).fit(features)
        labels = kmeans.labels_
        df['cluster'] = labels
        # Elbow method: WCSS for 1 to 10 clusters
        wcss = []
        for i in range(1, 11):
            km = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
            km.fit(features)
            wcss.append(km.inertia_)
        # Visualize the elbow curve
        plt.figure()
        plt.plot(range(1, 11), wcss)
        plt.title('Elbow Method')
        plt.xlabel('Number of clusters')
        plt.ylabel('WCSS')
        plt.savefig(result_img_path)  # save the figure
        plt.close()
        # Silhouette coefficient, Davies-Bouldin index and Calinski-Harabasz index,
        # all computed on the original features with the k=2 labels
        silhouette_avg = silhouette_score(features, labels)
        db_index = davies_bouldin_score(features, labels)
        ch_index = calinski_harabasz_score(features, labels)
        # Collect the computed metrics in a DataFrame
        results_df = pd.DataFrame({
            'Metric': ['Silhouette Coefficient', 'Davies-Bouldin Index', 'Calinski-Harabasz Index'],
            'Value': [silhouette_avg, db_index, ch_index]
        })
        # Save the results to a CSV file and report back to the client
        results_df.to_csv(result_file_path, index=False)
        return jsonify({
            'code': 200,
            'msg': 'Cluster analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
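
# A minimal sketch (an assumption, not part of the original service) of picking the
# elbow automatically: choose the k whose (k, WCSS) point lies farthest from the
# straight line joining the first and last points of the curve.
def pick_elbow_k(wcss, k_range=range(1, 11)):
    ks = np.asarray(list(k_range), dtype=float)
    ws = np.asarray(wcss, dtype=float)
    # Normalise both axes so the distance is scale-independent
    ks_n = (ks - ks[0]) / (ks[-1] - ks[0])
    ws_n = (ws - ws[-1]) / (ws[0] - ws[-1])
    # Distance of each point from the chord y = 1 - x (first point to last point)
    dists = np.abs(ws_n - (1 - ks_n)) / np.sqrt(2)
    return int(ks[np.argmax(dists)])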
## 2. Statistics-based degradation assessment: compute the mean and standard deviation of
## every DataFrame column and plot each feature's histogram (a drift-flagging sketch
## follows the endpoint below).
def statistical_degradation(df):
    means = df.mean()
    std_devs = df.std()
    return means, std_devs

def save_to_csv(means, std_devs, file_path):
    stats_df = pd.DataFrame({'Mean': means, 'Standard Deviation': std_devs})
    stats_df.to_csv(file_path, index_label='Feature')  # keep the feature names as the index column

def plot_feature_distribution(df, means, std_devs, result_img_path):
    fig, axes = plt.subplots(nrows=len(df.columns), ncols=1, figsize=(10, 5 * len(df.columns)))
    axes = np.atleast_1d(axes)  # a single column yields a bare Axes, not an array
    for i, (column, ax) in enumerate(zip(df.columns, axes)):
        df[column].plot(kind='hist', ax=ax, bins=20, alpha=0.5)
        ax.axvline(means[column], color='r', linestyle='--', label=f'Mean: {means[column]:.2f}')
        ax.axvline(means[column] - std_devs[column], color='g', linestyle='-', label='-1 Std Dev')
        ax.axvline(means[column] + std_devs[column], color='g', linestyle='-')
        ax.set_title(f'Distribution of {column}')
        ax.legend(loc='upper left')
        if i != len(df.columns) - 1:
            ax.xaxis.set_visible(False)
        if i == 0:
            ax.set_ylabel('Frequency')
        else:
            ax.yaxis.set_visible(False)
    fig.tight_layout()
    plt.savefig(result_img_path)
    plt.close(fig)  # release the figure

@app.route('/analyze_features', methods=['POST'])
def analyze_features():
    try:
        # Make sure the request body carries a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        df = pd.read_csv(file_url)
        # Compute the statistics and save them
        means, std_devs = statistical_degradation(df)
        save_to_csv(means, std_devs, result_file_path)
        # Plot the feature-value distributions
        plot_feature_distribution(df, means, std_devs, result_img_path)
        return jsonify({
            'code': 200,
            'msg': 'Feature analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
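
# A minimal sketch (an assumption, not part of the original service) of turning the
# statistics into a degradation flag: compare the mean of a recent window against a
# baseline window and flag features that drift by more than k baseline standard deviations.
def flag_mean_drift(df, baseline_frac=0.5, k=3.0):
    split = int(len(df) * baseline_frac)
    baseline, recent = df.iloc[:split], df.iloc[split:]
    z = (recent.mean() - baseline.mean()) / baseline.std()
    return z.abs() > k  # boolean Series indexed by feature name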
# ## 3. Trend-analysis method: regress each feature on time; a slope whose magnitude
# ## clearly exceeds a threshold may indicate degradation (a p-value based significance
# ## sketch follows the endpoint below).
from sklearn.linear_model import LinearRegression

def trend_analysis(df, some_threshold):
    results_list = []
    for column in df.columns:
        X = np.arange(len(df[column])).reshape(-1, 1)
        y = df[column].values
        model = LinearRegression().fit(X, y)
        slope = model.coef_[0]
        results_list.append({'Feature': column,
                             'Slope': slope,
                             'Intercept': model.intercept_,
                             'Significant': abs(slope) > some_threshold})
    return pd.DataFrame(results_list)

@app.route('/trend_analysis', methods=['POST'])
def trend_analysis_endpoint():
    try:
        # Make sure the request body carries a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        df = pd.read_csv(file_url)
        # Threshold on the slope magnitude (a heuristic cut-off, not a statistical test)
        some_threshold = 0.01
        # Run the trend analysis and save the results to CSV
        slopes = trend_analysis(df, some_threshold)
        slopes.to_csv(result_file_path, index=False)
        # Keep only the features flagged as significant
        significant = slopes[slopes['Significant']]
        num_features = len(significant)
        if num_features > 0:
            # One 15x4-inch subplot per significant feature; height scales with the count
            plt.figure(figsize=(15, 4 * num_features))
            for i, row in enumerate(significant.itertuples()):
                column = row.Feature
                plt.subplot(num_features, 1, i + 1)
                plt.scatter(range(len(df)), df[column], label='Data')
                # Draw the fitted line from the regression's own slope and intercept
                plt.plot(range(len(df)), row.Slope * np.arange(len(df)) + row.Intercept,
                         color='red', label=f'Trend line with slope {row.Slope:.4f}')
                plt.xlabel('Time')
                plt.ylabel(column)
                plt.title(f'Trend Analysis for {column}')
                plt.legend()
            plt.tight_layout()  # avoid overlapping subplots
            plt.savefig(result_img_path)
            plt.close()  # release the figure
        return jsonify({
            'code': 200,
            'msg': 'Trend analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
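
# A minimal sketch (an assumption, not in the original service) of a proper significance
# test for the slope, using scipy.stats.linregress, which reports a two-sided p-value
# for the null hypothesis that the slope is zero.
from scipy.stats import linregress

def slope_p_values(df, alpha=0.05):
    rows = []
    t = np.arange(len(df))
    for column in df.columns:
        fit = linregress(t, df[column].values)
        rows.append({'Feature': column, 'Slope': fit.slope,
                     'p-value': fit.pvalue, 'Significant': fit.pvalue < alpha})
    return pd.DataFrame(rows)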
# ## 4. Time-series-analysis method: fit an ARIMA model to each feature to surface
# ## periodic changes or anomalous patterns (an order-selection sketch follows the endpoint).
from statsmodels.tsa.arima.model import ARIMA
import warnings

def time_series_degradation_multicolumn(df):
    aic_values = pd.Series(dtype=float)
    for column in df.columns:
        data = df[column].values
        model = ARIMA(data, order=(1, 1, 1))
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=FutureWarning)
            results = model.fit()
        # app.logger.info(f"AIC for {column}: {results.aic}")  # optional Flask logging
        aic_values[column] = results.aic
    return aic_values

@app.route('/time_series_analysis', methods=['POST'])
def time_series_analysis():
    try:
        # Make sure the request body carries a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        df = pd.read_csv(file_url)
        # Per-feature ARIMA degradation assessment
        aic_values = time_series_degradation_multicolumn(df)
        # Save the AIC values to CSV, keeping the feature names
        aic_values.rename('AIC').to_csv(result_file_path, index_label='Feature')
        # Plot the N features with the highest AIC (i.e. the worst ARIMA fits)
        N = 10
        top_features = aic_values.sort_values(ascending=False).index[:N]
        num_features = len(top_features)
        fig, axes = plt.subplots(num_features, 1, figsize=(10, 4 * num_features), sharex=True)
        axes = np.atleast_1d(axes)  # a single feature yields a bare Axes, not an array
        for i, column in enumerate(top_features):
            df[column].plot(ax=axes[i], label=column)
            axes[i].set_title(f'Time Series Plot for {column}')
            axes[i].set_xlabel('Time')
            axes[i].set_ylabel(column)
            axes[i].legend()
        plt.tight_layout()
        plt.savefig(result_img_path)
        plt.close(fig)  # release the figure
        return jsonify({
            'code': 200,
            'msg': 'Time series analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
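
# A minimal sketch (an assumption, not in the original service) of choosing the ARIMA
# order by AIC instead of fixing (1, 1, 1): try a small (p, d, q) grid and keep the
# order with the lowest AIC for a given series.
from itertools import product

def best_arima_order(data, p_max=2, d_max=1, q_max=2):
    best = (None, np.inf)
    for p, d, q in product(range(p_max + 1), range(d_max + 1), range(q_max + 1)):
        try:
            aic = ARIMA(data, order=(p, d, q)).fit().aic
        except Exception:
            continue  # some orders fail to converge; skip them
        if aic < best[1]:
            best = ((p, d, q), aic)
    return best  # (order, aic)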
# ## 5. Frequency-domain analysis: use the Fourier transform to inspect each signal's
# ## frequency content; anomalous frequency components may indicate degradation
# ## (an rfft-based sketch with a physical sampling rate follows the endpoint).
def perform_fft_analysis(df):
    n_columns = df.shape[1]
    fig, axes = plt.subplots(n_columns, 1, figsize=(10, 4 * n_columns))
    axes = np.atleast_1d(axes)  # a single column yields a bare Axes, not an array
    fft_results = []
    for i, column in enumerate(df.columns):
        data = df[column].values
        fft = np.fft.fft(data - data.mean())  # remove the DC offset so it cannot masquerade as the peak
        frequencies = np.fft.fftfreq(len(data), d=1)  # d=1 assumes a unit sampling interval
        half = len(data) // 2  # only the positive-frequency half is informative for real signals
        peak_frequency_index = np.argmax(np.abs(fft[:half])) if half > 0 else 0
        peak_frequency = frequencies[peak_frequency_index]
        peak_amplitude = np.abs(fft[peak_frequency_index])
        axes[i].plot(frequencies[:half], np.abs(fft[:half]))
        axes[i].set_title(f'Frequency Spectrum of {column}')
        axes[i].set_xlabel('Frequency (Hz)')
        axes[i].set_ylabel('Amplitude')
        axes[i].grid(True)
        fft_results.append({
            'Feature': column,
            'Peak Frequency (Hz)': peak_frequency,
            'Peak Amplitude': peak_amplitude
        })
    plt.tight_layout()
    plt.subplots_adjust(hspace=0.5)
    return fig, fft_results
@app.route('/fft_analysis', methods=['POST'])
def fft_analysis():
    try:
        # Make sure the request body carries a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        df = pd.read_csv(file_url)
        # Run the FFT analysis and save the figure
        fig, fft_results = perform_fft_analysis(df)
        fig.savefig(result_img_path)
        plt.close(fig)  # release the figure
        # Save the FFT results to CSV
        fft_df = pd.DataFrame(fft_results)
        fft_df.to_csv(result_file_path, index=False)
        return jsonify({
            'code': 200,
            'msg': 'FFT analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
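
# A minimal sketch (an assumption, not in the original service) using np.fft.rfft,
# which computes only the non-negative frequencies of a real signal, with an explicit
# sampling interval dt in seconds so the peak is reported in physical Hz.
def dominant_frequency(data, dt=1.0):
    data = np.asarray(data, dtype=float)
    spectrum = np.abs(np.fft.rfft(data - data.mean()))
    freqs = np.fft.rfftfreq(len(data), d=dt)
    return freqs[np.argmax(spectrum)], spectrum.max()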
if __name__ == '__main__':
    app.run(debug=True, port=10005, host='0.0.0.0')
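
# Example client call (a sketch; the file paths below are illustrative assumptions):
#
#   import requests
#   resp = requests.post(
#       'http://localhost:10005/cluster_analysis',
#       json={'file_url': 'features.csv',
#             'result_file_path': 'cluster_metrics.csv',
#             'result_img_path': 'elbow.png'})
#   print(resp.json())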