故障预测.py

# ## 1. PCA + K-means clustering
from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')  # non-interactive backend for server-side plotting
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

app = Flask(__name__)

def pca_kmeans_analysis(df):
    # Feature scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df)
    # PCA dimensionality reduction
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # KMeans clustering
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
    kmeans.fit(X_pca)
    # One color per cluster
    colors = ['r', 'g', 'b']
    # Plot the result on a single subplot
    fig, ax = plt.subplots(figsize=(15, 5))
    if kmeans.n_clusters <= len(colors):
        # Plot each cluster's members
        for i in range(kmeans.n_clusters):
            cluster_members = X_pca[kmeans.labels_ == i]
            ax.scatter(cluster_members[:, 0], cluster_members[:, 1],
                       c=colors[i], label=f'Cluster {i + 1}')
        # Plot the centroids
        ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                   s=300, c='k', marker='x', label='Centroids')
        # Title and legend
        ax.set_title('PCA and KMeans Clustering')
        ax.legend()
    else:
        print("Number of clusters exceeds the number of predefined colors.")
    # Return the PCA coordinates of the scaled data so callers do not
    # have to re-apply the scaler themselves
    return fig, X_pca, kmeans
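
# Optional sketch: before trusting a 2-D clustering picture it is worth checking
# how much of the original variance the two retained components keep. This is a
# standalone, illustrative snippet (not called by the service):
#
#   X_scaled = StandardScaler().fit_transform(df)
#   pca = PCA(n_components=2).fit(X_scaled)
#   print(pca.explained_variance_ratio_.sum())  # e.g. 0.85 -> 85% of variance retained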

@app.route('/pca_kmeans', methods=['POST'])
def pca_kmeans():
    try:
        # Make sure the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Run the PCA + KMeans analysis
        fig, X_pca, kmeans = pca_kmeans_analysis(df)
        # Save the figure, then close it to free resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the PCA coordinates and cluster labels to CSV
        pca_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        pca_df['Cluster'] = kmeans.labels_
        pca_df.to_csv(result_file_path, index=False)
        # Report completion; the client already knows the result paths it supplied
        return jsonify({
            'code': 200,
            'msg': 'PCA and KMeans analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
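
# Hypothetical client-side sketch (not part of the service): it shows the JSON
# contract every endpoint in this file expects. The file paths are assumptions
# for illustration; the port matches the app.run() call at the bottom.
def _example_pca_kmeans_request():
    import requests
    payload = {
        'file_url': '/data/sensors.csv',             # hypothetical input CSV
        'result_file_path': '/data/pca_result.csv',  # hypothetical output CSV
        'result_img_path': '/data/pca_plot.png',     # hypothetical output image
    }
    resp = requests.post('http://127.0.0.1:10003/pca_kmeans', json=payload)
    print(resp.json())  # e.g. {'code': 200, 'msg': 'PCA and KMeans analysis completed'}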

## 2. AutoEncoder: an unsupervised neural network model that performs feature
## learning by learning an efficient representation of its input.
from keras.layers import Input, Dense
from keras.models import Model

def train_autoencoder(X_scaled):
    input_dim = X_scaled.shape[1]  # input dimensionality
    encoding_dim = 8               # size of the learned encoding
    # Input layer
    input_layer = Input(shape=(input_dim,))
    # Encoding layer
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    # Decoding layer; linear activation, since the standardized input is not
    # confined to [0, 1]
    decoded = Dense(input_dim)(encoded)
    # Build and compile the autoencoder
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    # Train the autoencoder to reproduce its own input
    autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=256, shuffle=True)
    # Separate encoder model, kept in case the latent codes are needed
    encoder = Model(input_layer, encoded)
    # Reconstruct the data: the autoencoder takes the original input,
    # not the encoded representation
    X_decoded = autoencoder.predict(X_scaled)
    # Per-sample reconstruction error
    reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
    return reconstruction_error, autoencoder

@app.route('/autoencoder', methods=['POST'])
def autoencoder_anomaly_detection():
    try:
        # Make sure the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Train the autoencoder and obtain the reconstruction errors
        reconstruction_error, autoencoder = train_autoencoder(X_scaled)
        # Plot the distribution of reconstruction errors
        plt.figure(figsize=(10, 6))
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt.savefig(result_img_path)
        plt.close()  # close the figure to free resources
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(reconstruction_error,
                                               columns=['Reconstruction_Error'])
        reconstruction_error_df.to_csv(result_file_path, index=False)
        # Flag anomalies: points whose error exceeds mean + 2 * std
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        # Indices of the anomalous points
        abnormal_indices = np.where(outliers)[0]
        # tolist() converts numpy integers into JSON-serializable Python ints
        return jsonify({
            'code': 200,
            'msg': 'Autoencoder training and anomaly detection completed',
            'abnormalIndices': abnormal_indices.tolist()
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
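
# Illustrative success response from /autoencoder (the index values are made up):
#   {"code": 200,
#    "msg": "Autoencoder training and anomaly detection completed",
#    "abnormalIndices": [17, 42, 103]}
# On any failure the endpoints in this file respond with {"code": 500, "msg": "<error>"}.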

## 3. Gaussian Mixture Model (GMM): assumes the data is generated by a mixture
## of Gaussians and uses the EM algorithm to estimate each component's
## parameters, yielding a clustering.
from sklearn.mixture import GaussianMixture

def perform_gmm_clustering(X_scaled):
    # PCA dimensionality reduction
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # GMM clustering
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(X_pca)
    # Predict a cluster label for every data point
    predicted_labels = gmm.predict(X_pca)
    # Plot the result, one scatter call per component so the legend is correct
    fig = plt.figure(figsize=(15, 5))
    for i in range(gmm.n_components):
        members = X_pca[predicted_labels == i]
        plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {i + 1}')
    plt.scatter(gmm.means_[:, 0], gmm.means_[:, 1], s=300, c='k',
                marker='x', label='Centroids')
    plt.title('Gaussian Mixture Model Clustering')
    plt.legend()
    # Return the figure object rather than the pyplot module
    return gmm, pca, fig
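
# Optional sketch: unlike KMeans, a GMM yields soft assignments. If posterior
# probabilities are wanted alongside the hard labels, sklearn's predict_proba
# can be applied to the same PCA-projected data (names illustrative):
#
#   probs = gmm.predict_proba(X_pca)      # shape (n_samples, n_components)
#   confident = probs.max(axis=1) > 0.9   # points assigned with high confidence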

@app.route('/gmm_clustering', methods=['POST'])
def gmm_clustering():
    try:
        # Make sure the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the GMM clustering analysis
        gmm, pca, fig = perform_gmm_clustering(X_scaled)
        # Save the figure, then close it to free resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the PCA coordinates and GMM cluster labels to CSV
        X_pca = pca.transform(X_scaled)
        gmm_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        gmm_df['Cluster'] = gmm.predict(X_pca)
        gmm_df.to_csv(result_file_path, index=False)
        # Report completion
        return jsonify({
            'code': 200,
            'msg': 'GMM clustering completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })

## 4. Isolation Forest: an anomaly-detection algorithm that "isolates" outliers
## by randomly selecting features and split values.
from sklearn.ensemble import IsolationForest

def perform_iforestation(X_scaled):
    # PCA projection, used only for plotting
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # Isolation Forest anomaly detection; contamination sets the expected
    # fraction of anomalies
    iforest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
    iforest.fit(X_scaled)
    # predict() returns -1 for anomalies and +1 for normal points;
    # map to 1 = anomaly, 0 = normal
    predicted_labels = (iforest.predict(X_scaled) == -1).astype(int)
    # Plot the result, one scatter call per class so the legend is correct
    fig = plt.figure(figsize=(15, 5))
    normal = X_pca[predicted_labels == 0]
    anomaly = X_pca[predicted_labels == 1]
    plt.scatter(normal[:, 0], normal[:, 1], c='green', label='Normal')
    plt.scatter(anomaly[:, 0], anomaly[:, 1], c='red', label='Anomaly')
    plt.title('Isolation Forest Anomaly Detection')
    plt.legend()
    return pca, iforest, fig, predicted_labels
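
# Standalone sanity-check sketch (illustrative, not called by the service):
# on synthetic data the fraction of points flagged by IsolationForest should
# roughly track the contamination parameter.
def _example_iforest_contamination():
    rng = np.random.RandomState(0)
    X = np.vstack([rng.normal(0, 1, size=(450, 2)),    # dense inliers
                   rng.uniform(-6, 6, size=(50, 2))])  # scattered outliers
    iforest = IsolationForest(n_estimators=100, contamination=0.1,
                              random_state=42).fit(X)
    flagged = (iforest.predict(X) == -1).mean()
    print(f'fraction flagged: {flagged:.2f}')  # expected to be near 0.10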

@app.route('/iforestation', methods=['POST'])
def iforestation():
    try:
        # Make sure the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the Isolation Forest analysis
        pca, iforest, fig, predicted_labels = perform_iforestation(X_scaled)
        # Save the figure, then close it to free resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the Isolation Forest results to CSV
        X_pca = pca.transform(X_scaled)
        iforest_df = pd.DataFrame({
            'PCA1': X_pca[:, 0],
            'PCA2': X_pca[:, 1],
            'Anomaly_Score': iforest.decision_function(X_scaled),
            'Anomaly_Label': predicted_labels
        })
        iforest_df.to_csv(result_file_path, index=False)
        # Report completion
        return jsonify({
            'code': 200,
            'msg': 'Isolation Forest analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })

## 5. Variational AutoEncoder (VAE): a generative model that performs feature
## learning by learning a probabilistic representation of its input.
from keras.layers import Lambda
from keras import backend as K

def build_vae_model(input_dim, intermediate_dim):
    # Encoder: input -> intermediate -> (z_mean, z_log_var)
    input_layer = Input(shape=(input_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(input_layer)
    z_mean = Dense(input_dim, name='z_mean')(x)
    z_log_var = Dense(input_dim, name='z_log_var')(x)

    # Reparameterization trick: z = mean + exp(0.5 * log_var) * epsilon with
    # epsilon ~ N(0, I), so sampling stays differentiable
    def sampling(args):
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    z = Lambda(sampling, output_shape=(input_dim,), name='z')([z_mean, z_log_var])
    encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')
    # Decoder: latent code -> intermediate -> reconstruction (linear output,
    # since the standardized input is not confined to [0, 1])
    decoder_input = Input(shape=(input_dim,), name='decoder_input')
    x = Dense(intermediate_dim, activation='relu')(decoder_input)
    x = Dense(input_dim)(x)
    decoder = Model(decoder_input, x, name='decoder')
    # End-to-end VAE
    z_mean, z_log_var, z = encoder(input_layer)
    x_decoded = decoder(z)
    vae = Model(input_layer, [x_decoded, z_mean, z_log_var])
    # Reconstruction term: squared error (binary cross-entropy would be
    # inappropriate for standardized, unbounded inputs)
    reconstruction_loss = K.sum(K.square(input_layer - x_decoded), axis=-1)
    # KL divergence between the approximate posterior and the unit-Gaussian prior
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    return vae, encoder, decoder
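
# The kl_loss computation above is the closed-form KL divergence between the
# diagonal-Gaussian posterior and the standard-normal prior,
#
#   KL( N(mu, sigma^2) || N(0, I) ) = -1/2 * sum_j (1 + log sigma_j^2 - mu_j^2 - sigma_j^2),
#
# with z_mean = mu and z_log_var = log sigma^2 in the code.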

@app.route('/vae', methods=['POST'])
def vae_anomaly_detection():
    try:
        # Make sure the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load and scale the dataset
        df = pd.read_csv(file_url)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Build and train the VAE
        input_dim = X_scaled.shape[1]
        intermediate_dim = 64
        vae, _, _ = build_vae_model(input_dim, intermediate_dim)
        vae.fit(X_scaled, epochs=50, batch_size=128, shuffle=True)
        # The first model output is the reconstruction
        X_decoded = vae.predict(X_scaled)[0]
        # Per-sample reconstruction error
        reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
        # Plot the distribution of reconstruction errors
        plt.figure(figsize=(10, 6))
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt.savefig(result_img_path)
        plt.close()
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(
            reconstruction_error, columns=['Reconstruction_Error'])
        reconstruction_error_df.to_csv(result_file_path, index=False)
        # Flag anomalies: points whose error exceeds mean + 2 * std
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        # tolist() converts numpy integers into JSON-serializable Python ints
        return jsonify({
            'code': 200,
            'msg': 'VAE training and anomaly detection completed',
            'abnormalIndices': np.where(outliers)[0].tolist()
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })

if __name__ == '__main__':
    app.run(debug=True, port=10003, host='0.0.0.0')