# 故障预测(封装).py: fault-prediction service (Flask, packaged)

from flask import Flask, request, jsonify
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # render figures off-screen; the service runs headless
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.ensemble import IsolationForest
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
import scipy.special.cython_special  # hidden SciPy submodule, presumably imported so the packager (e.g. PyInstaller) bundles it

app = Flask(__name__)

## 1. PCA + K-means clustering
def pca_kmeans_analysis(df):
    # Feature scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df)
    # PCA dimensionality reduction
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # KMeans clustering
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
    kmeans.fit(X_pca)
    # Colour list: one colour per cluster
    colors = ['r', 'g', 'b']
    # Plot the result on a single set of axes
    fig, ax = plt.subplots(figsize=(15, 5))
    if kmeans.n_clusters <= len(colors):
        # Draw each cluster
        for i in range(kmeans.n_clusters):
            cluster_members = X_pca[kmeans.labels_ == i]
            ax.scatter(cluster_members[:, 0], cluster_members[:, 1],
                       c=colors[i], label=f'Cluster {i + 1}')
        # Draw the centroids
        ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                   s=300, c='k', marker='x', label='Centroids')
        # Title and legend
        ax.set_title('PCA and KMeans Clustering')
        ax.legend()
    else:
        print("The number of clusters exceeds the number of predefined colours.")
    return fig, X_pca, kmeans

@app.route('/pca_kmeans', methods=['POST'])
def pca_kmeans():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Run the PCA + KMeans analysis
        fig, X_pca, kmeans = pca_kmeans_analysis(df)
        # Save the figure, then close it to release resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the PCA coordinates and cluster labels to CSV
        # (use the already scaled-and-projected X_pca; projecting the raw df
        # would skip the scaler and give wrong coordinates)
        pca_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        pca_df['Cluster'] = kmeans.labels_
        pca_df.to_csv(result_file_path, index=False)
        # Report completion to the client
        return jsonify({
            'code': 200,
            'msg': 'PCA and KMeans analysis completed'
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
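
# Example client call (a sketch; the paths and host below are illustrative
# assumptions, not part of the service contract):
#
#   import requests
#   resp = requests.post('http://127.0.0.1:10003/pca_kmeans', json={
#       'file_url': 'data/sensor_readings.csv',    # input CSV, numeric columns only
#       'result_file_path': 'out/pca_kmeans.csv',  # where the PCA/cluster table is written
#       'result_img_path': 'out/pca_kmeans.png',   # where the scatter plot is written
#   })
#   print(resp.json())  # {'code': 200, 'msg': 'PCA and KMeans analysis completed'}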

## 2. AutoEncoder: an unsupervised neural network model that learns an
## effective representation of the input data for feature learning.
def train_autoencoder(X_scaled):
    input_dim = X_scaled.shape[1]  # input dimensionality
    encoding_dim = 8               # size of the learned code
    # Input layer
    input_layer = Input(shape=(input_dim,))
    # Encoder
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    # Decoder: linear output, since the standardized inputs can be negative
    # and a sigmoid could never reconstruct them
    decoded = Dense(input_dim, activation='linear')(encoded)
    # Assemble and compile the autoencoder
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    # Train the autoencoder to reproduce its own input
    autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=256, shuffle=True)
    # Encode and decode in one forward pass, then compute the per-sample
    # reconstruction error
    X_decoded = autoencoder.predict(X_scaled)
    reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
    return reconstruction_error, autoencoder
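
# The endpoints below flag a sample as anomalous when its reconstruction error
# exceeds mean + 2*std (a convention of this file, not a library default).
# A minimal numeric sketch with made-up errors:
#
#   errors = np.array([0.1] * 9 + [1.0])
#   threshold = errors.mean() + 2 * errors.std()   # 0.19 + 2 * 0.27 = 0.73
#   np.where(errors > threshold)[0]                # -> array([9])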

@app.route('/autoencoder', methods=['POST'])
def autoencoder_anomaly_detection():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Train the autoencoder and obtain per-sample reconstruction errors
        reconstruction_error, autoencoder = train_autoencoder(X_scaled)
        # Plot the reconstruction-error distribution
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt.savefig(result_img_path)
        plt.close()  # close the figure to release resources
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(reconstruction_error, columns=['Reconstruction_Error'])
        reconstruction_error_df.to_csv(result_file_path, index=False)
        # Flag anomalies: error above mean + 2*std
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        # Indices of the anomalous samples (tolist() yields plain ints for JSON)
        abnormal_indices = np.where(outliers)[0]
        # Return the anomaly indices to the client
        return jsonify({
            'code': 200,
            'msg': 'Autoencoder training and anomaly detection completed',
            'abnormalIndices': abnormal_indices.tolist()
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
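
# Example client call (illustrative paths; same request shape as /pca_kmeans):
#
#   resp = requests.post('http://127.0.0.1:10003/autoencoder', json={
#       'file_url': 'data/sensor_readings.csv',
#       'result_file_path': 'out/reconstruction_error.csv',
#       'result_img_path': 'out/reconstruction_error.png',
#   })
#   resp.json()['abnormalIndices']  # row indices flagged as anomalous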

## 3. Gaussian Mixture Model (GMM): assumes the data is a mixture of several
## Gaussian distributions and uses the EM algorithm to estimate each
## component's parameters, yielding a clustering.
def perform_gmm_clustering(X_scaled):
    # PCA dimensionality reduction
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # GMM clustering
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(X_pca)
    # Predict a cluster label for every data point
    predicted_labels = gmm.predict(X_pca)
    # Plot the result, one scatter call per component so the legend is correct
    fig = plt.figure(figsize=(15, 5))
    for i in range(gmm.n_components):
        members = X_pca[predicted_labels == i]
        plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {i + 1}')
    plt.scatter(gmm.means_[:, 0], gmm.means_[:, 1], s=300, c='k', marker='x', label='Centroids')
    plt.title('Gaussian Mixture Model Clustering')
    plt.legend()
    return gmm, pca, fig

@app.route('/gmm_clustering', methods=['POST'])
def gmm_clustering():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the GMM clustering analysis
        gmm, pca, fig = perform_gmm_clustering(X_scaled)
        # Save the figure, then close it to release resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the GMM result to CSV (project once, reuse for the labels)
        X_pca = pca.transform(X_scaled)
        gmm_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        gmm_df['Cluster'] = gmm.predict(X_pca)
        gmm_df.to_csv(result_file_path, index=False)
        # Report completion to the client
        return jsonify({
            'code': 200,
            'msg': 'GMM clustering completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
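
# n_components=2 is hard-coded above. When the number of clusters is unknown,
# a common approach (a sketch, not part of this service) is to pick the
# component count with the lowest BIC:
#
#   candidates = range(1, 7)
#   bics = [GaussianMixture(n_components=k, random_state=42).fit(X_pca).bic(X_pca)
#           for k in candidates]
#   best_k = candidates[int(np.argmin(bics))]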

## 4. Isolation Forest: an anomaly-detection algorithm that "isolates"
## outliers by randomly selecting features and split values.
def perform_iforestation(X_scaled):
    # PCA dimensionality reduction (for visualisation only)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # Fit the Isolation Forest on the full feature space
    iforest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
    iforest.fit(X_scaled)
    # Anomaly scores: lower decision-function values mean more anomalous
    scores = iforest.decision_function(X_scaled)
    # Label each point: 1 = anomaly, 0 = normal (predict returns -1 for anomalies,
    # respecting the contamination setting instead of splitting at the median)
    predicted_labels = (iforest.predict(X_scaled) == -1).astype(int)
    # Plot the result, normals in green and anomalies in red
    fig = plt.figure(figsize=(15, 5))
    normal = X_pca[predicted_labels == 0]
    anomaly = X_pca[predicted_labels == 1]
    plt.scatter(normal[:, 0], normal[:, 1], c='green', label='Normal')
    plt.scatter(anomaly[:, 0], anomaly[:, 1], c='red', label='Anomaly')
    plt.title('Isolation Forest Anomaly Detection')
    plt.legend()
    return pca, iforest, fig, predicted_labels

@app.route('/iforestation', methods=['POST'])
def iforestation():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load the dataset
        df = pd.read_csv(file_url)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the Isolation Forest analysis
        pca, iforest, fig, predicted_labels = perform_iforestation(X_scaled)
        # Save the figure, then close it to release resources
        fig.savefig(result_img_path)
        plt.close(fig)
        # Save the Isolation Forest result to CSV (project once, reuse both columns)
        X_pca = pca.transform(X_scaled)
        iforest_df = pd.DataFrame({
            'PCA1': X_pca[:, 0],
            'PCA2': X_pca[:, 1],
            'Anomaly_Score': iforest.decision_function(X_scaled),
            'Anomaly_Label': predicted_labels
        })
        iforest_df.to_csv(result_file_path, index=False)
        # Report completion to the client
        return jsonify({
            'code': 200,
            'msg': 'Isolation Forest analysis completed',
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
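
# contamination=0.1 tells the forest to treat roughly 10% of the training data
# as anomalous; predict() then thresholds the scores accordingly. A quick
# standalone check on synthetic data (a sketch with made-up values):
#
#   rng = np.random.RandomState(0)
#   X_demo = np.vstack([rng.normal(0, 1, (95, 2)), rng.normal(6, 1, (5, 2))])
#   labels = (IsolationForest(contamination=0.05, random_state=0)
#             .fit(X_demo).predict(X_demo))   # -1 marks the outlying cluster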

## 5. Variational AutoEncoder (VAE): a generative model that learns a
## probabilistic representation of the input data for feature learning.
def build_vae_model(input_dim, intermediate_dim):
    # Encoder: map the input to the mean and log-variance of the latent code
    input_layer = Input(shape=(input_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(input_layer)
    z_mean = Dense(input_dim, name='z_mean')(x)
    z_log_var = Dense(input_dim, name='z_log_var')(x)

    def sampling(args):
        # Reparameterisation trick: z = mean + sigma * epsilon keeps the
        # sampling step differentiable with respect to the encoder weights
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    z = Lambda(sampling, output_shape=(input_dim,), name='z')([z_mean, z_log_var])
    encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')
    # Decoder: linear output, since the standardized inputs can be negative
    decoder_input = Input(shape=(input_dim,), name='decoder_input')
    x = Dense(intermediate_dim, activation='relu')(decoder_input)
    x = Dense(input_dim, activation='linear')(x)
    decoder = Model(decoder_input, x, name='decoder')
    # End-to-end VAE
    z_mean, z_log_var, z = encoder(input_layer)
    x_decoded = decoder(z)
    vae = Model(input_layer, [x_decoded, z_mean, z_log_var])
    # Reconstruction term: MSE, matching the standardized (unbounded) inputs;
    # binary crossentropy would assume targets in [0, 1]
    reconstruction_loss = K.mean(K.square(input_layer - x_decoded), axis=-1)
    reconstruction_loss *= input_dim
    # KL term: pushes the latent distribution towards a standard normal
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    return vae, encoder, decoder
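
# A quick standalone smoke test of build_vae_model (a sketch; the shapes and
# epoch count are arbitrary assumptions, and fit() takes no targets because
# add_loss() already supplies the full loss):
#
#   X_demo = np.random.rand(256, 16).astype('float32')
#   vae_demo, encoder_demo, decoder_demo = build_vae_model(input_dim=16, intermediate_dim=8)
#   vae_demo.fit(X_demo, epochs=1, batch_size=32, verbose=0)
#   X_rec = vae_demo.predict(X_demo)[0]   # first output is the reconstruction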

@app.route('/vae', methods=['POST'])
def vae_anomaly_detection():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data.get('file_url')
        result_file_path = data.get('result_file_path')
        result_img_path = data.get('result_img_path')
        # Load and scale the dataset
        df = pd.read_csv(file_url)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Build and train the VAE (add_loss supplies the loss, so no targets are passed)
        input_dim = X_scaled.shape[1]
        intermediate_dim = 64
        vae, _, _ = build_vae_model(input_dim, intermediate_dim)
        vae.fit(X_scaled, epochs=50, batch_size=128, shuffle=True)
        # The first model output is the reconstruction
        X_decoded = vae.predict(X_scaled)[0]
        reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
        # Plot the reconstruction-error distribution
        plt.figure(figsize=(10, 6))
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt.savefig(result_img_path)
        plt.close()
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(
            reconstruction_error, columns=['Reconstruction_Error'])
        reconstruction_error_df.to_csv(result_file_path, index=False)
        # Flag anomalies: error above mean + 2*std
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        return jsonify({
            'code': 200,
            'msg': 'VAE training and anomaly detection completed',
            'abnormalIndices': np.where(outliers)[0].tolist()
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })
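
# Example client call (illustrative paths; same request shape as the other endpoints):
#
#   resp = requests.post('http://127.0.0.1:10003/vae', json={
#       'file_url': 'data/sensor_readings.csv',
#       'result_file_path': 'out/vae_error.csv',
#       'result_img_path': 'out/vae_error.png',
#   })
#   resp.json()['abnormalIndices']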

if __name__ == '__main__':
    app.run(debug=True, port=10003)