# 故障预测.py

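# Note: each numbered section below is a self-contained Flask service; every
# one redefines `app` and binds port 8081, so run them one at a time rather
# than executing this file as a single script.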
## 1. PCA + K-means clustering
from io import StringIO

from flask import Flask, request, jsonify
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import requests

app = Flask(__name__)


def pca_kmeans_analysis(df):
    # Feature scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df)
    # Reduce to two dimensions with PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # K-means clustering in the PCA space
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
    kmeans.fit(X_pca)
    # One colour per cluster
    colors = ['r', 'g', 'b']
    # Plot everything in a single subplot
    fig, ax = plt.subplots(figsize=(15, 5))
    if kmeans.n_clusters <= len(colors):
        # Plot the members of each cluster
        for i in range(kmeans.n_clusters):
            cluster_members = X_pca[kmeans.labels_ == i]
            ax.scatter(cluster_members[:, 0], cluster_members[:, 1],
                       c=colors[i], label=f'Cluster {i + 1}')
        # Plot the centroids
        ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
                   s=300, c='k', marker='x', label='Centroids')
        # Title and legend
        ax.set_title('PCA and KMeans Clustering')
        ax.legend()
    else:
        print("The number of clusters exceeds the number of predefined colors.")
    # Return the projected data as well, so callers do not have to
    # re-apply the scaler and PCA themselves
    return fig, X_pca, kmeans
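# n_clusters=2 is hard-coded above. As a sketch of one way to sanity-check
# that choice, the silhouette score from scikit-learn rates how well
# separated the clusters are, with values near 1 being best:
#
#   from sklearn.metrics import silhouette_score
#   silhouette_score(X_pca, kmeans.labels_)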
@app.route('/pca_kmeans', methods=['POST'])
def pca_kmeans():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data['file_url']
        # Fetch the file contents with requests
        response = requests.get(file_url)
        if response.status_code != 200:
            return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
        # Read the dataset (io.StringIO; pd.compat.StringIO no longer exists)
        df = pd.read_csv(StringIO(response.text), header=None)
        # Run the PCA + K-means analysis
        fig, X_pca, kmeans = pca_kmeans_analysis(df)
        # Save the figure
        plt_file_path = 'PCA and KMeans.jpg'  # where the PCA/K-means plot is saved
        fig.savefig(plt_file_path)
        plt.close(fig)  # close the figure to free resources
        # Save the PCA coordinates and cluster labels to CSV
        # (X_pca comes from the scaled data; transforming the raw df again
        # here would use the wrong feature scale)
        pca_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        pca_df['Cluster'] = kmeans.labels_
        output_csv_path = 'PCA_Results.csv'  # where the PCA results are saved
        pca_df.to_csv(output_csv_path, index=False)
        # Return the result file path and plot path to the client
        return jsonify({
            'code': 200,
            'msg': 'PCA and KMeans analysis completed',
            'outputCsvPath': output_csv_path,
            'plotImagePath': plt_file_path
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })


if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')
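# Example client call, as a sketch (the service address and data URL below are
# hypothetical; the CSV is expected to hold header-less numeric data). The
# other services in this file expose the same request/response contract:
#
#   import requests
#   resp = requests.post('http://127.0.0.1:8081/pca_kmeans',
#                        json={'file_url': 'http://example.com/data.csv'})
#   print(resp.json())
#   # -> {'code': 200, 'msg': 'PCA and KMeans analysis completed', ...}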
## 2. AutoEncoder: an unsupervised neural network that performs feature
## learning by learning an efficient representation of its input.
from io import StringIO

from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense
from keras.models import Model
import requests

app = Flask(__name__)


def train_autoencoder(X_scaled):
    input_dim = X_scaled.shape[1]  # input dimensionality
    encoding_dim = 8  # size of the bottleneck encoding
    # Input layer
    input_layer = Input(shape=(input_dim,))
    # Encoding layer
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    # Decoding layer; a linear output, since the standardized inputs are
    # real-valued rather than bounded to [0, 1]
    decoded = Dense(input_dim, activation='linear')(encoded)
    # Build the autoencoder model
    autoencoder = Model(input_layer, decoded)
    # Compile the model
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    # Train the autoencoder to reproduce its own input
    autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=256, shuffle=True)
    # Encode and decode the data in one pass (the autoencoder itself takes
    # raw inputs; feeding it the 8-dimensional encoding would be a shape error)
    X_decoded = autoencoder.predict(X_scaled)
    # Per-sample reconstruction error
    reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
    return reconstruction_error, autoencoder
@app.route('/autoencoder', methods=['POST'])
def autoencoder_anomaly_detection():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data['file_url']
        # Fetch the file contents with requests
        response = requests.get(file_url)
        if response.status_code != 200:
            return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
        # Read the dataset
        df = pd.read_csv(StringIO(response.text), header=None)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Train the autoencoder and get the reconstruction errors
        reconstruction_error, autoencoder = train_autoencoder(X_scaled)
        # Plot the distribution of reconstruction errors
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt_file_path = 'Reconstruction_Error.jpg'  # where the plot is saved
        plt.savefig(plt_file_path)
        plt.close()  # close the figure to free resources
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(reconstruction_error,
                                               columns=['Reconstruction_Error'])
        output_csv_path = 'Reconstruction_Error.csv'  # where the CSV is saved
        reconstruction_error_df.to_csv(output_csv_path, index=False)
        # Flag anomalies from the reconstruction error
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        # Indices of the anomalous points
        abnormal_indices = np.where(outliers)[0]
        # Return the CSV path, plot path and anomaly indices to the client
        # (cast to int so jsonify can serialize the numpy integers)
        return jsonify({
            'code': 200,
            'msg': 'Autoencoder training and anomaly detection completed',
            'reconstructionErrorCsvPath': output_csv_path,
            'plotImagePath': plt_file_path,
            'abnormalIndices': [int(i) for i in abnormal_indices]
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })


if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')
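# The mean + 2*std cutoff above is a heuristic: it would flag roughly the top
# 2% of points if the reconstruction errors were Gaussian, but in practice they
# are usually right-skewed. A quantile-based cutoff is a common alternative
# (a sketch):
#
#   threshold = np.quantile(reconstruction_error, 0.98)  # flag the top 2%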
## 3. Gaussian Mixture Model (GMM): assumes the data is generated by a mixture
## of several Gaussians and uses the EM algorithm to estimate the parameters of
## each component, which yields a clustering.
from io import StringIO

from flask import Flask, request, jsonify
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
import requests

app = Flask(__name__)


def perform_gmm_clustering(X_scaled):
    # Reduce to two dimensions with PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # GMM clustering
    gmm = GaussianMixture(n_components=2, random_state=42)
    gmm.fit(X_pca)
    # Predict a cluster label for every data point
    predicted_labels = gmm.predict(X_pca)
    # Plot the result; one scatter call per cluster so the legend is built
    # correctly (passing a list as `label` does not create per-cluster entries)
    fig = plt.figure(figsize=(15, 5))
    for i in range(gmm.n_components):
        members = X_pca[predicted_labels == i]
        plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {i + 1}')
    plt.scatter(gmm.means_[:, 0], gmm.means_[:, 1],
                s=300, c='k', marker='x', label='Centroids')
    plt.title('Gaussian Mixture Model Clustering')
    plt.legend()
    return gmm, X_pca, predicted_labels, fig
@app.route('/gmm_clustering', methods=['POST'])
def gmm_clustering():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data['file_url']
        # Fetch the file contents with requests
        response = requests.get(file_url)
        if response.status_code != 200:
            return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
        # Read the dataset
        df = pd.read_csv(StringIO(response.text), header=None)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the GMM clustering analysis
        gmm, X_pca, predicted_labels, fig = perform_gmm_clustering(X_scaled)
        # Save the figure
        plt_file_path = 'GMM_Clustering.jpg'  # where the GMM plot is saved
        fig.savefig(plt_file_path)
        plt.close(fig)  # close the figure to free resources
        # Save the GMM results to CSV, reusing the projection and labels
        # computed above instead of transforming and predicting again
        gmm_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
        gmm_df['Cluster'] = predicted_labels
        output_csv_path = 'GMM_Results.csv'  # where the GMM results are saved
        gmm_df.to_csv(output_csv_path, index=False)
        # Return the result file path and plot path to the client
        return jsonify({
            'code': 200,
            'msg': 'GMM clustering completed',
            'outputCsvPath': output_csv_path,
            'plotImagePath': plt_file_path
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })


if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')
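# Unlike K-means, a fitted GMM also gives soft assignments: each point gets a
# posterior probability of belonging to each component. A sketch, reusing the
# `gmm` and `X_pca` returned by perform_gmm_clustering:
#
#   probs = gmm.predict_proba(X_pca)  # shape (n_samples, n_components)
#   # probs[i, k] = posterior probability that sample i came from component k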
## 4. Isolation Forest: an anomaly-detection algorithm that "isolates" outliers
## by randomly selecting features and split values.
from io import StringIO

from flask import Flask, request, jsonify
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
import requests

app = Flask(__name__)


def perform_iforestation(X_scaled):
    # PCA projection, used only for the 2-D visualisation
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    # Isolation Forest anomaly detection
    iforest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
    iforest.fit(X_scaled)
    # Anomaly scores (lower means more anomalous)
    scores = iforest.decision_function(X_scaled)
    # Label each point as normal (0) or anomalous (1). predict() returns -1
    # for anomalies and respects the contamination rate, unlike a median
    # split of the scores, which would always flag half the data
    predicted_labels = (iforest.predict(X_scaled) == -1).astype(int)
    # Plot the result, one scatter call per class so the legend is correct
    fig = plt.figure(figsize=(15, 5))
    normal = X_pca[predicted_labels == 0]
    anomalies = X_pca[predicted_labels == 1]
    plt.scatter(normal[:, 0], normal[:, 1], c='green', label='Normal')
    plt.scatter(anomalies[:, 0], anomalies[:, 1], c='red', label='Anomaly')
    plt.title('Isolation Forest Anomaly Detection')
    plt.legend()
    return X_pca, scores, fig, predicted_labels
@app.route('/iforestation', methods=['POST'])
def iforestation():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data['file_url']
        # Fetch the file contents with requests
        response = requests.get(file_url)
        if response.status_code != 200:
            return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
        # Read the dataset
        df = pd.read_csv(StringIO(response.text), header=None)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        # Run the Isolation Forest analysis
        X_pca, scores, fig, predicted_labels = perform_iforestation(X_scaled)
        # Save the figure
        plt_file_path = 'Isolation_Forest_Anomaly_Detection.jpg'  # plot path
        fig.savefig(plt_file_path)
        plt.close(fig)  # close the figure to free resources
        # Save the Isolation Forest results to CSV, reusing the projection
        # and scores computed above
        iforest_df = pd.DataFrame({
            'PCA1': X_pca[:, 0],
            'PCA2': X_pca[:, 1],
            'Anomaly_Score': scores,
            'Anomaly_Label': predicted_labels
        })
        output_csv_path = 'Isolation_Forest_Results.csv'  # results path
        iforest_df.to_csv(output_csv_path, index=False)
        # Return the result file path and plot path to the client
        return jsonify({
            'code': 200,
            'msg': 'Isolation Forest analysis completed',
            'outputCsvPath': output_csv_path,
            'plotImagePath': plt_file_path
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })


if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')
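# Note on thresholds, as a sketch: with contamination=0.1 the forest calibrates
# its internal offset so that predict() flags roughly 10% of the training
# points, and predict() agrees with thresholding decision_function() at zero:
#
#   flagged = iforest.predict(X_scaled) == -1
#   # equivalent to: iforest.decision_function(X_scaled) < 0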
## 5. Variational AutoEncoder (VAE): a generative model that performs feature
## learning by learning a probabilistic representation of its input.
from io import StringIO

from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
import requests

app = Flask(__name__)


def build_vae_model(input_dim, intermediate_dim):
    # Encoder: map the input to the mean and log-variance of the latent
    # Gaussian (the latent space here has the same size as the input)
    input_layer = Input(shape=(input_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(input_layer)
    z_mean = Dense(input_dim, name='z_mean')(x)
    z_log_var = Dense(input_dim, name='z_log_var')(x)

    # Reparameterisation trick: z = mu + sigma * epsilon with epsilon ~ N(0, I),
    # which keeps the sampling step differentiable
    def sampling(args):
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    z = Lambda(sampling, output_shape=(input_dim,), name='z')([z_mean, z_log_var])
    encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')

    # Decoder: map a latent sample back to input space; a linear output,
    # since the standardized inputs are not bounded to [0, 1]
    decoder_input = Input(shape=(input_dim,), name='decoder_input')
    x = Dense(intermediate_dim, activation='relu')(decoder_input)
    x = Dense(input_dim, activation='linear')(x)
    decoder = Model(decoder_input, x, name='decoder')

    # End-to-end VAE
    z_mean, z_log_var, z = encoder(input_layer)
    x_decoded = decoder(z)
    vae = Model(input_layer, [x_decoded, z_mean, z_log_var])

    # Reconstruction term: squared error summed over features (rather than
    # binary cross-entropy, which assumes inputs in [0, 1])
    reconstruction_loss = K.sum(K.square(input_layer - x_decoded), axis=-1)
    # KL term, using the closed form
    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2)
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    return vae, encoder, decoder
@app.route('/vae', methods=['POST'])
def vae_anomaly_detection():
    try:
        # Check that the request body contains a file URL
        data = request.json
        if 'file_url' not in data:
            return jsonify({'code': 400, 'msg': 'No file URL provided'})
        file_url = data['file_url']
        # Fetch the file contents with requests
        response = requests.get(file_url)
        if response.status_code != 200:
            return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
        # Read the dataset
        df = pd.read_csv(StringIO(response.text), header=None)
        # Feature scaling
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df)
        input_dim = X_scaled.shape[1]
        intermediate_dim = 64
        # Build and train the VAE (the loss is attached via add_loss,
        # so no explicit target is passed to fit)
        vae, _, _ = build_vae_model(input_dim, intermediate_dim)
        vae.fit(X_scaled, epochs=50, batch_size=128, shuffle=True)
        # The VAE outputs [x_decoded, z_mean, z_log_var]; take the reconstruction
        X_decoded = vae.predict(X_scaled)[0]
        # Per-sample reconstruction error
        reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
        # Plot the distribution of reconstruction errors
        plt_file_path = 'VAE_Reconstruction_Error.jpg'
        plt.figure(figsize=(10, 6))
        plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
        plt.xlabel('Reconstruction Error')
        plt.ylabel('Frequency')
        plt.title('Reconstruction Error Distribution')
        plt.savefig(plt_file_path)
        plt.close()
        # Save the reconstruction errors to CSV
        reconstruction_error_df = pd.DataFrame(
            reconstruction_error, columns=['Reconstruction_Error'])
        output_csv_path = 'VAE_Reconstruction_Error.csv'
        reconstruction_error_df.to_csv(output_csv_path, index=False)
        # Flag anomalies from the reconstruction error
        mean = np.mean(reconstruction_error)
        std = np.std(reconstruction_error)
        threshold = mean + 2 * std
        outliers = reconstruction_error > threshold
        # Cast to int so jsonify can serialize the numpy integers
        return jsonify({
            'code': 200,
            'msg': 'VAE training and anomaly detection completed',
            'reconstructionErrorCsvPath': output_csv_path,
            'plotImagePath': plt_file_path,
            'abnormalIndices': [int(i) for i in np.where(outliers)[0]]
        })
    except Exception as e:
        return jsonify({
            'code': 500,
            'msg': str(e)
        })


if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')