故障预测.py
# ## 1. PCA + K-means clustering
# import matplotlib
# matplotlib.use('Qt5Agg')
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# PCA dimensionality reduction
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# K-means clustering
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(X_pca)
# Define a color list, one color per cluster
colors = ['r', 'g', 'b']
# Plot the result in a single subplot
fig, ax = plt.subplots(figsize=(15, 5))
# Check that the number of clusters does not exceed the length of the color list
if kmeans.n_clusters <= len(colors):
    # Plot the clustering result
    for i in range(kmeans.n_clusters):
        cluster_members = X_pca[kmeans.labels_ == i]
        ax.scatter(cluster_members[:, 0], cluster_members[:, 1],
                   c=colors[i], label=f'Cluster {i + 1}')
    # Plot the centroids
    ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
               s=300, c='k', marker='x', label='Centroids')
    # Set the title and legend
    ax.set_title('PCA and KMeans Clustering')
    ax.legend()
    plt.savefig('PCA and KMeans.jpg')
    plt.show()
    # Save the PCA results and cluster labels to CSV
    pca_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
    pca_df['Cluster'] = kmeans.labels_
    output_csv_path = '../../05故障预测/源代码/PCA_Results.csv'
    pca_df.to_csv(output_csv_path, index=False)
else:
    print("The number of clusters exceeds the number of predefined colors.")
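# A minimal sketch (an addition, not part of the original pipeline) that
# sanity-checks the assumed n_clusters=2 with the silhouette score
# (higher is better); the candidate range 2-6 is an assumption.
from sklearn.metrics import silhouette_score
for k in range(2, 7):
    km = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_pca)
    print(f'k={k}: silhouette={silhouette_score(X_pca, km.labels_):.3f}')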
# ## 2. Autoencoder: an unsupervised neural network that performs feature
# learning by learning an efficient representation of the input data.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense
from keras.models import Model

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Define the autoencoder architecture
input_dim = X_scaled.shape[1]  # input dimensionality
encoding_dim = 8  # encoding dimensionality; adjust to your data
# Input layer
input_layer = Input(shape=(input_dim,))
# Encoding layer
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# Decoding layer; a linear activation matches the standardized (unbounded)
# inputs, which a sigmoid output could not reconstruct
decoded = Dense(input_dim, activation='linear')(encoded)
# Build the autoencoder model
autoencoder = Model(input_layer, decoded)
# Compile the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
# Train the autoencoder
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=256, shuffle=True)
# Build a standalone encoder model
encoder = Model(input_layer, encoded)
# Encode the data (latent representation), then reconstruct it; note the
# autoencoder takes the original inputs, not X_encoded
X_encoded = encoder.predict(X_scaled)
X_decoded = autoencoder.predict(X_scaled)
# Compute the reconstruction error
reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
# Plot the distribution of the reconstruction error
plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
plt.xlabel('Reconstruction Error')
plt.ylabel('Frequency')
plt.title('Reconstruction Error Distribution')
plt.savefig('Reconstruction_Error.jpg')
# plt.show()
# Save the reconstruction error to CSV
reconstruction_error_df = pd.DataFrame(reconstruction_error, columns=['Reconstruction_Error'])
output_csv_path = 'Reconstruction_Error.csv'
reconstruction_error_df.to_csv(output_csv_path, index=False)
# Anomaly detection based on the reconstruction error:
# treat points above mean + 2 * std as anomalies
mean = np.mean(reconstruction_error)
std = np.std(reconstruction_error)
threshold = mean + 2 * std
outliers = reconstruction_error > threshold
# Print the indices of the anomalous points
print("Indices of abnormal data points:", np.where(outliers)[0])
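# A minimal sketch (an addition) of a distribution-free alternative threshold:
# flag the top 5% of reconstruction errors. The 95th percentile is an
# assumption; tune it to your tolerance for false alarms.
percentile_threshold = np.percentile(reconstruction_error, 95)
print("Indices above the 95th percentile:",
      np.where(reconstruction_error > percentile_threshold)[0])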
# ## 3. Gaussian Mixture Model (GMM): assumes the data is a mixture of several
# Gaussian distributions and uses the EM algorithm to estimate the parameters
# of each Gaussian, which yields a clustering.
import pandas as pd
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Assuming the data falls into two classes (normal and abnormal), first reduce
# the dimensionality with PCA to simplify the GMM computation
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# GMM with 2 Gaussian components (i.e. 2 clusters)
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(X_pca)
# Log-likelihood of each data point under the fitted mixture
log_prob = gmm.score_samples(X_pca)
# Assign each point to its most probable component
predicted_labels = gmm.predict(X_pca)
# Plot the result
plt.figure(figsize=(15, 5))
# Color the scatter plot by GMM cluster label; one scatter call per cluster
# so that the legend shows one entry per cluster
for i in range(gmm.n_components):
    members = X_pca[predicted_labels == i]
    plt.scatter(members[:, 0], members[:, 1], label=f'Cluster {i + 1}')
# Plot the component means
plt.scatter(gmm.means_[:, 0], gmm.means_[:, 1], s=300, c='k', marker='x', label='Centroids')
plt.title('Gaussian Mixture Model Clustering')
plt.legend()
plt.savefig('GMM_Clustering.jpg')
plt.show()
# Save the GMM results to CSV
gmm_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
gmm_df['Cluster'] = predicted_labels
output_csv_path = 'GMM_Results.csv'
gmm_df.to_csv(output_csv_path, index=False)
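# A minimal sketch (an addition) for checking the assumed component count with
# the Bayesian information criterion (lower is better); the 1-5 range is an
# assumption.
for k in range(1, 6):
    g = GaussianMixture(n_components=k, random_state=42).fit(X_pca)
    print(f'components={k}: BIC={g.bic(X_pca):.1f}')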
# ## 4. Isolation Forest: an anomaly detection algorithm that "isolates"
# anomalies by randomly selecting features and split values.
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# PCA dimensionality reduction (for plotting only; the forest is fit on all features)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# Isolation forest anomaly detection
iforest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
iforest.fit(X_scaled)
# Anomaly scores (the lower, the more abnormal)
scores = iforest.decision_function(X_scaled)
# Label each point as normal or anomalous; predict() returns 1 for normal and
# -1 for anomalies, consistent with the contamination rate set above
predicted_labels = (iforest.predict(X_scaled) == 1).astype(int)  # 1 = normal, 0 = anomaly
# Plot the result
plt.figure(figsize=(15, 5))
# Draw normal and anomalous points in different colors, one scatter call per
# class so that the legend shows both entries
normal_color = 'green'
anomaly_color = 'red'
plt.scatter(X_pca[predicted_labels == 1, 0], X_pca[predicted_labels == 1, 1],
            c=normal_color, label='Normal')
plt.scatter(X_pca[predicted_labels == 0, 0], X_pca[predicted_labels == 0, 1],
            c=anomaly_color, label='Anomaly')
plt.title('Isolation Forest Anomaly Detection')
plt.legend()
plt.savefig('Isolation_Forest_Anomaly_Detection.jpg')
plt.show()
# Save the isolation forest results to CSV
iforest_df = pd.DataFrame({
    'PCA1': X_pca[:, 0],
    'PCA2': X_pca[:, 1],
    'Anomaly_Score': scores,
    'Anomaly_Label': predicted_labels
})
output_csv_path = 'Isolation_Forest_Results.csv'
iforest_df.to_csv(output_csv_path, index=False)
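# A minimal sketch (an addition) showing how the contamination parameter
# controls the fraction of points flagged; the candidate values are assumptions.
for c in (0.01, 0.05, 0.1):
    labels_c = IsolationForest(n_estimators=100, contamination=c,
                               random_state=42).fit_predict(X_scaled)
    print(f'contamination={c}: {np.sum(labels_c == -1)} points flagged as anomalies')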
# ## 5. Variational Autoencoder (VAE): a generative model that performs
# feature learning by learning a probabilistic representation of the input data.
# import matplotlib
# matplotlib.use('Qt5Agg')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras.losses import mean_squared_error

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# VAE model parameters
input_dim = X_scaled.shape[1]  # dimensionality of the input data
intermediate_dim = 64  # dimensionality of the hidden layer
batch_size = 128  # batch size during training
epochs = 50  # number of training epochs
# VAE architecture
input_layer = Input(shape=(input_dim,), name='encoder_input')
x = Dense(intermediate_dim, activation='relu')(input_layer)
# Outputs for the mean and log-variance of the latent distribution
# (here the latent dimensionality is kept equal to input_dim)
z_mean = Dense(input_dim, name='z_mean')(x)
z_log_var = Dense(input_dim, name='z_log_var')(x)
# The variance must be positive, hence the exponential in the sampling step
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
z = Lambda(sampling, output_shape=(input_dim,), name='z')([z_mean, z_log_var])
# Instantiate the encoder model
encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
# The decoder takes samples from the latent space as input
decoder_input = Input(shape=(input_dim,), name='decoder_input')
# Reconstruct the input data; a linear output matches the standardized
# (unbounded) inputs
x = Dense(intermediate_dim, activation='relu')(decoder_input)
x = Dense(input_dim, activation='linear')(x)
# Instantiate the decoder model
decoder = Model(decoder_input, x, name='decoder')
decoder.summary()
# Wire encoder and decoder into the full VAE model
z_mean, z_log_var, z = encoder(input_layer)
x_decoded = decoder(z)
vae = Model(input_layer, [x_decoded, z_mean, z_log_var])
# VAE loss: mean squared error is used for the reconstruction term because the
# inputs are standardized real values (binary cross-entropy would assume
# inputs in [0, 1])
reconstruction_loss = mean_squared_error(input_layer, x_decoded)
reconstruction_loss *= input_dim
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()
# Train the VAE
vae.fit(X_scaled, epochs=epochs, batch_size=batch_size, shuffle=True)
# Run the VAE on the data; the first element of the output list is the reconstruction
X_decoded = vae.predict(X_scaled)[0]
# Make sure X_decoded is 2-D before computing the reconstruction error
if X_decoded.ndim == 2:
    X_decoded = X_decoded.reshape(X_decoded.shape[0], -1)
reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
# Plot the distribution of the reconstruction error
plt.figure(figsize=(10, 6))
plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
plt.xlabel('Reconstruction Error')
plt.ylabel('Frequency')
plt.title('Reconstruction Error Distribution')
plt.savefig('VAE_Reconstruction_Error.jpg')
plt.show()
# Save the reconstruction error to CSV
reconstruction_error_df = pd.DataFrame(reconstruction_error, columns=['Reconstruction_Error'])
output_csv_path = '../../05故障预测/源代码/VAE_Reconstruction_Error.csv'
reconstruction_error_df.to_csv(output_csv_path, index=False)
# Anomaly detection based on the reconstruction error: treat points above
# mean + 2 * std as anomalies
mean = np.mean(reconstruction_error)
std = np.std(reconstruction_error)
threshold = mean + 2 * std
outliers = reconstruction_error > threshold
# Print the indices of the anomalous points
print("Indices of abnormal data points:", np.where(outliers)[0])
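# A minimal sketch (an addition) that visualizes the first two latent mean
# dimensions, colored by reconstruction error, to see where the flagged points
# sit; it reuses the VAE encoder defined above, and the choice of the first
# two dimensions is an assumption.
z_mean_pred, _, _ = encoder.predict(X_scaled)
plt.figure(figsize=(8, 6))
plt.scatter(z_mean_pred[:, 0], z_mean_pred[:, 1], c=reconstruction_error, cmap='viridis')
plt.colorbar(label='Reconstruction Error')
plt.title('VAE Latent Means (first two dimensions)')
plt.savefig('VAE_Latent_Space.jpg')
plt.show()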