故障预测.py

# ## 1. PCA + K-means clustering
# import matplotlib
# matplotlib.use('Qt5Agg')
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read the CSV file
'''df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Reduce dimensionality with PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# KMeans clustering
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(X_pca)
# Color list: one color per cluster
colors = ['r', 'g', 'b']
# Plot the results in a single subplot
fig, ax = plt.subplots(figsize=(15, 5))
# Only plot if the number of clusters does not exceed the color list
if kmeans.n_clusters <= len(colors):
    # Draw each cluster
    for i in range(kmeans.n_clusters):
        cluster_members = X_pca[kmeans.labels_ == i]
        ax.scatter(cluster_members[:, 0], cluster_members[:, 1],
                   c=colors[i], label=f'Cluster {i + 1}')
    # Draw the centroids
    ax.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
               s=300, c='k', marker='x', label='Centroids')
    # Set the title and legend
    ax.set_title('PCA and KMeans Clustering')
    ax.legend()
    plt.savefig('PCA and KMeans.jpg')
    plt.show()
    # Save the PCA coordinates and cluster labels to CSV
    pca_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
    pca_df['Cluster'] = kmeans.labels_
    output_csv_path = '../../05故障预测/源代码/PCA_Results.csv'
    pca_df.to_csv(output_csv_path, index=False)
else:
    print("Number of clusters exceeds the number of predefined colors.")'''
# 2. AutoEncoder: an unsupervised neural network model that performs feature
# learning by learning an effective representation of the input data.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense
from keras.models import Model

# Read the CSV file
df = pd.read_csv('特征提取.csv')
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Define the autoencoder architecture
input_dim = X_scaled.shape[1]  # input dimensionality
encoding_dim = 8  # encoding dimensionality; adjust to your data
# Input layer
input_layer = Input(shape=(input_dim,))
# Encoding layer
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# Decoding layer; linear activation, because the standardized inputs are not
# bounded to [0, 1] (the original used sigmoid, which cannot reconstruct
# negative values)
decoded = Dense(input_dim, activation='linear')(encoded)
# Build the autoencoder model
autoencoder = Model(input_layer, decoded)
# Compile the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
# Train the autoencoder
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=256, shuffle=True)
# Separate encoder model
encoder = Model(input_layer, encoded)
# Encode the data, then reconstruct it with the full autoencoder
X_encoded = encoder.predict(X_scaled)
X_decoded = autoencoder.predict(X_scaled)  # the autoencoder expects the original inputs, not X_encoded
# Compute the reconstruction error
reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
# Plot the distribution of reconstruction errors
plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
plt.xlabel('Reconstruction Error')
plt.ylabel('Frequency')
plt.title('Reconstruction Error Distribution')
plt.savefig('Reconstruction_Error.jpg')
# plt.show()
# Save the reconstruction errors to CSV
reconstruction_error_df = pd.DataFrame(reconstruction_error, columns=['Reconstruction_Error'])
output_csv_path = 'Reconstruction_Error.csv'
reconstruction_error_df.to_csv(output_csv_path, index=False)
# Anomaly detection based on the reconstruction error:
# treat points above mean + 2 * std as anomalies
mean = np.mean(reconstruction_error)
std = np.std(reconstruction_error)
threshold = mean + 2 * std
outliers = reconstruction_error > threshold
# Print the indices of the anomalous points
print("Indices of abnormal data points:", np.where(outliers)[0])
## 3. Gaussian Mixture Model (GMM): assumes the data is generated by a mixture
## of Gaussians and uses the EM algorithm to estimate each Gaussian's
## parameters, which yields a clustering.
'''
import pandas as pd
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Assuming the data falls into two classes (normal and abnormal), first reduce
# dimensionality with PCA to simplify the GMM computation
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# GMM with 2 Gaussian components (i.e. 2 clusters)
gmm = GaussianMixture(n_components=2, random_state=42)
gmm.fit(X_pca)
# Weighted log-likelihood of each sample under the fitted mixture
log_prob = gmm.score_samples(X_pca)
# Assign each point to its most likely component
predicted_labels = gmm.predict(X_pca)
# Plot the results
plt.figure(figsize=(15, 5))
# Color the scatter plot by GMM cluster label
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=predicted_labels, cmap=plt.cm.viridis)
# Draw the component means
centroids = plt.scatter(gmm.means_[:, 0], gmm.means_[:, 1], s=300, c='k', marker='x', label='Centroids')
plt.title('Gaussian Mixture Model Clustering')
# Build one legend entry per cluster plus the centroids (passing a list to
# `label=`, as the original did, does not produce per-cluster entries)
handles, _ = scatter.legend_elements()
plt.legend(handles + [centroids], ['Cluster 1', 'Cluster 2', 'Centroids'])
plt.savefig('GMM_Clustering.jpg')
plt.show()
# Save the GMM results to CSV
gmm_df = pd.DataFrame(data=X_pca, columns=['PCA1', 'PCA2'])
gmm_df['Cluster'] = predicted_labels
output_csv_path = 'GMM_Results.csv'
gmm_df.to_csv(output_csv_path, index=False)
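# A minimal sketch (my addition, not in the original): the 2-component choice
# above is an assumption; sweeping n_components and comparing BIC is the usual
# way to let the data vote on the mixture size.
bics = {}
for k in range(1, 7):
    candidate = GaussianMixture(n_components=k, random_state=42).fit(X_pca)
    bics[k] = candidate.bic(X_pca)
print('BIC per component count (lower is better):', bics)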
# 4. Isolation Forest: an anomaly detection algorithm that "isolates" anomalies
# by randomly selecting features and split values.
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# Reduce dimensionality with PCA (for plotting)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
# Isolation Forest anomaly detection
iforest = IsolationForest(n_estimators=100, contamination=0.1, random_state=42)
iforest.fit(X_scaled)
# Anomaly scores: lower decision_function values mean more anomalous
scores = iforest.decision_function(X_scaled)
# Label each point as normal (1) or anomalous (0); predict() already applies
# the contamination-based threshold (the original thresholded at the median,
# which flags half of the data and contradicts contamination=0.1)
predicted_labels = (iforest.predict(X_scaled) == 1).astype(int)
# Plot the results
plt.figure(figsize=(15, 5))
# Separate colors for normal and anomalous points
normal_color = 'green'
anomaly_color = 'red'
# Plot normal and anomalous points separately so the legend is correct
normal_mask = predicted_labels == 1
plt.scatter(X_pca[normal_mask, 0], X_pca[normal_mask, 1], c=normal_color, label='Normal')
plt.scatter(X_pca[~normal_mask, 0], X_pca[~normal_mask, 1], c=anomaly_color, label='Anomaly')
plt.title('Isolation Forest Anomaly Detection')
plt.legend()
plt.savefig('Isolation_Forest_Anomaly_Detection.jpg')
plt.show()
# Save the Isolation Forest results to CSV
iforest_df = pd.DataFrame({
    'PCA1': X_pca[:, 0],
    'PCA2': X_pca[:, 1],
    'Anomaly_Score': scores,
    'Anomaly_Label': predicted_labels
})
output_csv_path = 'Isolation_Forest_Results.csv'
iforest_df.to_csv(output_csv_path, index=False)
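# Sanity check (my addition, not in the original): predict() labels should flag
# roughly the contamination fraction (0.1) of the training data as anomalous.
flagged_fraction = np.mean(iforest.predict(X_scaled) == -1)
print(f'Fraction flagged as anomalous: {flagged_fraction:.3f} (expected ~0.10)')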
## 5. Variational AutoEncoder (VAE): a generative model that performs feature
## learning by learning a probabilistic representation of the input data.
# import matplotlib
# matplotlib.use('Qt5Agg')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K

# Read the CSV file
df = pd.read_csv(r'/03特征提取/源代码/特征值文件-频域.csv')
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)
# VAE model parameters
input_dim = X_scaled.shape[1]  # dimensionality of the input data
intermediate_dim = 64  # hidden-layer dimensionality
batch_size = 128  # training batch size
epochs = 50  # number of training epochs
# VAE architecture
input_layer = Input(shape=(input_dim,), name='encoder_input')
x = Dense(intermediate_dim, activation='relu')(input_layer)
# Latent mean and log-variance outputs (note: the latent dimension here equals
# input_dim, as in the original; a smaller latent size is more typical)
z_mean = Dense(input_dim, name='z_mean')(x)
z_log_var = Dense(input_dim, name='z_log_var')(x)
# The variance must be positive, so the log-variance is exponentiated inside
# the reparameterization trick
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling, output_shape=(input_dim,), name='z')([z_mean, z_log_var])
# Instantiate the encoder model
encoder = Model(input_layer, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
# The decoder takes samples from the latent space as input
decoder_input = Input(shape=(input_dim,), name='decoder_input')
# Reconstruct the input data; linear output, because the standardized inputs
# are not bounded to [0, 1] (the original used sigmoid)
x = Dense(intermediate_dim, activation='relu')(decoder_input)
x = Dense(input_dim, activation='linear')(x)
# Instantiate the decoder model
decoder = Model(decoder_input, x, name='decoder')
decoder.summary()
# Full VAE model
z_mean, z_log_var, z = encoder(input_layer)
x_decoded = decoder(z)
vae = Model(input_layer, [x_decoded, z_mean, z_log_var])
# VAE loss: mean-squared reconstruction error (the original used binary
# cross-entropy, which is only valid for data in [0, 1]) plus the KL divergence
reconstruction_loss = K.mean(K.square(input_layer - x_decoded), axis=-1)
reconstruction_loss *= input_dim
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()
# Train the VAE
vae.fit(X_scaled, epochs=epochs, batch_size=batch_size, shuffle=True)
# Predict with the VAE; the first output is the reconstruction
X_decoded = vae.predict(X_scaled)[0]
# Ensure X_decoded is a 2-D (samples x features) array before computing the error
if X_decoded.ndim == 2:
    X_decoded = X_decoded.reshape(X_decoded.shape[0], -1)
reconstruction_error = np.mean(np.square(X_scaled - X_decoded), axis=1)
# Plot the distribution of reconstruction errors
plt.figure(figsize=(10, 6))
plt.hist(reconstruction_error, bins=50, color='blue', alpha=0.7)
plt.xlabel('Reconstruction Error')
plt.ylabel('Frequency')
plt.title('Reconstruction Error Distribution')
plt.savefig('VAE_Reconstruction_Error.jpg')
plt.show()
# Save the reconstruction errors to CSV
reconstruction_error_df = pd.DataFrame(reconstruction_error, columns=['Reconstruction_Error'])
output_csv_path = '../../05故障预测/源代码/VAE_Reconstruction_Error.csv'
reconstruction_error_df.to_csv(output_csv_path, index=False)
# Anomaly detection based on the reconstruction error:
# treat points above mean + 2 * std as anomalies
mean = np.mean(reconstruction_error)
std = np.std(reconstruction_error)
threshold = mean + 2 * std
outliers = reconstruction_error > threshold
# Print the indices of the anomalous points
print("Indices of abnormal data points:", np.where(outliers)[0])
'''