特征提取.py 15 KB


  1. # 1.基本统计特征
  2. from flask import Flask, request, jsonify
  3. import pandas as pd
  4. from scipy.stats import skew, kurtosis
  5. import numpy as np
  6. import os
  7. import requests
# Flask application object; the '/statistical' route below is registered on it.
app = Flask(__name__)
  9. def ae_feature(raw, fs):
  10. mean = np.mean(raw)
  11. std_dev = np.std(raw)
  12. max_value = np.max(raw)
  13. min_value = np.min(raw)
  14. variance = np.var(raw)
  15. skewness = skew(raw)
  16. kurt = kurtosis(raw)
  17. range=np.max(raw) - np.min(raw)
  18. rms = np.sum(raw ** 2) / len(raw)
  19. ae_feature = pd.DataFrame()
  20. ae_feature['mean'] = [mean]
  21. ae_feature['std_dev'] = [std_dev]
  22. ae_feature['max_value'] = [max_value]
  23. ae_feature['min_value'] = [min_value]
  24. ae_feature['variance'] = [variance]
  25. ae_feature['skewness'] = [skewness]
  26. ae_feature['kurt'] = [kurt]
  27. ae_feature['range'] = [range]
  28. ae_feature['rms'] = [rms]
  29. return ae_feature
  30. @app.route('/statistical', methods=['POST'])
  31. def upload_file():
  32. try:
  33. # 检查请求体中是否包含文件地址
  34. data = request.json
  35. if 'file_url' not in data:
  36. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  37. file_url = data['file_url']
  38. # 使用requests获取文件内容
  39. response = requests.get(file_url)
  40. if response.status_code != 200:
  41. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  42. # 读取数据集
  43. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  44. fs = 300 # 设置提取特征值每组的数据量
  45. all_features_df = pd.DataFrame()
  46. for i in range(int(df.shape[1])):
  47. for j in range(0, len(df), fs):
  48. end_index = min(j + fs, len(df))
  49. data_test = np.array(df.iloc[j:end_index, i])
  50. sa = ae_feature(data_test, fs)
  51. all_features_df = pd.concat([all_features_df, sa], ignore_index=True)
  52. # 将结果保存为CSV文件
  53. result_file_path = os.path.join('/tmp', '特征值文件-统计.csv.csv')
  54. all_features_df.to_csv(result_file_path, index=False)
  55. # 构建返回数据,只返回文件路径
  56. return jsonify({
  57. 'code': 200,
  58. 'msg': '文件处理完成',
  59. 'file_path': result_file_path
  60. })
  61. except Exception as e:
  62. return jsonify({
  63. 'code': 500,
  64. 'msg': str(e)
  65. })
  66. if __name__ == '__main__':
  67. app.run(debug=True, port=8081, host='0.0.0.0')
  68. ## 2.时间域特征
  69. from flask import Flask, request, jsonify
  70. import pandas as pd
  71. import numpy as np
  72. from scipy import fftpack
  73. import requests
# Flask application object; the '/time' route below is registered on it.
# NOTE(review): if this section shares a module with the preceding section,
# this assignment rebinds `app` — presumably each section is meant to be its
# own script; confirm.
app = Flask(__name__)
  75. def time_features(raw, fs):
  76. n = len(raw)
  77. fft_raw = abs(fftpack.fft(raw, n) * 2 / n)
  78. main_freq_index = np.where(fft_raw[0:math.floor(n / 2)] == max(fft_raw[0:math.floor(n / 2)]))
  79. f = np.linspace(0, fs, n) * fs / n
  80. peak_freq = f[main_freq_index[0][0]] # 峰值频率,单位Hz
  81. xm = np.mean(raw) # 均值
  82. xstd = np.std(raw) # 标准差
  83. kur = ((np.sum((raw - xm) ** 4)) / len(raw)) / (xstd ** 4) # 峭度
  84. cal_ave_amp = np.sum(np.abs(raw)) / n
  85. power_2 = np.power(raw, 2)
  86. sum_power = np.sum(power_2)
  87. cal_rms = np.sqrt(sum_power / n)
  88. cal_form = cal_rms / cal_ave_amp # 波峰因子,均方根/平均幅值
  89. cal_peak = np.max(np.abs(raw)) # 峰值
  90. cal_crest = cal_peak / cal_rms # 峰值因子
  91. feature_df = pd.DataFrame()
  92. feature_df['peak_freq_khz'] = [peak_freq / 1000] # 转换为kHz
  93. feature_df['均值v'] = [xm]
  94. feature_df['标准差'] = [xstd]
  95. feature_df['峭度'] = [kur]
  96. feature_df['平均幅值'] = [cal_ave_amp]
  97. feature_df['均方根'] = [cal_rms]
  98. feature_df['波峰因子'] = [cal_form]
  99. feature_df['峰值'] = [cal_peak]
  100. feature_df['峰值因子'] = [cal_crest]
  101. return feature_df
  102. @app.route('/time', methods=['POST'])
  103. def upload_file():
  104. try:
  105. # 检查请求体中是否包含文件地址
  106. data = request.json
  107. if 'file_url' not in data:
  108. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  109. file_url = data['file_url']
  110. # 使用requests获取文件内容
  111. response = requests.get(file_url)
  112. if response.status_code != 200:
  113. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  114. # 读取数据集
  115. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  116. fs = 300 # 每组数据量
  117. all_features_df = pd.DataFrame()
  118. for i in range(int(df.shape[1])):
  119. for j in range(0, len(df.iloc[:, i]), fs):
  120. end_index = min(j + fs, len(df.iloc[:, i]))
  121. data_test = np.array(df.iloc[j:end_index, i])
  122. sa = time_features(data_test, fs)
  123. all_features_df = pd.concat([all_features_df, sa], ignore_index=True)
  124. result_file_path = '/tmp/特征值文件-时域.csv' # 使用相对路径
  125. all_features_df.to_csv(result_file_path, index=False)
  126. # 构建返回数据,只返回文件路径
  127. return jsonify({
  128. 'code': 200,
  129. 'msg': '文件处理完成',
  130. 'file_path': result_file_path
  131. })
  132. except Exception as e:
  133. return jsonify({
  134. 'code': 500,
  135. 'msg': str(e)
  136. })
  137. if __name__ == '__main__':
  138. app.run(debug=True, port=8081, host='0.0.0.0')
  139. ## 3.频域特征
  140. from flask import Flask, request, jsonify
  141. import pandas as pd
  142. from scipy.fft import fft, fftfreq
  143. from scipy.stats import skew, kurtosis
  144. import numpy as np
  145. import requests
# Flask application object; the '/frequency' route below is registered on it.
# NOTE(review): if this section shares a module with the preceding sections,
# this assignment rebinds `app` — presumably each section is meant to be its
# own script; confirm.
app = Flask(__name__)
  147. def fe_feature(signal, fs):
  148. n = len(signal)
  149. freqs = fftfreq(n, d=1/fs) # 生成频率序列
  150. signal_fft = fft(signal) # 计算FFT
  151. signal_fft_shifted = np.fft.fftshift(signal_fft) # 将FFT结果中心化
  152. # 计算频域特征
  153. total_energy = np.sum(np.abs(signal_fft)**2)
  154. total_power = total_energy / n
  155. rms = np.sqrt(total_power)
  156. freq_centroid = np.sum(np.abs(freqs) * np.abs(signal_fft_shifted)**2) / total_energy
  157. freq_bandwidth = np.sum(np.abs(freqs[1:]**2 * signal_fft_shifted[1:]**2) - np.abs(freqs[:-1]**2 * signal_fft_shifted[:-1]**2)) / (2 * np.sum(np.abs(signal_fft_shifted)**2))
  158. peak_freq = (np.argmax(np.abs(signal_fft_shifted)) + 1) * fs / n # 峰值频率
  159. freq_skewness = skew(freqs[1:n//2] * (np.abs(signal_fft[1:n//2]) / np.max(np.abs(signal_fft[1:n//2]))))
  160. freq_kurtosis = kurtosis(freqs[1:n//2] * (np.abs(signal_fft[1:n//2]) / np.max(np.abs(signal_fft[1:n//2]))))
  161. feature_df = pd.DataFrame({
  162. '总能量': [total_energy],
  163. '总功率': [total_power],
  164. '均方根': [rms],
  165. '频率中心': [freq_centroid],
  166. '频率带宽': [freq_bandwidth],
  167. '峰值频率': [peak_freq],
  168. '频率偏度': [freq_skewness],
  169. '频率峰度': [freq_kurtosis]
  170. })
  171. return feature_df
  172. @app.route('/frequency', methods=['POST'])
  173. def upload_file():
  174. try:
  175. # 检查请求体中是否包含文件地址
  176. data = request.json
  177. if 'file_url' not in data:
  178. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  179. file_url = data['file_url']
  180. # 使用requests获取文件内容
  181. response = requests.get(file_url)
  182. if response.status_code != 200:
  183. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  184. # 读取数据集
  185. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  186. fs = 300 # 采样频率,根据实际情况调整
  187. all_features_df = pd.DataFrame()
  188. for i in range(int(df.shape[1])):
  189. for j in range(0, len(df.iloc[:, i]), fs):
  190. end_index = min(j + fs, len(df.iloc[:, i]))
  191. data_test = df.iloc[j:end_index, i].to_numpy()
  192. fa = fe_feature(data_test, fs)
  193. all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
  194. result_file_path = '/tmp/特征值文件-频域.csv' # 保存结果的路径
  195. all_features_df.to_csv(result_file_path, index=False, header=True)
  196. # 构建返回数据,只返回文件路径
  197. return jsonify({
  198. 'code': 200,
  199. 'msg': '文件处理完成',
  200. 'file_path': result_file_path
  201. })
  202. except Exception as e:
  203. return jsonify({
  204. 'code': 500,
  205. 'msg': str(e)
  206. })
  207. if __name__ == '__main__':
  208. app.run(debug=True, port=8081, host='0.0.0.0')
  209. ## 4.时频域特征
  210. from flask import Flask, request, jsonify
  211. import pandas as pd
  212. from scipy.stats import skew, kurtosis
  213. import numpy as np
  214. from scipy.fft import fft, fftfreq
  215. import requests
# Flask application object; the '/time-frequency' route below is registered on it.
# NOTE(review): if this section shares a module with the preceding sections,
# this assignment rebinds `app` — presumably each section is meant to be its
# own script; confirm.
app = Flask(__name__)
  217. def tf_features(raw, fs):
  218. # 计算快速傅里叶变换
  219. fft_values = fft(raw)
  220. fft_magnitude = np.abs(fft_values)
  221. # 计算频率向量
  222. n = len(raw)
  223. freq = fftfreq(n, 1 / fs)
  224. # 时域特征
  225. mean = np.mean(raw)
  226. std_dev = np.std(raw)
  227. max_value = np.max(raw)
  228. min_value = np.min(raw)
  229. variance = np.var(raw)
  230. skewness = skew(raw)
  231. kurt = kurtosis(raw)
  232. # 频域特征
  233. power_spectrum = np.square(fft_magnitude) / n
  234. mean_freq = np.mean(freq[power_spectrum > 0])
  235. std_freq = np.std(freq[power_spectrum > 0])
  236. freq_skewness = skew(freq[power_spectrum > 0])
  237. freq_kurt = kurtosis(freq[power_spectrum > 0])
  238. peak_freq_index = np.argmax(power_spectrum)
  239. peak_freq = freq[peak_freq_index]
  240. bandwidth = np.max(freq[power_spectrum > 0]) - np.min(freq[power_spectrum > 0])
  241. energy = np.sum(power_spectrum)
  242. feature_df = pd.DataFrame({
  243. '均值': [mean],
  244. '标准差': [std_dev],
  245. '最大值': [max_value],
  246. '最小值': [min_value],
  247. '方差': [variance],
  248. '偏度': [skewness],
  249. '峰度': [kurt],
  250. '频率均值': [mean_freq],
  251. '频率标准差': [std_freq],
  252. '频率偏度': [freq_skewness],
  253. '频率峰度': [freq_kurt],
  254. '峰值频率': [peak_freq],
  255. '带宽': [bandwidth],
  256. '能量': [energy]
  257. })
  258. return feature_df
  259. @app.route('/time-frequency', methods=['POST'])
  260. def upload_file():
  261. try:
  262. # 检查请求体中是否包含文件地址
  263. data = request.json
  264. if 'file_url' not in data:
  265. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  266. file_url = data['file_url']
  267. # 使用requests获取文件内容
  268. response = requests.get(file_url)
  269. if response.status_code != 200:
  270. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  271. # 读取数据集
  272. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  273. fs = 300 # 采样频率
  274. all_features_df = pd.DataFrame()
  275. for i in range(int(df.shape[1])):
  276. for j in range(0, len(df.iloc[:, i]), fs):
  277. end_index = min(j + fs, len(df.iloc[:, i]))
  278. data_test = df.iloc[j:end_index, i].to_numpy()
  279. fa = tf_features(data_test, fs)
  280. all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
  281. result_file_path = '/tmp/特征值文件-时频域.csv' # 保存结果的路径
  282. all_features_df.to_csv(result_file_path, index=False, header=True)
  283. # 构建返回数据,只返回文件路径
  284. return jsonify({
  285. 'code': 200,
  286. 'msg': '文件处理完成',
  287. 'file_path': result_file_path
  288. })
  289. except Exception as e:
  290. return jsonify({
  291. 'code': 500,
  292. 'msg': str(e)
  293. })
  294. if __name__ == '__main__':
  295. app.run(debug=True, port=8081, host='0.0.0.0')
  296. ## 5.形态学特征提取
  297. from flask import Flask, request, jsonify
  298. import pandas as pd
  299. from scipy.signal import find_peaks
  300. import numpy as np
  301. import requests
# Flask application object; the '/morphology' route below is registered on it.
# NOTE(review): if this section shares a module with the preceding sections,
# this assignment rebinds `app` — presumably each section is meant to be its
# own script; confirm.
app = Flask(__name__)
  303. def morphological_features(raw):
  304. nonzero_elements = np.count_nonzero(raw)
  305. mean_slope = np.mean(np.diff(raw) / np.diff(np.arange(len(raw))))
  306. peaks, _ = find_peaks(raw)
  307. num_peaks = len(peaks)
  308. troughs, _ = find_peaks(-raw) # 寻找谷值
  309. num_troughs = len(troughs)
  310. # 假设峰值和谷值的宽度为相邻峰值或谷值之间的距离
  311. peak_widths = np.diff(np.sort(peaks)[1:] - np.sort(peaks)[:-1]) if num_peaks > 1 else np.array([])
  312. trough_widths = np.diff(np.sort(troughs)[1:] - np.sort(troughs)[:-1]) if num_troughs > 1 else np.array([])
  313. # 计算曲率和加速度变化需要一阶和二阶导数
  314. first_derivative = np.diff(raw) / np.diff(np.arange(len(raw)))
  315. second_derivative = np.diff(first_derivative) / np.diff(np.arange(len(raw) - 1))
  316. curvature = np.sum(second_derivative ** 2) # 曲率的简单估计
  317. acceleration_change = np.sum(np.abs(np.diff(first_derivative))) # 加速度变化的简单估计
  318. feature_df = pd.DataFrame({
  319. '非零元素数': [nonzero_elements],
  320. '平均斜率': [mean_slope],
  321. '峰值数量': [num_peaks],
  322. '谷值数量': [num_troughs],
  323. '平均峰宽': [np.mean(peak_widths)] if peak_widths.size else [0],
  324. '平均谷宽': [np.mean(trough_widths)] if trough_widths.size else [0],
  325. '曲率': [curvature],
  326. '加速度变化': [acceleration_change]
  327. })
  328. return feature_df
  329. @app.route('/morphology', methods=['POST'])
  330. def upload_file():
  331. try:
  332. # 检查请求体中是否包含文件地址
  333. data = request.json
  334. if 'file_url' not in data:
  335. return jsonify({'code': 400, 'msg': 'No file URL provided'})
  336. file_url = data['file_url']
  337. # 使用requests获取文件内容
  338. response = requests.get(file_url)
  339. if response.status_code != 200:
  340. return jsonify({'code': 500, 'msg': 'Failed to retrieve file from URL'})
  341. # 读取数据集
  342. df = pd.read_csv(pd.compat.StringIO(response.text), header=None)
  343. all_features_df = pd.DataFrame()
  344. for i in range(int(df.shape[1])):
  345. # 假设数据是等间隔采样的,fs 不再作为函数参数
  346. data_test = df.iloc[:, i].to_numpy()
  347. fa = morphological_features(data_test)
  348. all_features_df = pd.concat([all_features_df, fa], ignore_index=True)
  349. result_file_path = '/tmp/特征值文件-形态学.csv' # 保存结果的路径
  350. all_features_df.to_csv(result_file_path, index=False, header=True)
  351. # 构建返回数据,只返回文件路径
  352. return jsonify({
  353. 'code': 200,
  354. 'msg': '文件处理完成',
  355. 'file_path': result_file_path
  356. })
  357. except Exception as e:
  358. return jsonify({
  359. 'code': 500,
  360. 'msg': str(e)
  361. })
  362. if __name__ == '__main__':
  363. app.run(debug=True, port=8081, host='0.0.0.0')