stable_marking.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #打分算法API
  2. from flask import Flask, request, jsonify
  3. import pandas as pd
  4. import joblib
  5. from sklearn.preprocessing import LabelEncoder # 用来将文本标签转换为数值
  6. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # 用来创建线性判别分析分类器
  7. from sklearn.model_selection import train_test_split
  8. from sklearn.metrics import accuracy_score
  9. from sklearn.metrics import recall_score
  10. app = Flask(__name__)
  11. app.config['JSON_AS_ASCII'] = False
  12. app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8" # 指定浏览器渲染的文件类型,和解码格式;
  13. # def load_model(model_path):
  14. # try:
  15. # loaded_model = joblib.load(model_path)
  16. # return loaded_model
  17. # except Exception as e:
  18. # raise Exception(f"Failed to load the model: {str(e)}")
  19. #
  20. # def make_Scoring(model_data, data):
  21. # try:
  22. # df = pd.DataFrame(data['data'])
  23. # X = df.iloc[:, 1:-1]
  24. # le_y = model_data['label_encoder_y']
  25. # le_X = model_data['label_encoder_X']
  26. # X = X.apply(le_X.transform)
  27. # loaded_lda = model_data['model']
  28. # y_pred = loaded_lda.predict(X)
  29. # new_y_pred = le_y.inverse_transform(y_pred)
  30. # data['finalGrade'] = new_y_pred
  31. # return data
  32. # except Exception as e:
  33. # raise Exception(f"Failed to make Scoring: {str(e)}")
  34. #
  35. # @app.route('/biz/score/marking', methods=['POST'])
  36. # def Scoring_endpoint():
  37. # try:
  38. # # Get input parameters from the request
  39. # data = request.json
  40. # model_path = data.get('modelPath')
  41. # # Load the pre-trained model and vectorizer
  42. # model_data = joblib.load(model_path+'model_and_encoders.joblib')
  43. # # Make recommendations
  44. # result_data = make_Scoring(model_data, data)
  45. # return jsonify({'code': 200, 'msg': '成功', 'data': result_data})
  46. #
  47. # except Exception as e:
  48. # return jsonify({'code': 500, 'msg': str(e)})
  49. def load_model(model_path):
  50. try:
  51. loaded_model = joblib.load(model_path)
  52. return loaded_model
  53. except Exception as e:
  54. raise Exception(f"Failed to load the model: {str(e)}")
  55. def make_Scoring(model_data, data):
  56. try:
  57. # df = pd.DataFrame(data['dataSet'])
  58. df = pd.DataFrame(data['dataSet'], index=range(len(data['dataSet'])))
  59. # X = df[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10', 'factor11', 'factor12','factor13', 'factor14', 'factor15', 'factor16','factor17', 'factor18', 'factor19', 'factor20', 'factor21', 'factor22', 'factor23', 'factor24','factor25', 'factor26', 'factor27', 'factor28', 'factor29', 'factor30', 'factor31', 'factor32', 'factor33', 'factor34', 'factor35', 'factor36','factor37', 'factor38', 'factor39', 'factor40',
  60. # 'factor41', 'factor42', 'factor43', 'factor44']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
  61. X = df[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9',
  62. 'factor10', 'factor11', 'factor12', 'factor13', 'factor14', 'factor15', 'factor16', 'factor17',
  63. 'factor18', 'factor19', 'factor20', 'factor21', 'factor22', 'factor23', 'factor24', 'factor25',
  64. 'factor26', 'factor27', 'factor28', 'factor29', 'factor30', 'factor31', 'factor32', 'factor33',
  65. 'factor34', 'factor35', 'factor36', 'factor37', 'factor38', 'factor39', 'factor40',
  66. 'factor41', 'factor42', 'factor43', 'factor44']]
  67. le_y = model_data['label_encoder_y']
  68. le_X = model_data['label_encoder_X']
  69. X = X.apply(le_X.transform)
  70. loaded_lda = model_data['model']
  71. y_pred = loaded_lda.predict(X)
  72. new_y_pred = le_y.inverse_transform(y_pred)
  73. # new_y_pred = le_y.inverse_transform(y_pred).tolist()
  74. data['dataSet']['finalGrade'] = new_y_pred[0]
  75. return data
  76. except Exception as e:
  77. raise Exception(f"Failed to make Scoring: {str(e)}")
  78. @app.route('/biz/score/marking', methods=['POST'])
  79. def Scoring_endpoint():
  80. try:
  81. # Get input parameters from the request
  82. data = request.json
  83. model_path = data.get('modelPath')
  84. # Load the pre-trained model and vectorizer
  85. model_data = joblib.load(model_path+'model_and_encoders.joblib')
  86. # Make recommendations
  87. result_data = make_Scoring(model_data, data)
  88. return jsonify({'code': 200, 'msg': '成功', 'data': result_data})
  89. except Exception as e:
  90. return jsonify({'code': 500, 'msg': str(e)})
  91. # def train_and_save_model(data, model_path):
  92. # print(data)
  93. # try:
  94. # df = pd.DataFrame(data['dataSet'])
  95. # X = df.iloc[:, 2:-1]
  96. # # y = df.iloc[:, -1]
  97. # y = data.iloc[:,-1]
  98. # le_X = LabelEncoder()
  99. # X = X.apply(le_X.fit_transform)
  100. # le_y = LabelEncoder()
  101. # y = le_y.fit_transform(y)
  102. # lda = LinearDiscriminantAnalysis()
  103. # lda.fit(X, y)
  104. # model_data = {
  105. # 'model': lda,
  106. # 'label_encoder_y': le_y,
  107. # 'label_encoder_X': le_X
  108. # }
  109. # joblib.dump(model_data, model_path+'model_and_encoders.joblib')
  110. # return {'code': 200, 'msg': '成功'}
  111. # except Exception as e:
  112. # return {'code': 500, 'msg': str(e)}
  113. #
  114. # @app.route('/biz/score/train', methods=['POST'])
  115. # def train_model():
  116. # try:
  117. # data = request.json
  118. # model_path = data.get('modelPath')
  119. # result = train_and_save_model(data, model_path)
  120. # return jsonify(result)
  121. # except Exception as e:
  122. # return jsonify({'code': 500, 'msg': str(e)})
  123. def train_and_save_model(data, model_path):
  124. try:
  125. df = pd.DataFrame(data['dataSet'])
  126. # X = df[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10', 'factor11', 'factor12','factor13', 'factor14', 'factor15', 'factor16','factor17', 'factor18', 'factor19', 'factor20', 'factor21', 'factor22', 'factor23', 'factor24','factor25', 'factor26', 'factor27', 'factor28', 'factor29', 'factor30', 'factor31', 'factor32', 'factor33', 'factor34', 'factor35', 'factor36','factor37', 'factor38', 'factor39', 'factor40',
  127. # 'factor41', 'factor42', 'factor43', 'factor44']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
  128. X = df[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10',
  129. 'factor11', 'factor12', 'factor13', 'factor14', 'factor15', 'factor16', 'factor17', 'factor18', 'factor19',
  130. 'factor20', 'factor21', 'factor22', 'factor23', 'factor24', 'factor25', 'factor26', 'factor27', 'factor28',
  131. 'factor29', 'factor30', 'factor31', 'factor32', 'factor33', 'factor34', 'factor35', 'factor36', 'factor37',
  132. 'factor38', 'factor39', 'factor40', 'factor41', 'factor42', 'factor43', 'factor44']]
  133. le_X = LabelEncoder()
  134. # 对X中的每一列应用LabelEncoder
  135. # for column in X.columns:
  136. # X[column] = le_X.fit_transform(X[column])
  137. X = X.apply(le_X.fit_transform)
  138. y = df[['finalGrade']]
  139. le_y = LabelEncoder()
  140. y = le_y.fit_transform(y)
  141. lda = LinearDiscriminantAnalysis()
  142. lda.fit(X, y)
  143. model_data = {
  144. 'model': lda,
  145. 'label_encoder_y': le_y,
  146. 'label_encoder_X': le_X
  147. }
  148. joblib.dump(model_data, model_path+'model_and_encoders.joblib')
  149. return {'code': 200, 'msg': '成功'}
  150. except Exception as e:
  151. return {'code': 500, 'msg': str(e)}
  152. @app.route('/biz/score/train', methods=['POST'])
  153. def train_model():
  154. try:
  155. data = request.json
  156. model_path = data.get('modelPath')
  157. result = train_and_save_model(data, model_path)
  158. return jsonify(result)
  159. except Exception as e:
  160. return jsonify({'code': 500, 'msg': str(e)})
  161. #验证算法API
  162. # 创建Flask应用
  163. # app = Flask(__name__)
  164. # 定义训练模型的函数
  165. # def train_model2(data):
  166. # try:
  167. # X = data.iloc[:, 2:-1]
  168. # y = data.iloc[:, -1]
  169. # le = LabelEncoder()
  170. # X = X.apply(le.fit_transform)
  171. # y = le.fit_transform(y)
  172. # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  173. # lda = LinearDiscriminantAnalysis()
  174. # lda.fit(X_train, y_train)
  175. # y_pred = lda.predict(X_test)
  176. # y_pred_text = le.inverse_transform(y_pred)
  177. # accuracy = accuracy_score(y_test, y_pred)
  178. # recall = recall_score(y_test, y_pred, average='macro')
  179. # return accuracy, recall, X_test,X_train,y_pred_text
  180. # except Exception as e:
  181. # raise Exception(f"Failed to make Scoring: {str(e)}")
  182. # # 定义路由和处理函数
  183. # @app.route('/biz/scoretaskdetail/verification', methods=['POST'])
  184. # def verification():
  185. # try:
  186. # # 从请求中获取JSON数据
  187. # input_data = request.get_json()
  188. # # 将JSON数据转换为DataFrame
  189. # data = pd.DataFrame(input_data['dataSet'])
  190. # # 训练模型
  191. # accuracy, recall, X_test,X_train,y_pred_text= train_model2(data)
  192. # data['trained'] = 0
  193. # data['tested'] = 0
  194. # data.loc[data.index.isin(X_test.index), 'tested'] = 1
  195. # # 标记训练数据
  196. # data.loc[data.index.isin(X_train.index), 'trained'] = 1
  197. # # 预测结果
  198. # data.loc[data.index.isin(X_test.index), 'calculatedGrade'] = y_pred_text
  199. # output_data = {}
  200. # output_data['code'] = 200
  201. # output_data['msg'] = '成功'
  202. # output_data['data'] = {}
  203. # output_data['data']['accuracyRate'] = round(accuracy*100, 2)
  204. # output_data['data']['recallRate'] = round(recall*100, 2)
  205. # output_data['data']['dataSet'] = data.to_dict('records')
  206. # # 返回JSON格式的响应
  207. # return jsonify(output_data)
  208. # except Exception as e:
  209. # raise Exception(f"Failed to make Scoring: {str(e)}")
  210. # 定义训练模型的函数
  211. def train_model2(data):
  212. try:
  213. # X = data[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9', 'factor10', 'factor11', 'factor12','factor13', 'factor14', 'factor15', 'factor16','factor17', 'factor18', 'factor19', 'factor20', 'factor21', 'factor22', 'factor23', 'factor24','factor25', 'factor26', 'factor27', 'factor28', 'factor29', 'factor30', 'factor31', 'factor32', 'factor33', 'factor34', 'factor35', 'factor36','factor37', 'factor38', 'factor39', 'factor40',
  214. # 'factor41', 'factor42', 'factor43', 'factor44']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
  215. X = data[['factor1', 'factor2', 'factor3', 'factor4', 'factor5', 'factor6', 'factor7', 'factor8', 'factor9',
  216. 'factor10', 'factor11', 'factor12', 'factor13', 'factor14', 'factor15', 'factor16', 'factor17',
  217. 'factor18', 'factor19', 'factor20', 'factor21', 'factor22', 'factor23', 'factor24', 'factor25',
  218. 'factor26', 'factor27', 'factor28', 'factor29', 'factor30', 'factor31', 'factor32', 'factor33',
  219. 'factor34', 'factor35', 'factor36', 'factor37', 'factor38', 'factor39', 'factor40',
  220. 'factor41', 'factor42', 'factor43', 'factor44']]
  221. # data = data.dropna(subset=['finalGrade'])
  222. # y = data['finalGrade'].values.ravel() # 将 y 转换为一维数组
  223. y = data[['finalGrade']]
  224. le = LabelEncoder()
  225. X = X.apply(le.fit_transform)
  226. y = le.fit_transform(y)
  227. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)
  228. print("Number of samples in X_train:", len(X_train))
  229. print("y_train:", y_train)
  230. lda = LinearDiscriminantAnalysis()
  231. lda.fit(X_train, y_train)
  232. y_pred = lda.predict(X_test)
  233. y_pred_text = le.inverse_transform(y_pred)
  234. accuracy = accuracy_score(y_test, y_pred)
  235. recall = recall_score(y_test, y_pred, average='macro')
  236. return accuracy, recall, X_test,X_train,y_pred_text
  237. except Exception as e:
  238. raise Exception(f"Failed to make Scoring: {str(e)}")
  239. # 定义路由和处理函数
  240. @app.route('/biz/scoretaskdetail/verification', methods=['POST'])
  241. def verification():
  242. try:
  243. # 从请求中获取JSON数据
  244. input_data = request.get_json()
  245. # print(input_data)
  246. # 将JSON数据转换为DataFrame
  247. data = pd.DataFrame(input_data['dataSet'])
  248. # 训练模型
  249. accuracy, recall, X_test,X_train,y_pred_text= train_model2(data)
  250. data['trained'] = 0
  251. data['tested'] = 0
  252. data.loc[data.index.isin(X_test.index), 'tested'] = 1
  253. # 标记训练数据
  254. data.loc[data.index.isin(X_train.index), 'trained'] = 1
  255. # 预测结果
  256. data.loc[data.index.isin(X_test.index), 'calculatedGrade'] = y_pred_text
  257. output_data = {}
  258. output_data['code'] = 200
  259. output_data['msg'] = '成功'
  260. output_data['data'] = {}
  261. output_data['data']['accuracyRate'] = round(accuracy*100, 2)
  262. output_data['data']['recallRate'] = round(recall*100, 2)
  263. output_data['data']['dataSet'] = data.to_dict('records')
  264. # 返回JSON格式的响应
  265. return jsonify(output_data)
  266. except Exception as e:
  267. raise Exception(f"Failed to make Scoring: {str(e)}")
  268. # 运行应用
  269. if __name__ == '__main__':
  270. app.run(debug=True, port=8082, host='0.0.0.0')