|
@@ -0,0 +1,183 @@
|
|
|
+#打分算法API
|
|
|
+from flask import Flask, request, jsonify
|
|
|
+import pandas as pd
|
|
|
+import joblib
|
|
|
+
|
|
|
+from sklearn.preprocessing import LabelEncoder # 用来将文本标签转换为数值
|
|
|
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # 用来创建线性判别分析分类器
|
|
|
+from sklearn.model_selection import train_test_split
|
|
|
+
|
|
|
+from sklearn.metrics import accuracy_score
|
|
|
+from sklearn.metrics import recall_score
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Flask application object shared by every route in this module.
app = Flask(__name__)
app.config.update(
    # Emit non-ASCII characters (e.g. the Chinese status messages) verbatim
    # instead of \uXXXX escapes.
    # NOTE(review): newer Flask releases are configured through
    # ``app.json.ensure_ascii`` instead - confirm against the deployed version.
    JSON_AS_ASCII=False,
    # Content type (and charset) announced for jsonify() responses.
    JSONIFY_MIMETYPE="application/json;charset=utf-8",
)
|
|
|
+
|
|
|
# Load a previously trained model bundle from disk.
def load_model(model_path):
    """Deserialize the object stored at ``model_path`` using joblib.

    Args:
        model_path: Path of the joblib file to read.

    Returns:
        The object reconstructed by ``joblib.load``.

    Raises:
        Exception: If loading fails for any reason. The message is prefixed
            with ``"Failed to load the model: "`` and the original error is
            attached as ``__cause__`` so its traceback is preserved.
    """
    # SECURITY NOTE: joblib files are pickle-based, so loading one can execute
    # arbitrary code. Only pass paths that point to trusted files.
    try:
        return joblib.load(model_path)
    except Exception as e:
        raise Exception(f"Failed to load the model: {str(e)}") from e
|
|
|
+
|
|
|
+
|
|
|
# Predict the final grade of one record with a trained model bundle.
def make_Scoring(model_data, data):
    """Score the single record in ``data['dataSet']`` and store the result.

    Args:
        model_data: Mapping produced by the training step. It must contain
            ``'model'`` (object with ``predict``), ``'label_encoder_y'``
            (object with ``inverse_transform``) and the feature encoders:
            either ``'label_encoders_X'`` (mapping of column name to an
            encoder with ``transform``) or the legacy single
            ``'label_encoder_X'``.
        data: Request payload. ``data['dataSet']`` is one record (a mapping)
            holding the columns ``factor1`` .. ``factor44``.

    Returns:
        The same ``data`` object, with ``data['dataSet']['finalGrade']`` set
        to the predicted grade.

    Raises:
        Exception: On any failure, with the message prefixed by
            ``"Failed to make Scoring: "`` and the original error chained.
    """
    try:
        factor_columns = [f'factor{i}' for i in range(1, 45)]
        record = data['dataSet']

        # One record -> exactly one row. (Passing the mapping with
        # index=range(len(record)) would broadcast it into one identical row
        # per key, only to use the first prediction.)
        df = pd.DataFrame([record])

        # Prefer the encoder fitted for each individual column. A single
        # shared encoder only remembers the classes of the last column it was
        # fitted on, so it is used only for bundles that lack the mapping.
        column_encoders = model_data.get('label_encoders_X') or {}
        shared_encoder = model_data.get('label_encoder_X')
        encoded = {}
        for name in factor_columns:
            encoder = column_encoders.get(name, shared_encoder)
            if encoder is None:
                raise KeyError('label_encoder_X')
            encoded[name] = encoder.transform(df[name])
        X = pd.DataFrame(encoded, index=df.index, columns=factor_columns)

        le_y = model_data['label_encoder_y']
        y_pred = model_data['model'].predict(X)
        grade = le_y.inverse_transform(y_pred)[0]
        # Convert numpy scalars to plain Python values so the payload stays
        # JSON-serializable.
        if hasattr(grade, 'item'):
            grade = grade.item()
        record['finalGrade'] = grade

        return data
    except Exception as e:
        raise Exception(f"Failed to make Scoring: {str(e)}") from e
|
|
|
# Scoring endpoint: load the trained model bundle and grade one record.
@app.route('/biz/score/marking', methods=['POST'])
def Scoring_endpoint():
    """Handle ``POST /biz/score/marking``.

    Expects a JSON object with ``modelPath`` (prefix to which
    ``'model_and_encoders.joblib'`` is appended) and ``dataSet`` (the record
    to score).

    Returns:
        A JSON response ``{'code': 200, 'msg': ..., 'data': ...}`` on success,
        or ``{'code': 500, 'msg': <error text>}`` on any failure.
    """
    try:
        # Get input parameters from the request
        data = request.json
        if not isinstance(data, dict):
            raise ValueError("Request body must be a JSON object")
        model_path = data.get('modelPath')
        if not isinstance(model_path, str):
            raise ValueError("'modelPath' is required and must be a string")

        # Load the pre-trained model and its label encoders.
        # SECURITY NOTE: the path comes from the client and the file is
        # unpickled by joblib, which can execute arbitrary code. Restrict
        # 'modelPath' to trusted locations before exposing this endpoint.
        model_data = load_model(model_path + 'model_and_encoders.joblib')

        # Predict the grade; the payload is returned with 'finalGrade' filled in.
        result_data = make_Scoring(model_data, data)
        return jsonify({'code': 200, 'msg': '成功', 'data': result_data})

    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
|
|
|
+
|
|
|
# Train an LDA classifier on the supplied records and persist it with its encoders.
def train_and_save_model(data, model_path):
    """Fit a model on ``data['dataSet']`` and dump it next to ``model_path``.

    Args:
        data: Payload whose ``'dataSet'`` entry can be turned into a
            DataFrame containing the columns ``factor1`` .. ``factor44`` and
            ``finalGrade``.
        model_path: Prefix to which ``'model_and_encoders.joblib'`` is
            appended to form the output file name.

    Returns:
        ``{'code': 200, 'msg': ...}`` on success, or
        ``{'code': 500, 'msg': <error text>}`` if anything fails (errors are
        reported through the return value, not raised).

    The saved bundle contains:
        * ``'model'``: the fitted ``LinearDiscriminantAnalysis``.
        * ``'label_encoder_y'``: encoder of ``finalGrade``.
        * ``'label_encoders_X'``: mapping of column name to the encoder
          fitted on that column.
        * ``'label_encoder_X'``: encoder of the last factor column, kept for
          readers that expect a single feature encoder.
    """
    try:
        factor_columns = [f'factor{i}' for i in range(1, 45)]
        df = pd.DataFrame(data['dataSet'])
        raw_features = df[factor_columns]

        # Fit one encoder per column. Re-fitting a single encoder on every
        # column leaves it holding only the classes of the last column, which
        # makes it unusable for transforming the other columns later.
        encoders_X = {}
        encoded = {}
        for name in factor_columns:
            encoder = LabelEncoder()
            encoded[name] = encoder.fit_transform(raw_features[name])
            encoders_X[name] = encoder
        X = pd.DataFrame(encoded, index=df.index, columns=factor_columns)

        # Pass the target as a 1-D column; LabelEncoder expects 1-D input.
        le_y = LabelEncoder()
        y = le_y.fit_transform(df['finalGrade'])

        lda = LinearDiscriminantAnalysis()
        lda.fit(X, y)

        model_data = {
            'model': lda,
            'label_encoder_y': le_y,
            'label_encoder_X': encoders_X[factor_columns[-1]],
            'label_encoders_X': encoders_X,
        }
        joblib.dump(model_data, model_path + 'model_and_encoders.joblib')
        return {'code': 200, 'msg': '成功'}
    except Exception as e:
        return {'code': 500, 'msg': str(e)}
|
|
|
+
|
|
|
# Training endpoint: fit a model on the posted records and save it.
@app.route('/biz/score/train', methods=['POST'])
def train_model():
    """Handle ``POST /biz/score/train``.

    Expects a JSON object with ``modelPath`` (prefix of the output file) and
    ``dataSet`` (the training records), and delegates to
    ``train_and_save_model``.

    Returns:
        The JSON-encoded result of ``train_and_save_model``, or
        ``{'code': 500, 'msg': <error text>}`` if the request is invalid or
        an unexpected error occurs.
    """
    try:
        data = request.json
        # Validate up front so a bad request fails with a clear message
        # before any training work is done.
        if not isinstance(data, dict):
            raise ValueError("Request body must be a JSON object")
        model_path = data.get('modelPath')
        if not isinstance(model_path, str):
            raise ValueError("'modelPath' is required and must be a string")

        # SECURITY NOTE: 'modelPath' is client-controlled and determines where
        # a file is written; restrict it to trusted locations.
        result = train_and_save_model(data, model_path)
        return jsonify(result)
    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
|
|
|
+
|
|
|
+
|
|
|
# Train on a 70/30 split of the data and report hold-out metrics.
def train_model2(data):
    """Fit an LDA classifier on part of ``data`` and evaluate it on the rest.

    Args:
        data: DataFrame containing the columns ``factor1`` .. ``factor44``
            and ``finalGrade``.

    Returns:
        A tuple ``(accuracy, recall, X_test, X_train, y_pred_text)``:
        accuracy and macro-averaged recall on the test split, the encoded
        test and train feature frames (which keep the row index of ``data``),
        and the predicted grades for ``X_test`` decoded back to their
        original labels, in ``X_test`` row order.

    Raises:
        Exception: On any failure, with the message prefixed by
            ``"Failed to train and evaluate the model: "`` and the original
            error chained.
    """
    import logging

    try:
        factor_columns = [f'factor{i}' for i in range(1, 45)]
        raw_features = data[factor_columns]

        # Each feature column gets its own throw-away encoder; the target has
        # a dedicated one because it is needed again to decode predictions.
        X = pd.DataFrame(
            {name: LabelEncoder().fit_transform(raw_features[name]) for name in factor_columns},
            index=data.index,
            columns=factor_columns,
        )
        le_y = LabelEncoder()
        # 1-D target column; LabelEncoder expects 1-D input.
        y = le_y.fit_transform(data['finalGrade'])

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)
        logging.getLogger(__name__).debug("Number of samples in X_train: %d", len(X_train))

        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)
        y_pred = lda.predict(X_test)
        y_pred_text = le_y.inverse_transform(y_pred)

        accuracy = accuracy_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred, average='macro')
        return accuracy, recall, X_test, X_train, y_pred_text
    except Exception as e:
        raise Exception(f"Failed to train and evaluate the model: {str(e)}") from e
|
|
|
+
|
|
|
# Validation endpoint: train on a split of the posted data and report metrics.
@app.route('/biz/scoretaskdetail/verification', methods=['POST'])
def verification():
    """Handle ``POST /biz/scoretaskdetail/verification``.

    Expects a JSON object whose ``dataSet`` entry can be turned into a
    DataFrame accepted by ``train_model2``.

    Returns:
        On success, a JSON response with ``code`` 200 and ``data`` holding
        ``accuracyRate`` and ``recallRate`` (percentages rounded to two
        decimals) plus ``dataSet``: the input rows extended with ``trained``
        and ``tested`` flags (0/1) and ``calculatedGrade`` (the predicted
        grade for test rows). On failure, ``{'code': 500, 'msg': <error>}``.
    """
    try:
        # Read the JSON payload and turn the records into a DataFrame.
        input_data = request.get_json()
        data = pd.DataFrame(input_data['dataSet'])

        # Train and evaluate the model.
        accuracy, recall, X_test, X_train, y_pred_text = train_model2(data)

        # Flag which rows ended up in the train and test splits.
        data['trained'] = data.index.isin(X_train.index).astype(int)
        data['tested'] = data.index.isin(X_test.index).astype(int)

        # Use None (JSON null) rather than NaN for rows without a prediction;
        # NaN is not valid JSON.
        if 'calculatedGrade' not in data.columns:
            data['calculatedGrade'] = None
        # y_pred_text follows the (shuffled) row order of X_test, so assign by
        # X_test's index labels. A boolean mask would walk the rows in their
        # original order and attach predictions to the wrong records.
        data.loc[X_test.index, 'calculatedGrade'] = pd.Series(y_pred_text).tolist()

        output_data = {
            'code': 200,
            'msg': '成功',
            'data': {
                'accuracyRate': round(accuracy*100, 2),
                'recallRate': round(recall*100, 2),
                'dataSet': data.to_dict('records'),
            },
        }
        # Return the JSON response.
        return jsonify(output_data)
    except Exception as e:
        # Report failures with the same JSON envelope as the other endpoints
        # instead of letting the exception escape the view.
        return jsonify({'code': 500, 'msg': str(e)})
|
|
|
# Run the application with Flask's built-in server when executed as a script.
if __name__ == '__main__':
    import os

    # SECURITY: debug mode enables the interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Because the server
    # binds to all interfaces (0.0.0.0), debug is off unless explicitly
    # requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, port=8082, host='0.0.0.0')
|