123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294 |
- #打分算法API
- from flask import Flask, request, jsonify
- import pandas as pd
- import joblib
- from sklearn.preprocessing import LabelEncoder # 用来将文本标签转换为数值
- from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # 用来创建线性判别分析分类器
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import accuracy_score
- from sklearn.metrics import recall_score
# Flask application object; the scoring, training and verification routes in
# this file are registered on it.
app = Flask(__name__)

# Emit non-ASCII text (such as the Chinese status messages) verbatim instead of
# as \uXXXX escapes, and declare the charset so browsers decode the body as
# UTF-8. These config keys are only honoured by Flask releases before 2.3.
app.config['JSON_AS_ASCII'] = False
app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"

# Flask >= 2.2 exposes the same settings on the JSON provider (``app.json``)
# and Flask >= 2.3 ignores the config keys above, so configure both places.
_json_provider = getattr(app, 'json', None)
if _json_provider is not None:
    _json_provider.ensure_ascii = False
    _json_provider.mimetype = "application/json;charset=utf-8"
- # def load_model(model_path):
- # try:
- # loaded_model = joblib.load(model_path)
- # return loaded_model
- # except Exception as e:
- # raise Exception(f"Failed to load the model: {str(e)}")
- #
- # def make_Scoring(model_data, data):
- # try:
- # df = pd.DataFrame(data['data'])
- # X = df.iloc[:, 1:-1]
- # le_y = model_data['label_encoder_y']
- # le_X = model_data['label_encoder_X']
- # X = X.apply(le_X.transform)
- # loaded_lda = model_data['model']
- # y_pred = loaded_lda.predict(X)
- # new_y_pred = le_y.inverse_transform(y_pred)
- # data['finalGrade'] = new_y_pred
- # return data
- # except Exception as e:
- # raise Exception(f"Failed to make Scoring: {str(e)}")
- #
- # @app.route('/biz/score/marking', methods=['POST'])
- # def Scoring_endpoint():
- # try:
- # # Get input parameters from the request
- # data = request.json
- # model_path = data.get('modelPath')
- # # Load the pre-trained model and vectorizer
- # model_data = joblib.load(model_path+'model_and_encoders.joblib')
- # # Make recommendations
- # result_data = make_Scoring(model_data, data)
- # return jsonify({'code': 200, 'msg': '成功', 'data': result_data})
- #
- # except Exception as e:
- # return jsonify({'code': 500, 'msg': str(e)})
def load_model(model_path):
    """Load a joblib-serialised object from ``model_path``.

    Args:
        model_path: Path handed straight to ``joblib.load``.

    Returns:
        The object stored in the file.

    Raises:
        Exception: If loading fails for any reason. The message starts with
            ``"Failed to load the model: "`` and the original error is
            attached as ``__cause__``.
    """
    try:
        return joblib.load(model_path)
    except Exception as e:
        # Keep the generic exception type and message prefix, but chain the
        # cause so the real traceback is not lost.
        raise Exception(f"Failed to load the model: {str(e)}") from e
def make_Scoring(model_data, data):
    """Predict the grade of one record and store it in the request payload.

    Args:
        model_data: Mapping with the keys ``'model'`` (object with
            ``predict``), ``'label_encoder_X'`` (object with ``transform``)
            and ``'label_encoder_y'`` (object with ``inverse_transform``),
            i.e. the bundle written by ``train_and_save_model``.
        data: Request payload. ``data['dataSet']`` must be a single record:
            a mapping that contains ``factor1`` ... ``factor44``.

    Returns:
        The same ``data`` object, with ``data['dataSet']['finalGrade']`` set
        to the predicted grade as a plain Python value.

    Raises:
        Exception: On any failure. The message starts with
            ``"Failed to make Scoring: "`` and the original error is chained
            as ``__cause__``.
    """
    try:
        record = data['dataSet']
        # One record -> exactly one row. (Passing the mapping together with
        # index=range(len(record)) produced one identical row per key.)
        df = pd.DataFrame([record])
        feature_columns = [f'factor{i}' for i in range(1, 45)]
        X = df[feature_columns]

        le_y = model_data['label_encoder_y']
        le_X = model_data['label_encoder_X']
        # NOTE(review): train_and_save_model refits this single encoder on
        # every column in turn, so it only holds the classes of the last
        # column fitted. Applying it to every column is only consistent if
        # all factor columns share the same value set - confirm.
        X = X.apply(le_X.transform)

        y_pred = model_data['model'].predict(X)
        new_y_pred = le_y.inverse_transform(y_pred)
        # tolist() converts the numpy scalar into a native Python value so
        # the payload stays JSON-serialisable.
        record['finalGrade'] = new_y_pred.tolist()[0]
        return data
    except Exception as e:
        raise Exception(f"Failed to make Scoring: {str(e)}") from e
@app.route('/biz/score/marking', methods=['POST'])
def Scoring_endpoint():
    """Score the record posted as JSON with a previously trained model.

    The request body must be a JSON object with ``modelPath`` (a prefix to
    which ``model_and_encoders.joblib`` is appended) and ``dataSet`` (the
    record to score). The response is always a JSON envelope: ``code`` 200
    with the updated payload under ``data`` on success, or ``code`` 500 with
    the error text under ``msg`` on failure.
    """
    try:
        data = request.json
        if not isinstance(data, dict):
            raise ValueError("request body must be a JSON object")
        model_path = data.get('modelPath')
        if not isinstance(model_path, str):
            raise ValueError("'modelPath' is required and must be a string")

        # SECURITY NOTE(review): joblib files are pickle-based, so loading a
        # client-chosen path can execute arbitrary code. Restrict modelPath
        # to a trusted directory before exposing this endpoint.
        model_data = load_model(model_path + 'model_and_encoders.joblib')
        result_data = make_Scoring(model_data, data)
        return jsonify({'code': 200, 'msg': '成功', 'data': result_data})
    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
- # def train_and_save_model(data, model_path):
- # print(data)
- # try:
- # df = pd.DataFrame(data['dataSet'])
- # X = df.iloc[:, 2:-1]
- # # y = df.iloc[:, -1]
- # y = data.iloc[:,-1]
- # le_X = LabelEncoder()
- # X = X.apply(le_X.fit_transform)
- # le_y = LabelEncoder()
- # y = le_y.fit_transform(y)
- # lda = LinearDiscriminantAnalysis()
- # lda.fit(X, y)
- # model_data = {
- # 'model': lda,
- # 'label_encoder_y': le_y,
- # 'label_encoder_X': le_X
- # }
- # joblib.dump(model_data, model_path+'model_and_encoders.joblib')
- # return {'code': 200, 'msg': '成功'}
- # except Exception as e:
- # return {'code': 500, 'msg': str(e)}
- #
- # @app.route('/biz/score/train', methods=['POST'])
- # def train_model():
- # try:
- # data = request.json
- # model_path = data.get('modelPath')
- # result = train_and_save_model(data, model_path)
- # return jsonify(result)
- # except Exception as e:
- # return jsonify({'code': 500, 'msg': str(e)})
def train_and_save_model(data, model_path):
    """Fit an LDA classifier on ``data['dataSet']`` and persist it with joblib.

    Args:
        data: Payload whose ``'dataSet'`` entry can be turned into a
            DataFrame with the columns ``factor1`` ... ``factor44`` and
            ``finalGrade``.
        model_path: String prefix; the bundle is written to
            ``model_path + 'model_and_encoders.joblib'``.

    Returns:
        ``{'code': 200, 'msg': '成功'}`` on success, otherwise
        ``{'code': 500, 'msg': <error text>}``. Errors are reported through
        the return value, never raised.
    """
    try:
        # Reject an unusable target path before spending time on training.
        if not isinstance(model_path, str):
            raise TypeError("model_path must be a string")

        df = pd.DataFrame(data['dataSet'])
        feature_columns = [f'factor{i}' for i in range(1, 45)]
        X = df[feature_columns]
        # Select the target as a 1-D Series; a one-column DataFrame makes
        # LabelEncoder emit a column-vector conversion warning.
        y = df['finalGrade']

        le_X = LabelEncoder()
        # NOTE(review): DataFrame.apply refits this one encoder on each
        # column in turn, so the saved ``le_X`` only holds the classes of the
        # last column fitted. Scoring is only consistent if every factor
        # column shares the same value set - confirm, or persist one encoder
        # per column.
        X = X.apply(le_X.fit_transform)
        le_y = LabelEncoder()
        y = le_y.fit_transform(y)

        lda = LinearDiscriminantAnalysis()
        lda.fit(X, y)

        model_data = {
            'model': lda,
            'label_encoder_y': le_y,
            'label_encoder_X': le_X,
        }
        joblib.dump(model_data, model_path + 'model_and_encoders.joblib')
        return {'code': 200, 'msg': '成功'}
    except Exception as e:
        return {'code': 500, 'msg': str(e)}
@app.route('/biz/score/train', methods=['POST'])
def train_model():
    """Train a model from the posted JSON payload and save it to disk.

    The request body must be a JSON object with ``modelPath`` (prefix of the
    file to write) and ``dataSet`` (the training records). The response is
    the JSON envelope returned by ``train_and_save_model``, or a ``code`` 500
    envelope if the request itself is unusable.
    """
    try:
        data = request.json
        if not isinstance(data, dict):
            raise ValueError("request body must be a JSON object")
        model_path = data.get('modelPath')
        # Fail fast: without this check the missing path would only surface
        # after the model had already been fitted.
        if not isinstance(model_path, str):
            raise ValueError("'modelPath' is required and must be a string")

        # SECURITY NOTE(review): modelPath comes from the client and decides
        # where a file is written; restrict it to a trusted directory.
        result = train_and_save_model(data, model_path)
        return jsonify(result)
    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
- #验证算法API
- # 创建Flask应用
- # app = Flask(__name__)
- # 定义训练模型的函数
- # def train_model2(data):
- # try:
- # X = data.iloc[:, 2:-1]
- # y = data.iloc[:, -1]
- # le = LabelEncoder()
- # X = X.apply(le.fit_transform)
- # y = le.fit_transform(y)
- # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
- # lda = LinearDiscriminantAnalysis()
- # lda.fit(X_train, y_train)
- # y_pred = lda.predict(X_test)
- # y_pred_text = le.inverse_transform(y_pred)
- # accuracy = accuracy_score(y_test, y_pred)
- # recall = recall_score(y_test, y_pred, average='macro')
- # return accuracy, recall, X_test,X_train,y_pred_text
- # except Exception as e:
- # raise Exception(f"Failed to make Scoring: {str(e)}")
- # # 定义路由和处理函数
- # @app.route('/biz/scoretaskdetail/verification', methods=['POST'])
- # def verification():
- # try:
- # # 从请求中获取JSON数据
- # input_data = request.get_json()
- # # 将JSON数据转换为DataFrame
- # data = pd.DataFrame(input_data['dataSet'])
- # # 训练模型
- # accuracy, recall, X_test,X_train,y_pred_text= train_model2(data)
- # data['trained'] = 0
- # data['tested'] = 0
- # data.loc[data.index.isin(X_test.index), 'tested'] = 1
- # # 标记训练数据
- # data.loc[data.index.isin(X_train.index), 'trained'] = 1
- # # 预测结果
- # data.loc[data.index.isin(X_test.index), 'calculatedGrade'] = y_pred_text
- # output_data = {}
- # output_data['code'] = 200
- # output_data['msg'] = '成功'
- # output_data['data'] = {}
- # output_data['data']['accuracyRate'] = round(accuracy*100, 2)
- # output_data['data']['recallRate'] = round(recall*100, 2)
- # output_data['data']['dataSet'] = data.to_dict('records')
- # # 返回JSON格式的响应
- # return jsonify(output_data)
- # except Exception as e:
- # raise Exception(f"Failed to make Scoring: {str(e)}")
- # 定义训练模型的函数
def train_model2(data):
    """Fit an LDA classifier on a 70/30 split of ``data`` and evaluate it.

    Args:
        data: DataFrame with the columns ``factor1`` ... ``factor44`` and
            ``finalGrade``.

    Returns:
        A tuple ``(accuracy, recall, X_test, X_train, y_pred_text)``:
        accuracy and macro-averaged recall on the held-out part, the encoded
        test and training feature frames produced by ``train_test_split``,
        and the predicted grades for ``X_test`` decoded back to their
        original labels (same order as the rows of ``X_test``).

    Raises:
        Exception: On any failure; the original error is chained as
            ``__cause__``.
    """
    try:
        feature_columns = [f'factor{i}' for i in range(1, 45)]
        X = data[feature_columns]
        # 1-D target: a one-column DataFrame makes LabelEncoder emit a
        # column-vector conversion warning.
        y = data['finalGrade']

        # Separate encoders for features and target, so decoding predictions
        # does not depend on which fit happened to run last.
        feature_encoder = LabelEncoder()
        X = X.apply(feature_encoder.fit_transform)
        grade_encoder = LabelEncoder()
        y = grade_encoder.fit_transform(y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=40
        )

        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)
        y_pred = lda.predict(X_test)
        y_pred_text = grade_encoder.inverse_transform(y_pred)

        accuracy = accuracy_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred, average='macro')
        return accuracy, recall, X_test, X_train, y_pred_text
    except Exception as e:
        raise Exception(f"Failed to train and validate the model: {str(e)}") from e
- # 定义路由和处理函数
@app.route('/biz/scoretaskdetail/verification', methods=['POST'])
def verification():
    """Train on part of the posted records and report hold-out quality.

    The request body must be a JSON object whose ``dataSet`` entry holds the
    labelled records. On success the response envelope has ``code`` 200 and,
    under ``data``, ``accuracyRate`` and ``recallRate`` (percentages rounded
    to two decimals) plus ``dataSet``: the input records extended with
    ``trained`` / ``tested`` flags (0 or 1) and, for tested rows,
    ``calculatedGrade``. On failure the envelope has ``code`` 500 and the
    error text under ``msg``, matching the other endpoints in this file.
    """
    try:
        input_data = request.get_json()
        data = pd.DataFrame(input_data['dataSet'])

        accuracy, recall, X_test, X_train, y_pred_text = train_model2(data)

        data['trained'] = 0
        data['tested'] = 0
        data.loc[X_train.index, 'trained'] = 1
        data.loc[X_test.index, 'tested'] = 1

        # train_test_split shuffles, so y_pred_text follows the row order of
        # X_test, not of data. Assign by X_test's labels (not by a boolean
        # mask, which walks rows in ascending order) so that every prediction
        # lands on the row it was computed for.
        data['calculatedGrade'] = None
        data.loc[X_test.index, 'calculatedGrade'] = y_pred_text.tolist()

        # NaN is not valid JSON; turn missing values into null.
        records = data.astype(object).where(data.notna(), None).to_dict('records')

        output_data = {
            'code': 200,
            'msg': '成功',
            'data': {
                'accuracyRate': round(accuracy * 100, 2),
                'recallRate': round(recall * 100, 2),
                'dataSet': records,
            },
        }
        return jsonify(output_data)
    except Exception as e:
        # Report failures in the JSON envelope instead of raising, which
        # would make Flask answer with an HTML error page.
        return jsonify({'code': 500, 'msg': str(e)})
- # 运行应用
if __name__ == '__main__':
    import os

    # SECURITY: the Werkzeug debugger lets anyone who can reach the port run
    # arbitrary code, and host='0.0.0.0' listens on every interface. Debug
    # mode is therefore off unless explicitly requested via FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(debug=debug_enabled, port=8082, host='0.0.0.0')
|