recommend_re.py

from flask import Flask, request, jsonify
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import json
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False
app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"  # MIME type and character encoding used when rendering the response
# Load a previously trained model from disk
def load_model(model_path):
    try:
        # Load the pre-trained model
        loaded_model = joblib.load(model_path)
        return loaded_model
    except Exception as e:
        raise Exception(f"Failed to load the model: {str(e)}")
# Recommendation/prediction function
def make_recommendations(model, use_scene, search_condition):
    try:
        # Convert input data to the format used for training
        input_data = pd.DataFrame({'useScene': [use_scene], 'searchCondition': [search_condition]})
        X_input = input_data[['useScene', 'searchCondition']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
        # Predict recommendations using the trained model
        y_pred = model.predict(X_input)
        y_pred_list = y_pred[0].tolist()
        # Format the output data
        output_data = {
            'useScene': use_scene,
            'searchCondition': search_condition,
            'result1Id': y_pred_list[0],
            'result2Id': y_pred_list[1],
            'result3Id': y_pred_list[2],
            'result4Id': y_pred_list[3],
            'result5Id': y_pred_list[4]
        }
        return output_data
    except Exception as e:
        raise Exception(f"Failed to make recommendations: {str(e)}")
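# Example usage (hypothetical values, assuming a pipeline trained as in train_and_save_model below):
#   make_recommendations(model, 'office', 'laptop')
#   -> {'useScene': 'office', 'searchCondition': 'laptop',
#       'result1Id': 101, 'result2Id': 102, 'result3Id': 103, 'result4Id': 104, 'result5Id': 105}
# The five result IDs are the values of the five label columns predicted for the joined text.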
# Endpoint that calls the recommendation function
@app.route('/recommend', methods=['POST'])
def recommend_endpoint():
    try:
        # Get input parameters from the request
        data = request.json
        model_path = data.get('modelPath')
        use_scene = data.get('useScene')
        search_condition = data.get('searchCondition')
        # Load the pre-trained pipeline (vectorizer + classifier)
        model = load_model(model_path + 'model.pkl')
        # Make recommendations
        result_data = make_recommendations(model, use_scene, search_condition)
        return jsonify({'code': 200, 'msg': '成功', 'data': result_data})
    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
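# Example /recommend request body (hypothetical values; modelPath is assumed to end with a
# path separator, because the code appends 'model.pkl' directly to it):
#   {
#       "modelPath": "/data/models/",
#       "useScene": "office",
#       "searchCondition": "laptop"
#   }
# A successful response looks like {"code": 200, "msg": "成功", "data": {...output_data...}}.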
# Training API -- final version
# Train a model and save it to disk
def train_and_save_model(data, model_path):
    try:
        # Convert the JSON data to a DataFrame
        df = pd.DataFrame(data['dataSet'])
        pipeline = Pipeline([
            ('vectorizer', TfidfVectorizer()),
            ('classifier', RandomForestClassifier(n_estimators=400, random_state=42))
        ])
        # Extract features and labels
        X = df[['useScene', 'searchCondition']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
        y = df[['result1Id', 'result2Id', 'result3Id', 'result4Id', 'result5Id']]
        pipeline.fit(X, y)
        # Save the trained model to a file
        joblib.dump(pipeline, model_path + 'model.pkl')
        return {'code': 200, 'msg': '成功'}
    except Exception as e:
        return {'code': 500, 'msg': str(e)}
# Endpoint that calls the train-and-save function
@app.route('/train', methods=['POST'])
def train_model():
    try:
        # Get input parameters from the request
        data = request.json
        model_path = data.get('modelPath')
        # Call the function to train and save the model
        result = train_and_save_model(data, model_path)
        return jsonify(result)
    except Exception as e:
        return jsonify({'code': 500, 'msg': str(e)})
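# Example /train request body (hypothetical values; each dataSet record needs the two text
# fields and the five label columns referenced above):
#   {
#       "modelPath": "/data/models/",
#       "dataSet": [
#           {"useScene": "office", "searchCondition": "laptop",
#            "result1Id": 101, "result2Id": 102, "result3Id": 103,
#            "result4Id": 104, "result5Id": 105},
#           ...
#       ]
#   }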
# Validation API -- final version 2
# Train a model on a train/test split for validation
def train_model2(data):
    train_data, test_data = train_test_split(data, test_size=0.3, random_state=48)
    X_train = train_data[['useScene', 'searchCondition']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
    y_train = train_data[['result1Id', 'result2Id', 'result3Id', 'result4Id', 'result5Id']]
    X_test = test_data[['useScene', 'searchCondition']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
    y_test = test_data[['result1Id', 'result2Id', 'result3Id', 'result4Id', 'result5Id']]
    train_data['trained'] = 1
    train_data['tested'] = 0
    # Convert the text into TF-IDF features
    vectorizer = TfidfVectorizer()
    X_train_matrix = vectorizer.fit_transform(X_train)
    X_test_matrix = vectorizer.transform(X_test)
    rf_classifier = RandomForestClassifier(n_estimators=400, random_state=42)
    rf_classifier.fit(X_train_matrix, y_train)
    return vectorizer, rf_classifier, test_data, y_test, train_data
# Prediction and evaluation function
def predict(vectorizer, rf_classifier, test_data, y_test, train_data):
    X_test = test_data[['useScene', 'searchCondition']].apply(lambda x: ' '.join(x.astype(str)), axis=1)
    y_pred = rf_classifier.predict(vectorizer.transform(X_test))
    y_pred_df = pd.DataFrame(y_pred, columns=['calculate1Id', 'calculate2Id', 'calculate3Id', 'calculate4Id', 'calculate5Id'])
    y_pred_df.index = test_data.index
    output_df = pd.concat([test_data[['id', 'useScene', 'searchCondition', 'result1Id', 'result2Id', 'result3Id', 'result4Id', 'result5Id']], y_pred_df], axis=1)
    output_df['trained'] = 0  # 0 indicates the record is not in the training set
    output_df['tested'] = 1   # 1 indicates the record is in the testing set
    full_data = pd.concat([train_data, output_df]).sort_index().reset_index(drop=True)
    # Compute accuracy and recall per test row
    TP_list = []
    TN_list = []
    FP_list = []
    FN_list = []
    for i in range(y_test.shape[0]):
        # Count true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN) for each row
        y_test_row = y_test.iloc[i, :].astype(str)
        y_pred_row = y_pred_df.iloc[i, :].astype(str)
        TP = sum(y_test_row.isin(y_pred_row))    # expected IDs that appear among the predictions
        TN = sum(~y_test_row.isin(y_pred_row))   # note: computed identically to FN below
        FP = sum(~y_pred_row.isin(y_test_row))   # predicted IDs that are not expected
        FN = sum(~y_test_row.isin(y_pred_row))   # expected IDs missing from the predictions
        TP_list.append(TP)
        TN_list.append(TN)
        FP_list.append(FP)
        FN_list.append(FN)
    accuracy_list = [(TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) != 0 else 0 for TP, TN, FP, FN in zip(TP_list, TN_list, FP_list, FN_list)]
    recall_list = [TP / (TP + FN) if (TP + FN) != 0 else 0 for TP, FN in zip(TP_list, FN_list)]
    # Assemble the JSON-format result
    output_data = {}
    output_data['code'] = 200
    output_data['msg'] = '成功'
    output_data['data'] = {}
    output_data['data']['accuracyRate'] = round((sum(accuracy_list) / len(accuracy_list)) * 100, 2)
    output_data['data']['recallRate'] = round((sum(recall_list) / len(recall_list)) * 100, 2)
    output_data['data']['dataSet'] = full_data.to_dict('records')
    return output_data
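# Worked example of the per-row counting above (hypothetical IDs): if a test row expects
# {1, 2, 3, 4, 5} and the model predicts {1, 2, 3, 8, 9}, then
#   TP = 3 (1, 2, 3 appear in the predictions), FP = 2 (8, 9 are not expected),
#   FN = 2 (4, 5 are missed), and TN = 2 (computed the same way as FN),
# so the row's accuracy is (3 + 2) / (3 + 2 + 2 + 2) = 5/9 ≈ 0.56 and its recall is 3 / (3 + 2) = 0.6.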
# Route and handler for validation
@app.route('/verification', methods=['POST'])
def verification():
    # Get the JSON data from the request
    input_data = request.get_json()
    # Convert the JSON data to a DataFrame
    data = pd.DataFrame(input_data['dataSet'])
    # Train the model on a train/test split
    vectorizer, rf_classifier, test_data, y_test, train_data = train_model2(data)
    # Predict on the test split and compute metrics
    output_data = predict(vectorizer, rf_classifier, test_data, y_test, train_data)
    # Return the JSON response
    return jsonify(output_data)
# Run the application
if __name__ == '__main__':
    app.run(debug=True, port=8081, host='0.0.0.0')
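# Example calls against a local instance (hypothetical paths and values):
#   curl -X POST http://localhost:8081/train -H "Content-Type: application/json" \
#        -d '{"modelPath": "/data/models/", "dataSet": [...]}'
#   curl -X POST http://localhost:8081/recommend -H "Content-Type: application/json" \
#        -d '{"modelPath": "/data/models/", "useScene": "office", "searchCondition": "laptop"}'
#   curl -X POST http://localhost:8081/verification -H "Content-Type: application/json" \
#        -d '{"dataSet": [...]}'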