1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import requests
- from nltk.tokenize import sent_tokenize
- from fastapi import FastAPI
- from return_result import get_ner_result
- from return_result import get_re_result
- from pdfReader import extract_text_from_pdf
- from pdfReader import split_sentences
- from threading import Lock
# Module-level lock shared by every request handled in this process; the
# /extractKnowledge endpoint uses it to allow one extraction task at a time.
lock = Lock()

# FastAPI application object that the route decorators below register on.
app = FastAPI()
@app.post("/extractKnowledge")
async def process_doc(data: dict):
    """Extract knowledge from a PDF and push it to the knowledge-save service.

    Only one extraction may run at a time in this process: if the module-level
    lock is already held, the request is rejected immediately instead of
    waiting.

    Args:
        data: Request body. ``docAddress`` must be a string ending in ``.pdf``;
            ``docId``, ``taskId``, ``subTaskId`` and ``docName`` are copied
            into the outgoing payload unchanged.

    Returns:
        A dict with ``code`` (200 on success, 500 on any failure) and ``msg``.
    """
    # Non-blocking acquire: reject right away rather than queue behind a
    # running task.
    # NOTE(review): this handler is `async def` and never awaits, so calls may
    # already be serialized by the event loop — confirm the lock is needed.
    if not lock.acquire(blocking=False):
        return {"code": 500,
                "msg": "Task is already running. Please try again later."}
    try:
        doc_address = data.get("docAddress")
        # A missing or non-string address is rejected the same way as a
        # non-PDF one instead of crashing on `.endswith`.
        if not isinstance(doc_address, str) or not doc_address.endswith(".pdf"):
            return {"code": 500,
                    "msg": "Invalid file format"}

        text = extract_text_from_pdf(doc_address)

        # Run NER, then relation extraction, sentence by sentence and flatten
        # the per-sentence results into one list.
        knowledge = []
        for sentence in split_sentences(text):
            entities = get_ner_result(sentence)
            knowledge.extend(get_re_result(entities, sentence))

        payload = {
            "docInfo": {
                "docId": data.get("docId"),
                "taskId": data.get("taskId"),
                "subTaskId": data.get("subTaskId"),
                "docName": data.get("docName"),
                "docAddress": doc_address,
            },
            "knowledgeList": knowledge,
            "code": 200,
            "msg": "success",
        }

        url = "http://example.com/kg/saveKnowledge"
        headers = {"Content-Type": "application/json"}
        try:
            # `json=` serializes the nested payload as a JSON body; `data=`
            # would form-encode it and contradict the Content-Type header.
            # NOTE(review): no timeout is set, so a hung endpoint keeps the
            # lock held until the connection drops.
            response = requests.post(url, headers=headers, json=payload)
            response.raise_for_status()  # raises on 4xx/5xx responses
            body = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON response body on requests versions
            # whose decode error is not a RequestException.
            return {"code": 500, "msg": str(e)}

        if body.get("code") != 200:
            return {"code": 500,
                    "msg": body.get("msg", "Knowledge service returned an error")}
        return {"code": 200, "msg": "成功"}

    except requests.exceptions.RequestException as e:
        return {"code": 500, "msg": str(e)}
    finally:
        # Always release so a failed task cannot block later requests.
        lock.release()
-
-
if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when this file is run directly.
    import uvicorn

    # Listen on all interfaces, port 9999, with a single worker.
    server_options = {
        "host": "0.0.0.0",
        "port": 9999,
        "workers": 1,
    }
    uvicorn.run(app=app, **server_options)
|