"""Text-similarity HTTP service built on FastAPI and sentence-transformers.

Endpoints:
    POST /taskflow/checkRepeatText  -- cosine similarity of one text pair.
    POST /taskflow/getRepeatText    -- top matches for a query within a corpus.

Both endpoints take a JSON body of the form ``{"data": {"text": [...]}}``.
"""

# Earlier Flask + PaddleNLP Taskflow version of this service (commented out):
#
# from paddlenlp import Taskflow
# similarity1 = Taskflow("text_similarity",device_id=3,precision='fp16')##checkRepeatText
# from flask import Flask, request, jsonify
# import threading
# app = Flask(__name__)
# # Create a lock object
# lock = threading.Lock()
# @app.route('/taskflow/checkRepeatText', methods=['POST'])
# def process_request():
#     with lock:
#         data = request.get_json()
#         # print("data",data)
#         # Extract the text data
#         text_data = data.get('data', {}).get('text')
#         # Process the text data, e.g. check for errors
#         # Here you can add the actual logic to check the document for errors
#         res =similarity1(text_data)
#         # Example: simply print the received text
#         #
#         # Return the response
#         return jsonify({"status": "success", "data": res}), 200
# if __name__ == '__main__':
#     app.run(threaded=True,port=8192)

import itertools

import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Maximum number of corpus entries returned by /taskflow/getRepeatText.
TOP_K = 4

app = FastAPI()
model = SentenceTransformer("shibing624/text2vec-base-chinese", device="npu:5")


class RequestData(BaseModel):
    """Request body shared by both endpoints: ``{"data": {...}}``."""

    data: dict


def _extract_text(payload: dict) -> list:
    """Return the non-empty ``text`` list from *payload*.

    Raises:
        HTTPException: 422 when ``text`` is missing, not a list, or empty.
    """
    text = payload.get("text")
    if not isinstance(text, list) or not text:
        raise HTTPException(
            status_code=422,
            detail="'data.text' must be a non-empty list",
        )
    return text


def _success(results: list) -> JSONResponse:
    """Wrap *results* in the success envelope used by both endpoints."""
    return JSONResponse(
        content={"status": "success", "data": results},
        status_code=200,
    )


# NOTE(review): the handlers below are ``async def`` but call ``model.encode``
# synchronously (no ``await``), so a request occupies the coroutine until the
# encoding finishes -- confirm that this serialization is intended.


@app.post("/taskflow/checkRepeatText")
async def check_repeat_text(request: RequestData):
    """Score the similarity of a single text pair.

    Expects ``request.data["text"]`` to be a list whose first element is a
    pair ``[text1, text2]``. Only that first pair is scored; any further
    elements are ignored.

    Returns:
        JSONResponse whose ``data`` is a one-element list of
        ``{"text1", "text2", "similarity"}``.

    Raises:
        HTTPException: 422 when the payload does not contain such a pair.
    """
    text_data = _extract_text(request.data)
    pair = text_data[0]
    if not isinstance(pair, (list, tuple)) or len(pair) < 2:
        raise HTTPException(
            status_code=422,
            detail="'data.text[0]' must be a pair [text1, text2]",
        )
    first, second = pair[0], pair[1]

    first_embedding = model.encode(first)
    second_embedding = model.encode(second)
    similarity = util.cos_sim(first_embedding, second_embedding)

    results = [
        {"text1": first, "text2": second, "similarity": similarity.item()},
    ]
    return _success(results)


@app.post("/taskflow/getRepeatText")
async def get_repeat_text(request: RequestData):
    """Return the corpus entries most similar to a query text.

    Expects ``request.data["text"]`` to be ``[corpus, query]`` where
    ``corpus`` is a list of documents and ``query`` is the text to look up.
    At most ``TOP_K`` entries are returned, in the order produced by
    ``torch.topk`` over the cosine scores.

    Returns:
        JSONResponse whose ``data`` is a list of
        ``{"text1": corpus_entry, "text2": query, "similarity": score}``;
        the list is empty when the corpus is empty.

    Raises:
        HTTPException: 422 when the payload does not have that shape.
    """
    text_data = _extract_text(request.data)
    if len(text_data) < 2:
        raise HTTPException(
            status_code=422,
            detail="'data.text' must be [corpus, query]",
        )
    corpus = text_data[0]  # all candidate documents
    query = text_data[1]  # the document to look up
    if not isinstance(corpus, list):
        raise HTTPException(
            status_code=422,
            detail="'data.text[0]' must be a list of documents",
        )
    if not corpus:
        # Nothing to compare against; skip encoding and top-k entirely.
        return _success([])

    corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
    query_embedding = model.encode(query, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]

    top_k = min(TOP_K, len(corpus))
    top_scores, top_indices = torch.topk(cos_scores, k=top_k)

    results = []
    # Convert once to plain Python numbers so the list is indexed with ints.
    for score, index in zip(top_scores.tolist(), top_indices.tolist()):
        print(corpus[index], "(Score: {:.4f})".format(score))
        results.append(
            {"text1": corpus[index], "text2": query, "similarity": score}
        )
    return _success(results)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8192)