From 6a406ec64e13dde96c7069d763883c1f5c9a3b6c Mon Sep 17 00:00:00 2001
From: zhouhaibin <zhouhaibin@qq.com>
Date: Thu, 17 Oct 2024 16:12:25 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=96=87=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 checkCompanyName.py   |  92 ++++----
 checkDocumentError.py |  90 ++++----
 checkPlaceName.py     |  13 +-
 checkRepeatText.py    | 190 ++++++++++------
 checkTitleName.py     |  85 ++++----
 daijian方案.py      | 489 +++++++++++++++++++++++++++++++++---------
 main.py               | 444 ++++++++++++++++++++++----------------
 myLogger.py           | 169 ++++-----------
 8 files changed, 963 insertions(+), 609 deletions(-)

diff --git a/checkCompanyName.py b/checkCompanyName.py
index 4d2f1fd..1735ff2 100644
--- a/checkCompanyName.py
+++ b/checkCompanyName.py
@@ -8,9 +8,10 @@ import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import requests
-from myLogger import outLog
+# from myLogger import outLog
 import time
 
+
 def load_from_xml_v2(baseURI, rels_item_xml):
     """
     Return |_SerializedRelationships| instance loaded with the
@@ -31,9 +32,9 @@ _SerializedRelationships.load_from_xml = load_from_xml_v2
 
 import logging
 
-outLog.logger = logging.getLogger("checkCompanyName")
-userLog=None
-prompt ='''
+# outLog.logger = logging.getLogger("checkCompanyName")
+userLog = None
+prompt = '''
 .根据上述文本判断，是否为具体的公司或组织名称，你可以使用工具利用互联网查询，
 你只能在[具体的公司或组织名称,公益组织,简称,统称,泛化组织,政府单位,机关单位,学校，行业类型，其他]选项中选择答案,
 回答格式[{“companyName”：“名称”,"回答":"答案"}，{“companyName”：“名称”,"回答":"答案"}]，不做过多的解释,严格按回答格式作答;
@@ -54,8 +55,8 @@ def getDocxToTextAll(name):
     docxPath = name
     loopCount = 0
     while True:
-        loopCount+=1
-        if(loopCount>=15):
+        loopCount += 1
+        if (loopCount >= 60):
             raise Exception("文档读取超时，或文档存在问题无法读取")
             break
         try:
@@ -76,17 +77,16 @@ def getDocxToTextAll(name):
             words.append(text)
     # 将所有段落文本拼接成一个字符串，并用换行符分隔
     text = '\n'.join(words)
-    # userLog.info("checkCompanyName----保存文件")
     # 将文本写入txt文件
     with open("checkCompanyName.txt", 'w', encoding='utf-8') as txt_file:
         txt_file.write(text)
 
 
 def companyNameTask(text):
-    yield "文档公司或组织名称检查---启动中...."
-    userLog.info("checkCompanyName----启动中....")
-    batchNum = 20
-    sentences = re.split(r'[。\n]', text)
+    yield "文档公司或组织名称检查---文档解析中...."
+    userLog.info("文档公司或组织名称检查---任务开始")
+    batchNum = 5
+    sentences = re.split(r'[、，。\n]', text)
     # 去掉空字符
     sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
     # 计算总字符数
@@ -101,19 +101,19 @@ def companyNameTask(text):
     # 打印每一份的内容
     for i, chunk in enumerate(chunks):
         yield f"文档公司或组织名称检查---文档解析进度:{i + 1}/{num_chunks}"
-        userLog.info(f"checkCompanyName----文档解析进度:{i + 1}/{num_chunks}")
         try:
-            wenBen = ".".join(chunk)
-            url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
+            # wenBen = ".".join(chunk)
+            url = "http://0.0.0.0:8191/taskflow/checkPlaceNameServer"
             headers = {"Content-Type": "application/json"}
             data = {
                 "data": {
-                    "text": wenBen,
+                    "text": chunk,
+                    # "text":wenBen
                 }
             }
             r = requests.post(url=url, headers=headers, data=json.dumps(data))
             res = json.loads(r.text)
-            # userLog.info(res)
+            res = res["data"]
             # print(res)
         except Exception as e:
             userLog.warning(chunk)
@@ -121,44 +121,52 @@ def companyNameTask(text):
             userLog.warning(e)
             return
         isplace = False
-        for zuhe in res["result"]:
+
+        # for zuhe in res:
+        #     # 上一个的地名,这一个还是地名，就和上一个相加代替这个
+        #     if isplace:
+        #         name = placeList[len(placeList) - 1]
+        #         if zuhe[1].find("组织机构类") >= 0:  # or zuhe[1] == "ns"
+        #             isplace = True
+        #             new_text = zuhe[0].replace("\n", "")
+        #             placeList[len(placeList) - 1] = name + new_text
+        #             continue
+        #     if zuhe[1].find("组织机构类") >= 0:
+        #         isplace = True
+        #         new_text = zuhe[0].replace("\n", "")
+        #         placeList.append(new_text)
+        #     else:
+        #         isplace = False
+        ##案例[[('目前', 'TIME'), ('江北区历史文化档案馆', 'ORG')], [('宁波国研简直，并且在东软', 'ORG'), ('宁波市北仑区教育局', 'ORG'), ('国研信息', 'ORG'), ('浙江省', 'LOC'), ('宁波市金凤区', 'LOC'), ('金凤区', 'LOC')]]
+        for zuhe in res:
             # 上一个的地名,这一个还是地名，就和上一个相加代替这个
-            if isplace:
-                name = placeList[len(placeList) - 1]
-                if zuhe[1].find("组织机构类") >= 0:  # or zuhe[1] == "ns"
-                    isplace = True
-                    new_text = zuhe[0].replace("\n", "")
-                    placeList[len(placeList) - 1] = name + new_text
-                    continue
-            if zuhe[1].find("组织机构类") >= 0:
-                isplace = True
-                new_text = zuhe[0].replace("\n", "")
-                placeList.append(new_text)
-            else:
-                isplace = False
+            for chid in zuhe:
+                if (chid[1] == "ORG"):
+                    new_text = chid[0].replace("\n", "")
+                    placeList.append(new_text)
     # 打印总份数
     yield "文档公司或组织名称检查---文档解析完成"
-    userLog.info("checkCompanyName----文档解析完成")
     placeList = list(dict.fromkeys(placeList))
+    userLog.debug(placeList)
     yield placeList
-    userLog.info(placeList)
 
-def checkCompanyName(filename,user_id):
+
+def checkCompanyName(filename, user_id, outLog):
     yield f"文档公司或组织名称检查---开始处理文档..."
     global userLog
-    userLog=outLog.get_queue(user_id, "checkCompanyName")
+    userLog = outLog.get_queue(user_id, "checkCompanyName")
     try:
         getDocxToTextAll(filename)
     except Exception as e:
         userLog.warning(e)
         userLog.warning("文档公司或组织名称检查---文档无法打开，请检查文档内容")
-        yield "文档公司或组织名称检查---文档无法打开，请检查文档内容"
+        yield "文档公司或组织名称检查---文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
         outLog.mark_done(user_id, "checkCompanyName")
         return
     with open("checkCompanyName.txt", "r", encoding='utf-8') as f:
         gettext = f.read()
     yield f"文档公司或组织名称检查---开始解析文档..."  # 每次生成一个数字就发送
-    userLog.info("checkCompanyName----开始解析文档...")
+    final_list = ""
     for item in companyNameTask(gettext):
         if isinstance(item, str):
             yield item
@@ -174,7 +182,6 @@ def checkCompanyName(filename,user_id):
         if cishu > 3:
             cishu = 0
         yield "文档公司或组织名称检查---结果生成中" + '.' * cishu
-        userLog.info(f"checkCompanyName----结果生成中" + '.' * cishu)
         cishu += 1
     data = runList[len(runList) - 1][0]["content"]
     parsed_data = json_repair.loads(data.replace('`', ''))
@@ -182,14 +189,15 @@ def checkCompanyName(filename,user_id):
 
     for place in parsed_data:
         try:
-            if place['回答'] == '非泛化的公司或组织名称':
+            if place['回答'] == '具体的公司或组织名称':
+                if (place["companyName"] == "北京国研科技咨询有限公司浙江分公司"):
+                    continue
                 error_places.append(place)
         except Exception as e:
             userLog.warning(place)
             userLog.warning(e)
             userLog.warning("文档公司或组织名称检查---组织提出出错")
             continue
-    userLog.info(error_places)
     returnInfo = "发现异常公司或组织名称<br>"
     if len(error_places) > 0:
         for t in error_places:
@@ -199,9 +207,9 @@ def checkCompanyName(filename,user_id):
             t["yuanwen"] = paragraphs[0]
             yuanwen = paragraphs[0].replace(keyword, f"**{keyword}**").replace("\n", "")
             returnInfo += "原文：" + yuanwen + "<br>异常公司或组织名称：**" + keyword + "**！请注意" + "<br>"
-            userLog.info(returnInfo)
+            userLog.info("文档公司或组织名称检查---原文：" + yuanwen + "异常公司或组织名称：" + keyword + "！请注意")
         yield returnInfo
     else:
         yield "**未发现异常公司或组织名称**<br>"
-        userLog.info("**未发现异常公司或组织名称**<br>")
-    outLog.mark_done(user_id, "checkCompanyName")
\ No newline at end of file
+        userLog.info("文档公司或组织名称检查---未发现异常公司或组织名称")
+    outLog.mark_done(user_id, "checkCompanyName")
diff --git a/checkDocumentError.py b/checkDocumentError.py
index 33d7ed4..8728136 100644
--- a/checkDocumentError.py
+++ b/checkDocumentError.py
@@ -8,7 +8,7 @@ import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import requests
-from myLogger import outLog
+# from myLogger import outLog
 import time
 def load_from_xml_v2(baseURI, rels_item_xml):
     """
@@ -27,9 +27,9 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 
 
 _SerializedRelationships.load_from_xml = load_from_xml_v2
-import logging
+# import logging
 
-outLog.logger = logging.getLogger("checkDocumentError")
+# outLog.logger = logging.getLogger("checkDocumentError")
 userLog=None
 llm_cfg = {
     # 'model': 'qwen1.5-72b-chat',
@@ -40,7 +40,7 @@ llm_cfg = {
 bot = Assistant(llm=llm_cfg,
                 name='Assistant',
                 # description='使用RAG检索并回答，支持文件类型：PDF/Word/PPT/TXT/HTML。'
-
+                system_message="你是一个错别字分析大师"
                 )
 # prompt='''
 # 是否存在错别字，若存在请指出，不做其他方面的校验，你只能在[存在，不存在，未知]选项中选择答案,
@@ -48,25 +48,25 @@ bot = Assistant(llm=llm_cfg,
 # '''
 prompt = '''
 请回答以上问题，[是，否]选项中选择答案,原文内容，标点符号保持不变，如果有错请给出详细的解析，没有错则不用给解析
-回答格式请按照以下json格式[{"placeName":"序号","回答":"答案","解析","解析内容"},{"placeName":"序号","回答":"答案","解析","解析内容"}]，不做过多的解释,严格按回答格式作答;
+回答格式请按照以下json格式[{"placeName":"序号值","回答":"答案","解析","解析内容"},{"placeName":"序号值","回答":"答案","解析","解析内容"}]，不做过多的解释,严格按回答格式作答;
 '''
 
 
 def getDocxToTextAll(name):
-    userLog.info("checkDocumentError----打开文档")
     docxPath = name
     loopCount = 0
-    while True:
-        loopCount+=1
-        if(loopCount>=15):
-            raise Exception("文档读取超时，或文档存在问题无法读取")
-            break
-        try:
-            document = Document(docxPath)
-            break
-        except Exception as e:
-            time.sleep(1)
-            pass
+    document = Document(docxPath)
+    # while True:
+    #     loopCount+=1
+    #     if(loopCount>=60):
+    #         raise Exception("文档读取超时，或文档存在问题无法读取")
+    #         break
+    #     try:
+    #         document = Document(docxPath)
+    #         break
+    #     except Exception as e:
+    #         time.sleep(1)
+    #         pass
     # 逐段读取docx文档的内容
     words = []
     for paragraph in document.paragraphs:
@@ -84,23 +84,21 @@ def getDocxToTextAll(name):
         txt_file.write(text)
 
 
-def checkDocumentError(filename,user_id):
+def checkDocumentError(filename,user_id,outLog):
     global userLog
     userLog=outLog.get_queue(user_id,"checkDocumentError")
     yield f"文档纠错---开始处理文档..."
-    userLog.info("checkDocumentError----开始处理文档...")
     try:
         getDocxToTextAll(filename)
     except Exception as e:
         userLog.warning(e)
         userLog.warning("文档纠错----文档无法打开，请检查文档内容")
-        yield "文档纠错----文档无法打开，请检查文档内容"
+        yield "文档纠错----文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
         outLog.mark_done(user_id, "checkDocumentError")
         return
     with open("checkDocumentError.txt", "r", encoding='utf-8') as f:
         gettext = f.read()
     yield f"文档纠错---开始解析文档..."  # 每次生成一个数字就发送
-    userLog.info("checkDocumentError----开始解析文档...")
     final_list = []
     for item in documentErrorTask(gettext):
         if isinstance(item, str):
@@ -113,12 +111,11 @@ def checkDocumentError(filename,user_id):
             yuanwen = i["placeName"].replace("\n", "")
             jianyi = i["jianyi"].replace("\n", "")
             resInfo += "原文：" + yuanwen + "<br>建议：**" + jianyi + "**<br>"
-            userLog.info(resInfo)
         yield resInfo
 
     else:
         yield "**未发现错别字**"
-        userLog.info("未发现错别字")
+        userLog.info("文档纠错---未发现错别字")
     outLog.mark_done(user_id,"checkDocumentError")
 
 
@@ -129,27 +126,33 @@ def documentErrorTask(text):
     :param batch_size: 每批处理的字符数
     :return: 生成器，每次返回一批文本
     """
-    yield "文档纠错---启动中...."
-    userLog.info("checkDocumentError----启动中....")
+    yield "文档纠错---文档解析中...."
+    userLog.info("文档纠错---任务开始")
     batchNum = 20
     sentences = re.split(r'[。\n]', text)
     # 去掉空字符
     sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
     # 计算总字符数
     total_chars = len(sentences)
-
     # 计算有多少份
     num_chunks = math.ceil(total_chars / batchNum)
-
     # 按batchNum字为一份进行处理
     chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
     # 打印每一份的内容
     err = []
     for i, chunk in enumerate(chunks):
         yield f"文档纠错---文档解析进度:{i + 1}/{num_chunks}"
-        userLog.info(f"checkDocumentError----文档解析进度:{i + 1}/{num_chunks}")
         try:
-            url = "http://0.0.0.0:8190/taskflow/checkDocumentError"
+            # url = "http://0.0.0.0:8190/taskflow/checkDocumentError"
+            # headers = {"Content-Type": "application/json"}
+            # data = {
+            #     "data": {
+            #         "text": chunk,
+            #     }
+            # }
+            # r = requests.post(url=url, headers=headers, data=json.dumps(data))
+            # res = json.loads(r.text)
+            url = "http://127.0.0.1:5001/taskflow/checkDocumentError"
             headers = {"Content-Type": "application/json"}
             data = {
                 "data": {
@@ -158,12 +161,13 @@ def documentErrorTask(text):
             }
             r = requests.post(url=url, headers=headers, data=json.dumps(data))
             res = json.loads(r.text)
-            # print(res)
         except Exception as e:
             userLog.warning(chunk)
-            userLog.warning("文档纠错--错别字识别出错\n", e)
+            userLog.warning("文档纠错--错别字识别出错\n")
+            userLog.warning(e)
             continue
-        lines_with_greeting = [place for place in res["result"] if len(place['errors']) > 0]
+        lines_with_greeting = [place for place in res["data"] if len(place['errors']) > 0]
+        userLog.debug(lines_with_greeting)
         if len(lines_with_greeting) > 0:
             num = 0
             wenti = []  # 记录问题的数组
@@ -173,26 +177,28 @@ def documentErrorTask(text):
                 keyword = t['source']
                 keyword_list.append(keyword)
                 for item in t["errors"]:
-                    for key, value in item['correction'].items():
-                        temp_errorWords.append(key)
+                    # for key, value in item['correction'].items():
+                    #     temp_errorWords.append(key)
+                    temp_errorWords.append(item[0])
                 wenti.append(
-                    "序号：{}，原文：{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
+                    # "{}：原文是{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
+                    "{}：原文是{}。问题：当前原文是否存在错别字,只检查错别字，其他不做分析".format(num, keyword))
                 num += 1
             words = "\n".join(wenti)
+            userLog.debug(words)
             messages = [{'role': 'user', 'content': [{'text': words + prompt}]}]
             runList = []
             yield f"文档纠错---内容解析中..."  # 每次生成一个数字就发送
-            userLog.info(f"checkDocumentError----内容解析中...")
             cishu = 0
             for rsp in bot.run(messages):
                 runList.append(rsp)
                 if cishu > 3:
                     cishu = 0
                 yield "文档纠错---内容解析中" + '.' * cishu
-                userLog.info(f"checkDocumentError----内容解析中内容解析中" + '.' * cishu)
                 cishu += 1
             data = runList[len(runList) - 1][0]["content"]
             parsed_data = json_repair.loads(data.replace("\\", "").replace('`', ''))
+            userLog.debug(parsed_data)
             resListerr = []
             for place in parsed_data:
                 try:
@@ -200,14 +206,16 @@ def documentErrorTask(text):
                         place["placeName"] = keyword_list[int(place["placeName"])]
                         place["jianyi"] = place["解析"]
                         resListerr.append(place)
+                        userLog.info("文档纠错---原文：" + place["placeName"] + "<br>建议：" + place["jianyi"])
                 except Exception as e:
                     userLog.warning(parsed_data)
                     userLog.warning(place)
-                    userLog.warning("文档纠错--错别字提取出错\n", e)
+                    userLog.warning("文档纠错--错别字提取出错\n")
+                    userLog.warning(e)
                     continue
             if (len(resListerr) > 0):
                 err.extend(resListerr)
     # 打印总份数
-    yield "文档地名检查---文档解析完成"
-    userLog.info(err)
-    yield err
+    yield "文档纠错---文档解析完成"
+    userLog.info("文档纠错---任务结束")
+    yield err
\ No newline at end of file
diff --git a/checkPlaceName.py b/checkPlaceName.py
index 851827d..5c69bc3 100644
--- a/checkPlaceName.py
+++ b/checkPlaceName.py
@@ -87,7 +87,6 @@ def getDocxToTextAll(docxPath):
 #得到全文和地名有关的内容
 def placeNameTask(text):
     yield "文档地名检查---启动中...."
-    userLog.info("checkPlaceName----启动中....")
     batchNum=20
     sentences = re.split(r'[。\n]', text)
     # 去掉空字符
@@ -104,7 +103,6 @@ def placeNameTask(text):
     # 打印每一份的内容
     for i, chunk in enumerate(chunks):
         yield f"文档地名检查---文档解析进度:{i + 1}/{num_chunks}"
-        userLog.info(f"checkPlaceName----文档解析进度:{i + 1}/{num_chunks}")
         wenBen=".".join(chunk)
         try:
             url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
@@ -139,7 +137,6 @@ def placeNameTask(text):
                 isplace = False
     # 打印总份数
     yield "文档地名检查---文档解析完成"
-    userLog.info("checkPlaceName---文档解析完成")
     placeList=list(dict.fromkeys(placeList))
     yield placeList
 
@@ -175,7 +172,6 @@ def checkPlaceName(filename,user_id):
         if cishu>3:
             cishu=0
         yield "文档地名检查---结果生成中"+'.'*cishu
-        userLog.info("checkPlaceName---结果生成中"+'.'*cishu)
         cishu+=1
     data = runList[len(runList) - 1][0]["content"]
     parsed_data = json_repair.loads(data.replace('`', ''))
@@ -186,12 +182,11 @@ def checkPlaceName(filename,user_id):
             if place['回答'] == '错误':
                 error_places.append(place)
         except Exception as e:
-            userLog.warning(parsed_data)
             userLog.warning(place)
+            userLog.warning(parsed_data)
             userLog.warning("文档地名检查---组织提出出错")
             userLog.warning(e)
             continue
-    userLog.info(error_places)
     returnInfo = "发现异常地名<br>"
     if len(error_places)>0:
         for t in error_places:
@@ -200,9 +195,9 @@ def checkPlaceName(filename,user_id):
             paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
             yuanwen= paragraphs[0].replace(keyword,f"**{keyword}**").replace("\n","")
             returnInfo+="原文：" + yuanwen + "<br>出现异常地名：**" + keyword + "**！请注意" + "<br>"
-            userLog.info(returnInfo)
+            userLog.info("文档地名检查---原文：" + yuanwen + "出现异常地名：" + keyword + "！请注意")
         yield returnInfo
     else:
         yield "**未发现发现异常地名**"
-        userLog.info("未发现发现异常地名")
-        outLog.mark_done(user_id, "checkPlaceName")
\ No newline at end of file
+        userLog.info("文档地名检查---未发现异常地名")
+    outLog.mark_done(user_id, "checkPlaceName")
\ No newline at end of file
diff --git a/checkRepeatText.py b/checkRepeatText.py
index c8688e7..ee5309e 100644
--- a/checkRepeatText.py
+++ b/checkRepeatText.py
@@ -7,6 +7,7 @@ from qwen_agent.agents import Assistant
 import json_repair
 import json
 embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
+# embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese",model_kwargs={"device":"npu:5"})
 device_id=0
 import re
 import time
@@ -17,9 +18,9 @@ from docx.opc.oxml import parse_xml
 import logging
 import logging.config
 import requests
-from myLogger import outLog
+# from myLogger import outLog
 
-outLog.logger = logging.getLogger("checkRepeatText")
+# outLog.logger = logging.getLogger("checkRepeatText")
 userLog=None
 def load_from_xml_v2(baseURI, rels_item_xml):
     """
@@ -79,11 +80,10 @@ def isTitle(paragraph):
 
 #寻找标题名称
 def findTitleName(docxPath):
-    yield '文档相似性检查----检查是否存在详细设计方案'
     loopCount = 0
     while True:
         loopCount+=1
-        if(loopCount>=15):
+        if(loopCount>=60):
             raise Exception("文档读取超时，或文档存在问题无法读取")
             break
         try:
@@ -95,9 +95,19 @@ def findTitleName(docxPath):
     # 逐段读取docx文档的内容
     titleWords=[]
     firstTitle = 0
+    firstTitleName=""
     secondTitle = 0
     sanjiTitle = 0
+    levelText=""
+    count = 0
+    numid =0
+    wordContent={}
+    total = len(document.paragraphs)
+    addStart = False#是否重新添加
+    yield "文档相似性检查----文档内容解析中",str(count),str(total)
     for paragraph in document.paragraphs:
+        count+=1
+        yield "文档相似性检查----文档内容解析中",str(count),str(total)
         # 判断该段落的标题级别
         # 这里用isTitle()临时代表，具体见下文介绍的方法
         text = paragraph.text
@@ -109,6 +119,8 @@ def findTitleName(docxPath):
                 if(text.find("附件")>=0):
                     continue
                 titleWords.append("一级标题:".format(firstTitle)+text)
+                addStart=True
+                firstTitleName=text
             elif level=="1":
                 secondTitle+=1
                 sanjiTitle=0
@@ -118,15 +130,28 @@ def findTitleName(docxPath):
                 sanjiTitle += 1
                 # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                 # titleWords.append("第{}章的三级标题".format(firstTitle, secondTitle,firstTitle, secondTitle,sanjiTitle) + text)
+            ##先判断是不是一级标题
+            if addStart:
+                wordContent[firstTitleName]=[]
+                addStart=False
+            if level:
+                levelText=f"{int(level)+1}级标题-"+text
+            else:
+                if(text.startswith("图") or text.startswith("注：")):
+                    continue
+                if (len(text)>30 and firstTitleName):
+                    numid+=1
+                    wordContent[firstTitleName].append("{}：".format(levelText)+text)
     findTitleName_llm_cfg = {
     #'model': 'qwen1.5-72b-chat',
     'model':"qwen2-72b",
     'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
     # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
     }
+    yield '文档相似性检查----检查是否存在详细设计方案'
     findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
                                     name='Assistant',
-                                    # system_message='1：这样的是一级标题。1.1：这样的是二级标题。1.1.1：这样的是三级标题'
+                                    system_message='按照要求选择最合适的，是唯一的'
                                 )
     prompt='''\n是文档的大纲，一级标题组成，哪一章存在与方案相关的内容
     类似详细设计方案,详细服务方案，详细建设方案为最相关的，优先选择
@@ -142,60 +167,78 @@ def findTitleName(docxPath):
         runList.append(rsp)
     data = runList[len(runList) - 1][0]["content"]
     parsed_data = json_repair.loads(data.replace('`', ''))
-    if(parsed_data["answer"]=="存在"):
-        yield parsed_data["name"]
-    else:
-        yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
+    try:
+        if(parsed_data["answer"]=="存在"):
+            yield parsed_data["name"],wordContent
+        else:
+            yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
+    except Exception as e:
+        userLog.warning(e)
+        userLog.warning(data)
+        userLog.warning(parsed_data)
+        yield "文档相似性检查----检查遇到问题，请联系管理员"
 #获取文档中 详细设计方案 章节的所有内容
-def getDocxToText(docxPath,titleName,vector_store_path):
-    loopCount = 0
-    while True:
-        loopCount+=1
-        if(loopCount>=15):
-            raise Exception("文档读取超时，或文档存在问题无法读取")
-            break
-        try:
-            document = Document(docxPath)
-            break
-        except Exception as e:
-            time.sleep(1)
-            pass
-    # 逐段读取docx文档的内容
-    levelList=[]
+# def getDocxToText(docxPath,titleName,vector_store_path):
+def getDocxToText(titleName,wordContent,vector_store_path):
+
+    # loopCount = 0
+    # while True:
+    #     loopCount+=1
+    #     if(loopCount>=15):
+    #         raise Exception("文档读取超时，或文档存在问题无法读取")
+    #         break
+    #     try:
+    #         document = Document(docxPath)
+    #         break
+    #     except Exception as e:
+    #         time.sleep(1)
+    #         pass
+    # # 逐段读取docx文档的内容
+    # levelList=[]
     words=[]
-    addStart = False
-    levelText=""
-    i = 0
-    for paragraph in document.paragraphs:
-        # 判断该段落的标题级别
-        # 这里用isTitle()临时代表，具体见下文介绍的方法
-        text = paragraph.text
-        if text.strip():#非空判断
-            if titleName:
-                level = isTitle(paragraph)
-                if(addStart and level=="0"):
-                    addStart=False
-                if(level=="0" and (titleName.find(text)>=0 or text.find(titleName)>=0)):
-                    addStart=True
-                if level:
-                    levelList.append("{}：".format(level)+paragraph.text)
-                    levelText=f"{int(level)+1}级标题-"+text
-                else:
-                    if addStart:
-                        if(text.startswith("图") or text.startswith("注：")):
-                            continue
-                        if(len(text)>30):
-                            i=i+1
-                            words.append("{}：".format(levelText)+text)
+    # addStart = False
+    # levelText=""
+    # i = 0
+    # count = 0
+    # total = len(document.paragraphs)
+    # yield "文档相似性检查----文档内容解析中",count,total
+    # for paragraph in document.paragraphs:
+    #     count+=1
+    #     yield "文档相似性检查----文档内容解析中",count,total
+    #     # 判断该段落的标题级别
+    #     # 这里用isTitle()临时代表，具体见下文介绍的方法
+    #     text = paragraph.text
+    #     if text.strip():#非空判断
+    #         if titleName:
+    #             level = isTitle(paragraph)
+    #             if(addStart and level=="0"):
+    #                 addStart=False
+    #             if(level=="0" and (titleName.find(text)>=0 or text.find(titleName)>=0)):
+    #                 addStart=True
+    #             if level:
+    #                 levelList.append("{}：".format(level)+paragraph.text)
+    #                 levelText=f"{int(level)+1}级标题-"+text
+    #             else:
+    #                 if addStart:
+    #                     if(text.startswith("图") or text.startswith("注：")):
+    #                         continue
+    #                     if(len(text)>30):
+    #                         i=i+1
+    #                         words.append("{}：".format(levelText)+text)
     # 将所有段落文本拼接成一个字符串，并用换行符分隔
+    # 遍历字典，查找包含 "标题的" 的键
+    for key, value in wordContent.items():
+        if (titleName.find(key)>=0 or key.find(titleName)>=0):
+            words.extend(value)  # 将对应的值添加
     if len(words)==0:
         raise Exception("checkRepeatText，获取长度为0")
     text = '\n'.join(words)
-
+    userLog.info(f"文档相似性检查----需要处理的总数是{len(words)}")
     # 将文本写入txt文件
     with open("checkRepeatText.txt", 'w', ) as txt_file:
         txt_file.write(text)
-    time.sleep(3)
+    time.sleep(1)
+    yield "文档相似性检查----文档内容转换中",".","."
     loader = TextLoader(file_path='checkRepeatText.txt')
     docs = loader.load()
     # print(docs)
@@ -204,44 +247,56 @@ def getDocxToText(docxPath,titleName,vector_store_path):
 
     splits = text_splitter.split_documents(docs)
     uuids = []
+    yield "文档相似性检查----文档保存中",".","."
+    global embeddings
+    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
     for i in range(len(splits)):
-        uuids.append(str(uuid.uuid4()))
+        uuidStr=str(uuid.uuid4())
+        uuids.append(uuidStr)
     logging.info(f"checkRepeatTextuuidLen{len(uuids)}")
 
-    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
     vectorstore.add_documents(documents=splits, ids=uuids)
+    yield "文档相似性检查----校验文档是否已经完成保存",".","."
     while True:
         time.sleep(0.3)
         ress = vectorstore.similarity_search(words[0])
         if (len(ress) > 0):
             break
-    return words,uuids,vectorstore
+    yield words,uuids,vectorstore
 
 
 # @app.route('/checkRepeatText/<filename>', methods=['GET'])
-def checkRepeatText(filename,user_id):
+def checkRepeatText(filename,user_id,outLog):
     global userLog
     userLog=outLog.get_queue(user_id,"checkRepeatText")
     yield "文档相似性检查---启动中...."
+    userLog.info("文档相似性检查---任务开始")
     vector_store_path="vector_store"+str(uuid.uuid4())
     for titleName in findTitleName(filename):
-        yield titleName
-    if(titleName!="文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"):
+        if(isinstance(titleName ,tuple)):
+            if(len(titleName)==3):
+                yield titleName[0]+titleName[1]+"/"+titleName[2]
+        else:
+            yield titleName
+    if(isinstance(titleName ,tuple)):
+        # try:
+        yield "文档相似性检查----文档内容转换中"
         try:
-            yield "文档相似性检查----文档内容解析中"
-            words,uuids,vectorstore=getDocxToText(filename,titleName,vector_store_path)
+            for words,uuids,vectorstore in getDocxToText(titleName[0],titleName[1],vector_store_path):
+                if isinstance(words, str):
+                    yield words+uuids+vectorstore
         except Exception as e:
-            yield f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败"
+            yield f"文档相似性检查----文档内容获取失败，未找到**{titleName[0]}**相关内容或文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
             userLog.warning(e)
             userLog.warning(f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败")
             outLog.mark_done(user_id, "checkRepeatText")
             return
-    # 记录程序开始的时间戳‘
+        # 记录程序开始的时间戳‘
         reslist = []
         count = 0
         for i in words:
             count += 1
-            yield f"文档相似性检查--对{titleName}章节，进行文档内容检查中{count}/{len(words)}"
+            yield f"文档相似性检查--对{titleName[0]}章节，进行文档内容检查中{count}/{len(words)}"
             result = vectorstore.similarity_search(i)
             textTag = i.split("：")[0]
             for content in result:
@@ -259,6 +314,7 @@ def checkRepeatText(filename,user_id):
                     }
                     r = requests.post(url=url, headers=headers, data=json.dumps(data))
                     res = json.loads(r.text)
+                    res=res["data"]
                     # res = similarity([[i[i.find('：') + 1:], text[text.find('：') + 1:]]])
                 except Exception as e:
                     userLog.warning("文档相似性检查--发生异常:")
@@ -266,7 +322,7 @@ def checkRepeatText(filename,user_id):
                     userLog.warning(i)
                     userLog.warning(text)
                     continue
-                if (res["result"][0]["similarity"] > 0.90):
+                if (res[0]["similarity"] >= 0.96):
                     # 判断重复内容是否被放入
                     if (len(reslist) > 0):
                         isExist = False
@@ -276,15 +332,15 @@ def checkRepeatText(filename,user_id):
                                 break
                         if not isExist:
                             # reslist.append({"yuanwen1":i[i.find('：') + 1:],"yuanwen2":text[text.find('：') + 1:],"similarity":res[0]["similarity"]})
-                            userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
-                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
+                            userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res[0]["similarity"])+"】")
+                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
                     else:
-                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
+                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
                         # print(i.split("：")[1] + "\n" + text.split("：")[1])
-                        userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
+                        userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res[0]["similarity"])+"】")
         # vectorstore.delete(ids=uuids)
         shutil.rmtree(vector_store_path)
-        resInfo=f"对{titleName}章节，发现相似内容：<br>"
+        resInfo=f"对{titleName[0]}章节，发现相似内容：<br>"
         if(len(reslist)>0):
             for res in reslist:
                 resInfo+="【在**"+res["yuanwen1"][:res["yuanwen1"].find('：')]+"**下包含："+res["yuanwen1"][res["yuanwen1"].find('：') + 1:]+"<br>在**"+res["yuanwen2"][:res["yuanwen2"].find('：')]+"**下包含："+res["yuanwen2"][res["yuanwen2"].find('：') + 1:]+"<br>以上两段内容***相似度***："+'{:.2f}'.format(res['similarity'])+"】<br>"
diff --git a/checkTitleName.py b/checkTitleName.py
index 7a0c25b..d2eee5f 100644
--- a/checkTitleName.py
+++ b/checkTitleName.py
@@ -8,7 +8,9 @@ import json_repair
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-from myLogger import outLog
+
+
+# from myLogger import outLog
 
 def load_from_xml_v2(baseURI, rels_item_xml):
     """
@@ -29,11 +31,11 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 _SerializedRelationships.load_from_xml = load_from_xml_v2
 import logging
 
-outLog.logger = logging.getLogger("checkTitleName")
-userLog=None
+# outLog.logger = logging.getLogger("checkTitleName")
+userLog = None
 llm_cfg = {
-    #'model': 'qwen1.5-72b-chat',
-    'model':"qwen2-72b-instruct",
+    # 'model': 'qwen1.5-72b-chat',
+    'model': "qwen2-72b-instruct",
     'model_server': 'DashScope',  # base_url, also known as api_base
     'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
 }
@@ -81,12 +83,13 @@ def isTitle(paragraph):
     # 如果在段落、样式里都没有找到大纲级别，返回None
     return None
 
-#获取文档中 详细设计方案 章节的所有内容
+
+# Yield progress messages while parsing, then the texts of paragraphs whose isTitle() level is "0"
 def getDocxToTitleName(docxPath):
     loopCount = 0
     while True:
-        loopCount+=1
-        if(loopCount>=15):
+        loopCount += 1
+        if (loopCount >= 60):
             raise Exception("文档读取超时，或文档存在问题无法读取")
             break
         try:
@@ -96,64 +99,72 @@ def getDocxToTitleName(docxPath):
             time.sleep(1)
             pass
     # 逐段读取docx文档的内容
-    levelList=[]
-    words=[]
+    levelList = []
+    words = []
     addStart = False
-    levelText=""
-    i = 0
+    levelText = ""
+    count = 0
+    total = len(document.paragraphs)
+    yield f"文档结构检查----文档内容解析中{str(count)}/{str(total)}"
     for paragraph in document.paragraphs:
+        count += 1
+        yield f"文档结构检查----文档内容解析中{str(count)}/{str(total)}"
         # 判断该段落的标题级别
         # 这里用isTitle()临时代表，具体见下文介绍的方法
         text = paragraph.text
-        if text.strip():#非空判断
+        if text.strip():  # 非空判断
             level = isTitle(paragraph)
-            if level=="0":
+            if level == "0":
                 words.append(text)
-    return words
+    yield words
 
-def checkTitleName(filename,user_id):
+
+def checkTitleName(filename, user_id, outLog):
     global userLog
-    userLog=outLog.get_queue(user_id,"checkTitleName")
+    userLog = outLog.get_queue(user_id, "checkTitleName")
     yield '文档结构检查----启动中'
-    userLog.info("checkTitleName----启动中")
-    with open("ce模板.txt", "r",encoding='utf-8') as f:
+    userLog.info("文档结构检查---任务开始")
+    with open("ce模板.txt", "r", encoding='utf-8') as f:
         gettext = f.readlines()
-    count=0
+    count = 0
     reserr = []
     try:
-        word = getDocxToTitleName(filename)
+        for i in getDocxToTitleName(filename):
+            word = i
+            if (isinstance(word, str)):
+                yield word
+                continue
     except Exception as e:
         userLog.warning(e)
-        yield "文档结构检查----文档无法打开，请检查文档内容"
-        outLog.mark_done(user_id, "checkTitleName")
+        yield "文档结构检查----文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
         userLog.warning("checkTitleName----文档无法打开，请检查文档内容")
+        outLog.mark_done(user_id, "checkTitleName")
         return
     for text in gettext:
-        count+=1
+        count += 1
         prompt = f'''
         \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
         '''
-        xushang="回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
+        xushang = "回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
         yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
-        userLog.info(f"checkTitleName----结构分析中{count}/{len(gettext)}")
-        strword = "\n".join(word)+prompt+xushang
-        messages = [{'role': 'user', 'content': [{'text':strword}]}]
+        strword = "\n".join(word) + prompt + xushang
+        messages = [{'role': 'user', 'content': [{'text': strword}]}]
         runList = []
         for rsp in bot.run(messages):
             runList.append(rsp)
             # print(rsp)
         data = runList[len(runList) - 1][0]["content"]
         parsed_data = json_repair.loads(data.replace('`', ''))
-        if(parsed_data["answer"]=="不存在"):
+        if (parsed_data["answer"] == "不存在"):
             reserr.append(text)
-
-    resInfo="文档结构存在异常：<br>"
-    if(len(reserr)>0):
+            userLog.info("文档结构检查----文档结构存在异常：" + text.replace('\n', ''))
+    resInfo = "文档结构存在异常：<br>"
+    if (len(reserr) > 0):
         for i in reserr:
-            resInfo+="**"+i.replace('\n','')+"**<br>"
-            userLog.info(resInfo)
+            resInfo += "**" + i.replace('\n', '') + "**<br>"
+
         yield resInfo
     else:
-        yield "文档结构未发现异常"
-        userLog.info("文档结构未发现异常")
-        outLog.mark_done(user_id, "checkTitleName")
+        yield "**文档结构未发现异常**"
+        userLog.info("文档结构检查----文档结构未发现异常")
+    outLog.mark_done(user_id, "checkTitleName")
diff --git a/daijian方案.py b/daijian方案.py
index 19badae..5210e54 100644
--- a/daijian方案.py
+++ b/daijian方案.py
@@ -1,11 +1,24 @@
-from docx import Document
-from pprint import pprint
+import uuid
+from langchain_community.embeddings import DashScopeEmbeddings
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from qwen_agent.agents import Assistant
-import re
 import json_repair
-import math
+import json
+embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
+device_id=0
+import re
+import time
+from docx import Document
+import shutil
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
+import logging
+import logging.config
+import requests
+from collections import defaultdict
+
+userLog=None
 def load_from_xml_v2(baseURI, rels_item_xml):
     """
     Return |_SerializedRelationships| instance loaded with the
@@ -23,17 +36,6 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 
 
 _SerializedRelationships.load_from_xml = load_from_xml_v2
-llm_cfg = {
-    #'model': 'qwen1.5-72b-chat',
-    'model':"qwen2-72b-instruct",
-    'model_server': 'DashScope',  # base_url, also known as api_base
-    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
-}
-bot = Assistant(llm=llm_cfg,
-                name='Assistant',
-                )
-
-
 # 记录程序开始的时间戳
 def getOutlineLevel(inputXml):
     """
@@ -73,15 +75,26 @@ def isTitle(paragraph):
     # 如果在段落、样式里都没有找到大纲级别，返回None
     return None
 
-#获取文档中 详细设计方案 章节的所有内容
-def getDocxToTitleName(docxPath):
-    document = Document(docxPath)
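+# Yield a status message, then the first-level heading the LLM picks as the scheme chapter (or a not-found message)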
+def findTitleName(docxPath):
+    yield '文档相似性检查----检查是否存在详细设计方案'
+    loopCount = 0
+    while True:
+        loopCount+=1
+        if(loopCount>=15):
+            raise Exception("文档读取超时，或文档存在问题无法读取")
+            break
+        try:
+            document = Document(docxPath)
+            break
+        except Exception as e:
+            time.sleep(1)
+            pass
     # 逐段读取docx文档的内容
-    levelList=[]
-    words=[]
-    addStart = False
-    levelText=""
-    i = 0
+    titleWords=[]
+    firstTitle = 0
+    secondTitle = 0
+    sanjiTitle = 0
     for paragraph in document.paragraphs:
         # 判断该段落的标题级别
         # 这里用isTitle()临时代表，具体见下文介绍的方法
@@ -89,88 +102,360 @@ def getDocxToTitleName(docxPath):
         if text.strip():#非空判断
             level = isTitle(paragraph)
             if level=="0":
-                words.append(text)
-    return words
-
-def checkTitleName(filename):
-    prompt = f'''
-            \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
-            '''
-    xushang = "回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
-    yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
-    strword = "\n".join(word) + prompt + xushang
-    # print(strword)
-    messages = [{'role': 'user', 'content': [{'text': strword}]}]
-    runList = []
-    cishu = 0
-    for rsp in bot.run(messages):
+                firstTitle+=1
+                secondTitle = 0
+                if(text.find("附件")>=0):
+                    continue
+                titleWords.append("一级标题:".format(firstTitle)+text)
+            elif level=="1":
+                secondTitle+=1
+                sanjiTitle=0
+                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
+                # titleWords.append("第{}章的二级标题:".format(firstTitle,firstTitle,secondTitle)+text)
+            elif level=="2":
+                sanjiTitle += 1
+                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
+                # titleWords.append("第{}章的三级标题".format(firstTitle, secondTitle,firstTitle, secondTitle,sanjiTitle) + text)
+    findTitleName_llm_cfg = {
+    #'model': 'qwen1.5-72b-chat',
+    'model':"qwen2-72b",
+    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
+    # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
+    }
+    findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
+                                    name='Assistant',
+                                    # system_message='1：这样的是一级标题。1.1：这样的是二级标题。1.1.1：这样的是三级标题'
+                                )
+    prompt='''\n是文档的大纲，一级标题组成，哪一章存在与方案相关的内容
+    类似详细设计方案,详细服务方案，详细建设方案为最相关的，优先选择
+    类似设计方案，服务方案，建设方案为次相关，次级选择
+    类似方案是最后选择
+    按照这样的顺序选择最合适的
+    你只能从这两个答案中选择一个：{"name":"一级标题名称","answer":"存在"}或{"name":"","answer":"不存在"}，不做过多的解释,严格按回答格式作答
+    '''
+    # print("\n".join(titleWords)+prompt)
+    messages = [({'role': 'user', 'content': "\n".join(titleWords)+prompt})]
+    runList=[]
+    for rsp in findTitleName_bot.run(messages):
         runList.append(rsp)
-        # print(rsp)
     data = runList[len(runList) - 1][0]["content"]
     parsed_data = json_repair.loads(data.replace('`', ''))
-    print(parsed_data)
-    # yield '文档结构检查----启动中'
-    # with open("ce模板.txt", "r",encoding='utf-8') as f:
-    #     gettext = f.readlines()
-    # count=0
-    # reserr = []
-    # try:
-    #     word = getDocxToTitleName(filename)
-    # except Exception as e:
-    #     print(e)
-    #     yield "文档无法打开，请检查文档内容"
-    #     return
-    # for text in gettext:
-    #     count+=1
-    #     prompt = f'''
-    #     \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
-    #     '''
-    #     xushang="回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
-    #     yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
-    #     strword = "\n".join(word)+prompt+xushang
-    #     # print(strword)
-    #     messages = [{'role': 'user', 'content': [{'text':strword}]}]
-    #     runList = []
-    #     cishu = 0
-    #     for rsp in bot.run(messages):
-    #         runList.append(rsp)
-    #         # print(rsp)
-    #     data = runList[len(runList) - 1][0]["content"]
-    #     parsed_data = json_repair.loads(data.replace('`', ''))
-    #     print(parsed_data)
-    #     if(parsed_data["answer"]=="不存在"):
-    #         reserr.append(text)
-    # resInfo="文档结构存在异常：<br>"
-    # if(len(reserr)>0):
-    #     for i in reserr:
-    #         resInfo+=f"**{i}**<br>"
-    #     yield resInfo
-    # else:
-    #     yield "文档结构未发现异常"
+    if(parsed_data["answer"]=="存在"):
+        yield parsed_data["name"]
+    else:
+        yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
 
+def merge_chapters(words):
+    merged_text = {}
+    for line in words:
+        if "：" in line:
+            key, value = line.split("：", 1)  # 根据第一个冒号分割
+            if key in merged_text:
+                merged_text[key].append(value.strip())  # 添加到列表
+            else:
+                merged_text[key] = [value.strip()]  # 初始化列表
+        else:
+            logging.warning(f"Skipping line without key-value pair: {line}")
 
-import logging
+    # 合并结果格式化为列表输出
+    merged_words = []
+    for key, values in merged_text.items():
+        combined_value = "，".join(values)  # 将内容合并
+        merged_words.append(f"{key}：{combined_value}")
+    return merged_words
+# Collect body paragraphs (over 30 characters) under the first-level heading that contains titleName
+def getDocxToText(docxPath, titleName, vector_store_path):
+    loopCount = 0
+    while True:
+        loopCount += 1
+        if loopCount >= 15:
+            raise Exception("文档读取超时，或文档存在问题无法读取")
+            break
+        try:
+            document = Document(docxPath)
+            break
+        except Exception as e:
+            time.sleep(1)
+            pass
+
+    # 逐段读取docx文档的内容
+    levelList = []
+    words = []
+    addStart = False
+    title_counter = []  # 用于存储当前标题的计数
+    title_texts = []    # 用于存储当前各级标题的文本
+    i = 0
+
+    for paragraph in document.paragraphs:
+        text = paragraph.text.strip()
+        if text:  # 非空判断
+            level = isTitle(paragraph)  # 确保这个函数在代码中定义
+
+            # 当前标题的层级
+            current_level = int(level) if level is not None else -1
+
+            if current_level >= 0:  # 标题段落
+                # 确保标题计数器足够长
+                while len(title_counter) <= current_level:
+                    title_counter.append(0)  # 初始化新级别的标题计数
+                    title_texts.append('')   # 初始化对应的标题文本
+
+                # 更新当前级别及以下的标题计数和标题文本
+                title_counter[current_level] += 1  # 当前级别计数加1
+                title_counter = title_counter[:current_level+1]
+                title_texts[current_level] = text  # 保存当前级别的标题文本
+                title_texts = title_texts[:current_level+1]
+
+                # 重置更低级别的计数和标题文本
+                for idx in range(current_level + 1, len(title_counter)):
+                    title_counter[idx] = 0
+                    title_texts[idx] = ''
+
+                # 检查是否与 titleName 匹配
+                if current_level == 0:
+                    addStart = titleName in text  # 检查是否与 titleName 匹配
+
+            else:  # 非标题段落
+                if addStart:
+                    if len(text) > 30:  # 仅记录长度大于30的内容
+                        i += 1
+                        # 获取当前完整的标题编号和标题名称
+                        levelText = ".".join(map(str, title_counter))
+                        # 使用非空的标题名称
+                        current_title = title_texts[-1] if title_texts else ''
+                        words.append(f"{levelText}-{current_title}：{text}")
+
+    if len(words) == 0:
+        raise Exception("checkRepeatText，获取长度为0")
+
+    # 使用封装的合并函数
+    merged_words = merge_chapters(words)
+
+    # 将合并后的内容写入 txt 文件
+    with open("checkRepeatText.txt", 'w') as txt_file:
+        for line in merged_words:
+            txt_file.write(f"{line}\n")
+
+    time.sleep(3)
+
+    # 加载文本
+    loader = TextLoader(file_path='checkRepeatText.txt')
+    docs = loader.load()
+
+    # 创建唯一标识符
+    uuids = []
+    for _ in range(len(merged_words)):
+        uuids.append(str(uuid.uuid4()))
+    logging.info(f"checkRepeatTextuuidLen{len(uuids)}")
+
+    return merged_words, uuids
+
+
+# @app.route('/checkRepeatText/<filename>', methods=['GET'])
+def checkRepeatText(filename):
+    yield "文档相似性检查---启动中...."
+    vector_store_path="vector_store"+str(uuid.uuid4())
+    for titleName in findTitleName(filename):
+        yield titleName
+    if(titleName!="文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"):
+        yield "文档相似性检查----文档内容解析中"
+        words,uuids=getDocxToText(filename,titleName,vector_store_path)
+    # Result list, progress counter, and the scoring rubric / weights used below
+        reslist = []
+        count = 0
+        standard = {
+            "清晰性": """对软件功能描述的完整性主要体现在以下两个方面：
+                        a. 功能描述是否简洁明了，避免使用过于复杂或专业的术语，使得用户能够轻松理解。
+                        b. 是否明确指出了功能的具体作用，没有模糊不清或含糊其辞的表述。
+                        如果要将软件功能描述的清晰性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
+                        将软件功能描述的清晰性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
+                        优秀（90~100分）
+                        简洁明了：功能描述极其精炼，没有多余的词汇，每个字都承载着必要的信息。
+                        通俗易懂：完全避免了专业术语或行业黑话，即使是非专业用户也能轻松理解。
+                        具体明确：功能的作用、范围、限制以及用户期望的结果都被清晰、准确地阐述，没有任何模糊或含糊的表述。
+                        良好（70分~90分，不包含90分）
+                        较为简洁：功能描述相对简短，但可能包含一些必要的细节或背景信息。
+                        易于理解：大部分术语都是通俗易懂的，对于少数专业术语，提供了简短的解释或上下文。
+                        明确具体：功能的主要作用、范围和用户期望的结果都被明确阐述，但可能在某些细节上稍显模糊。
+                        一般（60~70分，不包含70分）
+                        稍显冗长：功能描述可能包含一些不必要的细节或重复信息，导致用户需要花费更多时间来理解。
+                        有一定难度：使用了一些专业术语或行业黑话，但没有提供足够的解释或上下文，导致非专业用户可能难以理解。
+                        基本明确：功能的主要作用被阐述，但在范围、限制或用户期望的结果上可能存在一些模糊或含糊的表述。
+                        差（60分以下，不包含60分）
+                        冗长复杂：功能描述过于详细和复杂，包含大量不必要的细节和背景信息，导致用户难以抓住重点。
+                        难以理解：大量使用专业术语或行业黑话，且没有提供任何解释或上下文，使得大部分用户都难以理解。
+                        模糊不清：功能的作用、范围、限制以及用户期望的结果都没有被明确阐述，存在大量的模糊和含糊表述。
+                        评估的提示词举例：
+                        根据这些评判标准，对下面的软件功能描述的清晰性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价，并给出具体得分。并在此基础上润色和完善，使之达到优秀的等级。
+                        """,
+            "完整性": """对软件功能描述的完整性主要体现在以下两个方面：
+                        a. 是否涵盖了功能的所有重要方面，包括输入、输出、处理过程等。
+                        b. 是否提供了足够的信息，以便用户能够全面了解功能的工作原理和用途。
+                        如果要将软件功能描述的完整性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
+                        将软件功能描述的完整性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
+                        优秀：（90~100分）
+                        描述全面涵盖了功能的所有重要方面，包括但不限于输入、输出、处理过程、异常处理等。
+                        提供了详尽的信息，用户能够清晰地了解功能的工作原理、用途以及在不同场景下的表现。
+                        包含了必要的示例、图表或流程图，以直观展示功能的工作流程和效果。
+                        没有遗漏任何对用户理解和使用功能至关重要的信息。
+                        良好：（70分~90分，不包含90分）
+                        描述基本涵盖了功能的主要方面，但可能有个别不太重要的细节未提及。
+                        提供了足够的信息，用户能够较好地理解功能的工作原理和用途，但在某些复杂场景下可能需要额外说明。
+                        可能包含一些示例或图表，但可能不如优秀等级那么全面或详细。
+                        一般：（60~70分，不包含70分）
+                        描述涵盖了功能的一部分重要方面，但存在较明显的遗漏或不足。
+                        提供的信息有限，用户可能只能对功能有一个大致的了解，无法深入了解其工作原理和详细用途。
+                        可能缺乏示例、图表或流程图等辅助材料，导致用户难以理解功能的某些复杂部分。
+                        差：（60分以下，不包含60分）
+                        描述严重缺失，未涵盖功能的关键方面，甚至可能误导用户。
+                        提供的信息极少，用户无法全面了解功能的工作原理和用途。
+                        可能存在错误或矛盾的信息，导致用户无法准确理解功能。
+                        根据这些评判标准，对下面的软件功能描述的完整性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
+                        """,
+            "可测试性": """软件功能描述的可测试性主要体现为以下方面：
+                        a. 功能描述是否具体、明确，以便能够进行功能测试和验证。
+                        b. 是否提供了足够的细节，以便开发人员和测试人员能够准确理解和实现功能。
+                        如果要将软件功能描述的可测试性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
+                        将软件功能描述的可测试性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
+                        优秀：（90~100分）
+                        功能描述非常具体和明确，能够直接转化为测试用例。
+                        提供了详尽的细节，包括输入、输出、边界条件、异常处理等。
+                        开发人员和测试人员能够轻松理解和实现功能，无需额外澄清或假设。
+                        功能描述中包含了预期的行为和非预期的行为，有助于全面覆盖测试场景。
+                        良好：（70分~90分，不包含90分）
+                        功能描述相对具体和明确，大部分内容可以直接用于测试。
+                        提供了足够的细节，但可能需要一些额外的解释或澄清才能完全理解。
+                        开发人员和测试人员能够基于描述实现和测试功能，但可能需要一些额外的沟通和协调。
+                        功能描述中基本涵盖了主要的行为和边界条件，但可能缺少对某些异常情况的详细描述。
+                        一般：（60~70分，不包含70分）
+                        功能描述较为笼统，需要较多的解释和澄清才能用于测试和开发。
+                        细节不够充分，可能导致开发人员和测试人员在实现和测试过程中产生误解或遗漏。
+                        需要较多的沟通和协调来确保功能的正确实现和测试。
+                        功能描述中可能只涵盖了主要的行为，对边界条件和异常情况的描述较为模糊或缺失。
+                        差：（60分以下，不包含60分）
+                        功能描述非常模糊和笼统，无法直接用于测试和开发。
+                        缺乏必要的细节，导致开发人员和测试人员无法准确理解和实现功能。
+                        需要大量的沟通和协调，甚至可能需要重新编写功能描述才能进行有效的测试和开发。
+                        功能描述中可能只提到了大致的目标或意图，没有具体的行为描述、边界条件或异常处理。
+                        根据这些评判标准，对下面的软件功能描述的可测试性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
+                        """,
+            "详细性": """软件功能详细性主要体现在：
+                        a. 功能描述是否详细，可以根据功能描述进行功能点评价，计算出ILF、EIF、EI、EO、EQ的数量；
+                        如果要将软件功能描述的详细性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
+                        将软件功能描述的详细性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
+                        优秀：（90~100分）
+                        功能描述非常详尽，包含了所有必要的信息，使得评估者能够轻松地根据描述进行功能点评价。
+                        ILF、EIF、EI、EO、EQ的数量可以明确且无误地计算出来，没有遗漏或模糊之处。
+                        描述中不仅包含了功能的正常操作，还涵盖了异常处理、边界条件等特殊情况。
+                        使用了具体的例子、流程图或伪代码来进一步阐明功能。
+                        良好：（70分~90分，不包含90分）
+                        功能描述相对详细，提供了足够的信息来进行功能点评价。
+                        ILF、EIF、EI、EO、EQ的数量可以大致计算出来，但可能需要一些额外的解释或澄清。
+                        描述中基本涵盖了功能的各个方面，但对某些细节或特殊情况可能描述不够充分。
+                        整体而言，描述是清晰和准确的，但还有改进的空间。
+                        一般：（60~70分，不包含70分）
+                        功能描述较为笼统，缺乏具体的细节。
+                        ILF、EIF、EI、EO、EQ的数量计算可能存在一定的困难或不确定性，需要较多的假设或推测。
+                        描述中只涵盖了功能的主要方面，对细节和特殊情况的处理描述不足。
+                        可能需要额外的沟通或澄清才能准确理解功能需求。
+                        差：（60分以下，不包含60分）
+                        功能描述非常模糊，缺乏必要的信息和细节。
+                        无法根据描述进行准确的功能点评价，ILF、EIF、EI、EO、EQ的数量无法确定。
+                        描述中可能只提到了功能的大致目标或意图，没有具体的实现细节或操作步骤。
+                        需要大量的额外信息或澄清才能理解功能需求，甚至可能需要重新编写功能描述。
+                        根据这些评判标准，对下面的软件功能描述的详细性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
+                        """,
+        }
+        weight = {
+            "清晰性" : 0.4,
+            "完整性" : 0.3,
+            "可测试性" : 0.2,
+            "详细性" : 0.1,
+
+        }
+
+        findTitleName_llm_cfg = {
+            'model': "qwen2-72b",
+            'model_server': 'http://127.0.0.1:1025/v1',
+        }
+        findTitleName_bot = Assistant(llm=findTitleName_llm_cfg, name='Assistant')
+        for i in words:
+            count += 1
+            yield f"文档相似性检查--对{titleName}章节，进行文档内容检查中{count}/{len(words)}"
+            chapter, rest = i.split('-', 1)
+            title, text = rest.split('：', 1)
+
+            # 生成字典
+            example = {
+                "chapter": chapter.strip(),
+                "title": title.strip(),
+                "text": text.strip()
+            }
+            result = {
+                "title": title.strip(),
+                "text": text.strip()
+            }
+            # 循环提取键和值
+            weighted_score = 0
+            for key, value in standard.items():
+                prompt_score = f"""对软件功能{key}的定义：
+                                    {value}
+                                    模块名称：【{example['title']}】
+                                    模块描述：【{example['text']}】
+                                    回答格式为：{{"模块名称"："{example['text']}",
+                                                "等级":"优秀/良好/一般/差",
+                                                "得分":"0~100",
+                                                "理由及扣分原因":"理由及扣分原因",
+                                                }}，不做过多的解释,严格按回答格式作答,只给出一个回答。
+                                    """
+
+                messages = [({'role': 'user', 'content': prompt_score})]
+                runList = []
+                for rsp in findTitleName_bot.run(messages):
+                    runList.append(rsp)
+                data = runList[len(runList) - 1][0]["content"]
+                parsed_data = json_repair.loads(data.replace('`', ''))
+                if isinstance(parsed_data, list):  # 检查parsed_data是否为列表
+                    parsed_data = parsed_data[0]  # 取第一个元素
+                else:
+                    parsed_data = parsed_data
+                result[f"{key}等级"] = parsed_data['等级']
+                result[f"{key}得分"] = parsed_data['得分']
+                score = int(parsed_data['得分'])  # 假设 '得分' 是字符串，需要转换为整数
+                key_weight = weight.get(key, 0)  # 根据键获取权重，如果没有匹配的权重，默认为 0
+                # 计算加权得分并累加
+                weighted_score += score * key_weight
+            result["加权得分"] = round(weighted_score, 2)  # 保留两位小数
+            answer = f"{example['text']}"
+            for key, value in standard.items():
+                prompt_answer = f"""对软件功能{key}的定义：\n 
+                            {value}\n
+                            模块名称：【{example['title']}】\n
+                            模块描述：f【{answer}】\n
+                            回答格式为：{{"模块名称"："{example['text']}",
+                                        "改进后的描述":"改进后的描述",
+                                        }}，不做过多的解释,严格按回答格式作答。
+                            """
+                messages = [({'role': 'user', 'content': prompt_answer})]
+                runList = []
+                for rsp in findTitleName_bot.run(messages):
+                    runList.append(rsp)
+                data = runList[len(runList) - 1][0]["content"]
+                parsed_data = json_repair.loads(data.replace('`', ''))
+                answer = parsed_data['改进后的描述']
+            result["改进后的描述"] = answer
+            textTag = i.split("：")[0]
+            breakpoint()
+        # vectorstore.delete(ids=uuids)
+        shutil.rmtree(vector_store_path)
+        resInfo=f"对{titleName}章节，发现相似内容：<br>"
+        if(len(reslist)>0):
+            for res in reslist:
+                resInfo+="【在**"+res["yuanwen1"][:res["yuanwen1"].find('：')]+"**下包含："+res["yuanwen1"][res["yuanwen1"].find('：') + 1:]+"<br>在**"+res["yuanwen2"][:res["yuanwen2"].find('：')]+"**下包含："+res["yuanwen2"][res["yuanwen2"].find('：') + 1:]+"<br>以上两段内容***相似度***："+'{:.2f}'.format(res['similarity'])+"】<br>"
+            yield resInfo
+        else:
+            yield "**未发现相似内容**"
+            userLog.info("文档相似性检查----未发现相似内容**")
 
-# 创建一个记录器
-logger = logging.getLogger('my_logger')
-logger.setLevel(logging.DEBUG)
-
-# 创建一个处理器
-ch = logging.StreamHandler()
-ch.setLevel(logging.DEBUG)
-
-# 创建一个格式化器并将其添加到处理器中
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-ch.setFormatter(formatter)
-
-# 将处理器添加到记录器中
-logger.addHandler(ch)
-try:
-# 记录一些日志消息
-    logger.debug('这是一个调试消息')
-    logger.info('这是一个信息消息')
-    logger.warning('这是一个警告消息')
-    logger.error('这是一个错误消息')
-    logger.critical('这是一个致命错误消息')
-except Exception as e:
-    logger.warning(e)
\ No newline at end of file
+for i  in checkRepeatText("./北仑区综合行政执法局协同监管系统项目建设方案_20240824.docx"):
+ print(i)
diff --git a/main.py b/main.py
index 8e89845..9a11197 100644
--- a/main.py
+++ b/main.py
@@ -1,206 +1,286 @@
-from flask import Flask, request, jsonify, Response
+# from flask import Flask, request, jsonify, Response
 import os
 from checkPlaceName import checkPlaceName
 from checkRepeatText import checkRepeatText
 from checkCompanyName import checkCompanyName
 from checkDocumentError import checkDocumentError
 from checkTitleName import checkTitleName
-from flask_cors import CORS
+# from flask_cors import CORS
 import qwen_agenttext
 from myLogger import outLog
 import time
-app = Flask(__name__)
-cros = CORS(app)
+# app = Flask(__name__)
+# cros = CORS(app)
+import uvicorn
+from fastapi import FastAPI, Request, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from sse_starlette.sse import EventSourceResponse
+import asyncio
+
+app = FastAPI()
+# 允许所有来源的跨域请求
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+
 UPLOAD_FOLDER = 'uploads'
 if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)
 
 
-@app.route('/upload', methods=['POST'])
-def upload_file():
-    if 'file' not in request.files:
-        return jsonify({"error": "No file part"}), 400
-    file = request.files['file']
-    if file.filename == '':
-        return jsonify({"error": "No selected file"}), 400
-    if file:
-        filename = file.filename
-        file.save(os.path.join(UPLOAD_FOLDER, filename))
-        return jsonify({"message": "File uploaded successfully"}), 200
-
-
-@app.route('/stream', methods=["GET", "POST"])
-def stream_numbers():
-    context = request.args.get('context')
-    # def generate_numbers():
-    #     event_id=0
-    #     for number in range(1, 10):
-    #         json_data = json.dumps({"number": number})
-    #         print(json_data)
-    #         event_id += 1
-    #         yield f"id: {event_id}\n"
-    #         yield f"event: time-update\n"
-    #         yield f"data: {json_data}\n\n"  # 每次生成一个数字就发送
-    #         time.sleep(0.5)  # 为了演示，加入短暂延迟
-    #     json_data = json.dumps({"number": "done"})
-    #     yield f"id: {1}\n"
-    #     yield f"event: time-update\n"
-    #     yield f"data: {json_data}\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(qwen_agenttext.getxinx(context), headers=headers)
-
-
-@app.route('/sse/checkRepeatText', methods=['GET'])
-def checkRepeatTextWeb():
-    filename = request.args.get('filename')
-    userId = request.args.get("userId")
-
-    def generate_checkRepeatText(filename,userId):
+# @app.route('/upload', methods=['POST'])
+# def upload_file():
+#     if 'file' not in request.files:
+#         return jsonify({"error": "No file part"}), 400
+#     file = request.files['file']
+#     if file.filename == '':
+#         return jsonify({"error": "No selected file"}), 400
+#     if file:
+#         filename = file.filename
+#         file.save(os.path.join(UPLOAD_FOLDER, filename))
+#         return jsonify({"message": "File uploaded successfully"}), 200
+@app.post("/sse/upload")
+async def upload_file(file: UploadFile = File(...)):
+    """Save an uploaded file under UPLOAD_FOLDER; 400 if unnamed, 500 on failure."""
+    # basename() drops client-supplied directory parts (blocks "../" path traversal).
+    safe_name = os.path.basename(file.filename or "")
+    if not safe_name:
+        raise HTTPException(status_code=400, detail="No selected file")
+    try:
+        file_location = os.path.join(UPLOAD_FOLDER, safe_name)
+        with open(file_location, "wb") as f:
+            f.write(await file.read())
+        return JSONResponse(content={"message": "文件上传成功"}, status_code=200)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail="文件上传失败，错误信息：" + str(e))
+
+
+@app.get("/sse")
+async def root(request: Request):
+    async def event_generator(request: Request):
+        res_str = "七夕情人节即将来临，我们为您准备了精美的鲜花和美味的蛋糕"
+        for i in res_str:
+            if await request.is_disconnected():
+                print("连接已中断")
+                break
+            yield {
+                "event": "message",
+                "id": "7",
+                "data": f"{i}"
+            }
+
+            await asyncio.sleep(0.1)
+
+    g = event_generator(request)
+    return EventSourceResponse(g)
+
+
+# def stream_numbers():
+#     context = request.args.get('context')
+#     # def generate_numbers():
+#     #     event_id=0
+#     #     for number in range(1, 10):
+#     #         json_data = json.dumps({"number": number})
+#     #         print(json_data)
+#     #         event_id += 1
+#     #         yield f"id: {event_id}\n"
+#     #         yield f"event: time-update\n"
+#     #         yield f"data: {json_data}\n\n"  # 每次生成一个数字就发送
+#     #         time.sleep(0.5)  # 为了演示，加入短暂延迟
+#     #     json_data = json.dumps({"number": "done"})
+#     #     yield f"id: {1}\n"
+#     #     yield f"event: time-update\n"
+#     #     yield f"data: {json_data}\n\n"  # 发送完成信号
+
+#     headers = {
+#         "Content-Type": "text/event-stream",
+#         "Cache-Control": "no-cache",
+#         "X-Accel-Buffering": "no",
+#         "Access-Control-Allow-Origin": "*",
+#         "Access-Control-Allow-Methods": "GET,POST",
+#         "Access-Control-Allow-Headers": "x-requested-with,content-type",
+#     }
+#     return Response(qwen_agenttext.getxinx(context), headers=headers)
+
+@app.get("/sse/checkRepeatText")
+async def checkRepeatTextWeb(filename, userId, request: Request):
+    async def generate_checkRepeatText(filename, userId, request: Request):
+        global outLog
         id = 0
-        for i in checkRepeatText(filename,userId):
-            yield f"id: {id + 1}\n"
-            yield f"event: checkRepeatText\n"
-            yield f"data: {i}\n\n"  # 发送完成信号
-        # except Exception as e:
-
-        #     yield f"id: {id+1}\n"
-        #     yield f"event: checkRepeatText\n"
-        #     yield f"data: **程序出现异常**\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_checkRepeatText(filename,userId), headers=headers)
-
-
-@app.route('/sse/checkPlaceName', methods=['GET'])
-def checkPlaceNameWebSse():
-    filename = request.args.get('filename')
-    userId = request.args.get("userId")
-    def generate_checkPlaceName(filename,userId):
+        for i in checkRepeatText(filename, userId, outLog):
+            id += 1
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "checkRepeatText",
+                    "data": "checkRepeatText连接已中断"
+                }
+                break
+            yield {
+                "id": f"{id}",
+                "event": "checkRepeatText",
+                "data": i
+            }
+
+    g = generate_checkRepeatText(filename, userId, request)
+    return EventSourceResponse(g)
+
+
+@app.get('/sse/checkPlaceName')
+def checkPlaceNameWebSse(filename, userId, request: Request):
+    async def generate_checkPlaceName(filename, userId, request: Request):
         id = 0
-        for i in checkPlaceName(filename,userId):
-            yield f"id: {id + 1}\n"
-            yield f"event: checkPlaceName\n"
-            yield f"data: {i}\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_checkPlaceName(filename,userId), headers=headers)
-
-
-@app.route('/sse/checkCompanyName', methods=['GET'])
-def checkCompanyNameWebSse():
-    filename = request.args.get('filename')
-    userId = request.args.get("userId")
-    def generate_checkCompanyName(filename,userId):
+        global outLog
+        for i in checkPlaceName(filename, userId, outLog):
+            id += 1
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "checkPlaceName",
+                    "data": "checkPlaceName连接已中断"
+                }
+                break
+            yield {
+                "id": f"{id}",
+                "event": "checkPlaceName",
+                "data": i
+            }
+
+    g = generate_checkPlaceName(filename, userId, request)
+    return EventSourceResponse(g)
+
+
+@app.get('/sse/checkCompanyName')
+def checkCompanyNameWebSse(filename, userId, request: Request):
+    async def generate_checkCompanyName(filename, userId, request: Request):
         id = 0
-        for i in checkCompanyName(filename,userId):
-            yield f"id: {id + 1}\n"
-            yield f"event: checkCompanyName\n"
-            yield f"data: {i}\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_checkCompanyName(filename,userId), headers=headers)
-
-
-@app.route('/sse/checkDocumentErrorWeb', methods=['GET'])
-def checkDocumentErrorWebSse():
-    filename = request.args.get('filename')
-    userId = request.args.get("userId")
-    def generate_checkDocumentError(filename,userId):
+        global outLog
+        for i in checkCompanyName(filename, userId, outLog):
+            id += 1
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "checkCompanyName",
+                    "data": "checkCompanyName连接已中断"
+                }
+                break
+            yield {
+                "id": f"{id}",
+                "event": "checkCompanyName",
+                "data": i
+            }
+
+    g = generate_checkCompanyName(filename, userId, request)
+    return EventSourceResponse(g)
+
+
+@app.get('/sse/checkDocumentErrorWeb')
+def checkDocumentErrorWebSse(filename, userId, request: Request):
+    async def generate_checkDocumentError(filename, userId, request: Request):
         id = 0
-        for i in checkDocumentError(filename,userId):
-            yield f"id: {id + 1}\n"
-            yield f"event: checkDocumentError\n"
-            yield f"data: {i}\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_checkDocumentError(filename,userId), headers=headers)
-
-
-@app.route('/sse/checkTitleName', methods=['GET'])
-def checkTitleNameWebSse():
-    filename = request.args.get('filename')
-    userId = request.args.get("userId")
-    def generate_checkTitleName(filename,userId):
+        global outLog
+        for i in checkDocumentError(filename, userId, outLog):
+            id += 1
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "checkDocumentError",
+                    "data": "checkDocumentError连接已中断"
+                }
+                break
+            yield {
+                "id": f"{id}",
+                "event": "checkDocumentError",
+                "data": i
+            }
+
+    g = generate_checkDocumentError(filename, userId, request)
+    return EventSourceResponse(g)
+
+
+@app.get('/sse/checkTitleName')
+def checkTitleNameWebSse(filename, userId, request: Request):
+    async def generate_checkTitleName(filename, userId, request: Request):
         id = 0
-        for i in checkTitleName(filename,userId):
-            yield f"id: {id + 1}\n"
-            yield f"event: checkTitleName\n"
-            yield f"data: {i}\n\n"  # 发送完成信号
-
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_checkTitleName(filename,userId), headers=headers)
-
-@app.route('/sse/getLog', methods=['GET'])
-def getlog():
-    userId = request.args.get("userId")
-    def generate_getLog(userId):
-        time.sleep(1)
+        global outLog
+        for i in checkTitleName(filename, userId, outLog):
+            id += 1
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "checkTitleName",
+                    "data": "checkTitleName连接已中断"
+                }
+                break
+            yield {
+                "id": f"{id}",
+                "event": "checkTitleName",
+                "data": i
+            }
+
+    g = generate_checkTitleName(filename, userId, request)
+    return EventSourceResponse(g)
+
+
+@app.get("/sse/getLog")
+# @app.route('/sse/getLog', methods=['GET'])
+async def getlog(userId, request: Request):
+    # userId = request.args.get("userId")
+    async def generate_getLog(userId):
         id = 0
+        global outLog
+        await asyncio.sleep(5)
         while True:
-            if outLog.is_done(userId):
+            isbreak = outLog.is_done(userId)
+            if isbreak:
+                break  # 完成了
+            text = outLog.get_queueData(userId)
+            if await request.is_disconnected():
+                yield {
+                    "id": f"{id}",
+                    "event": "getlog",
+                    "data": "getlog连接已中断"
+                }
                 break
-            q = outLog.get_queueData(userId)
-            if q:
-                id+=1
-                text = q.pop(0)
-                yield f"id: {id}\n"
-                yield f"event: getlog\n"
-                yield f"data: {text}\n\n"  # 发送完成信号
-        yield f"id: {id}\n"
-        yield f"event: getlog\n"
-        yield f"data: 任务结束！！！！！\n\n"  # 发送完成信号
+            if text:
+                id += 1
+                yield {
+                    "id": id,
+                    "event": "getlog",
+                    "data": text
+                }
+            else:
+                # Queue is empty: back off briefly instead of busy-polling Redis.
+                await asyncio.sleep(0.1)
+        # yield f"id: {id}\n"
+        # yield f"event: getlog\n"
+        # yield f"data: 任务结束！！！！！\n\n"  # 发送完成信号
+        yield {
+            "id": id,
+            "event": "getlog",
+            "data": "任务结束！！！！"
+        }
         outLog.del_queue(userId)
-    headers = {
-        "Content-Type": "text/event-stream",
-        "Cache-Control": "no-cache",
-        "X-Accel-Buffering": "no",
-        "Access-Control-Allow-Origin": "*",
-        "Access-Control-Allow-Methods": "GET,POST",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
-    }
-    return Response(generate_getLog(userId), headers=headers)
+
+    # headers = {
+    #     "Content-Type": "text/event-stream",
+    #     "Cache-Control": "no-cache",
+    #     "X-Accel-Buffering": "no",
+    #     "Access-Control-Allow-Origin": "*",
+    #     "Access-Control-Allow-Methods": "GET,POST",
+    #     "Access-Control-Allow-Headers": "x-requested-with,content-type",
+    # }
+    g = generate_getLog(userId)
+    return EventSourceResponse(g)
+    # return Response(generate_getLog(userId), headers=headers)
+
+
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=80)
+    # A FastAPI instance has no run() method; an ASGI server must serve the app.
+    # Host and port match the previous Flask entry point.
+    uvicorn.run(app, host="0.0.0.0", port=80)
diff --git a/myLogger.py b/myLogger.py
index 6ea3059..7244d53 100644
--- a/myLogger.py
+++ b/myLogger.py
@@ -1,117 +1,8 @@
 # -*- coding: utf-8 -*-
-"""
-@author:  bingyl123@163.com
-@version: 1.0.0
-@file:    OutLog.py
-@time:    2023/2/23 20:25
-"""
-# import logging
-# import logging.config
-# import re
-# import datetime
-# import queue
-#
-#
-# class OutLog:
-#     _instance = None
-#     logger = None
-#
-#     def __new__(cls):
-#         if cls._instance is None:
-#             cls._instance = super(OutLog, cls).__new__(cls)
-#             cls.logger = logging.getLogger("app")  # 默认logger名称为"app"
-#             cls._instance.queue_dict = {}
-#             cls._instance.done_dict = {}
-#         return cls._instance
-#
-#     def get_queue(self, user_id):
-#         if user_id not in self.queue_dict:
-#             self.queue_dict[user_id] = []
-#             self.done_dict[user_id] = {}  # 初始化为未完成的字典
-#         return self.queue_dict[user_id]
-#
-#     def mark_done(self, user_id, producer_name):
-#         self.done_dict[user_id][producer_name] = True
-#
-#     def is_done(self, user_id):
-#         return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
-#     @staticmethod
-#     def put(item: str, level="INFO"):
-#         dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-#         mq.put(f"{dtf}[{level}]: {item}")
-#
-#     @staticmethod
-#     def debug(item, log=True):
-#         OutLog.put(item, level="DEBUG")
-#         if log:
-#             OutLog._instance.logger.debug(item)
-#
-#     @staticmethod
-#     def info(item, log=True):
-#         OutLog.put(item, level="INFO")
-#         if log:
-#             OutLog._instance.logger.info(item)
-#
-#     @staticmethod
-#     def warning(item, log=True):
-#         OutLog.put(item, level="WARNING")
-#         if log:
-#             OutLog._instance.logger.warning(item)
-#
-#     @staticmethod
-#     def error(item, log=True):
-#         OutLog.put(item, level="ERROR")
-#         if log:
-#             OutLog._instance.logger.error(item)
-#
-#     @staticmethod
-#     def critical(item, log=True):
-#         OutLog.put(item, level="CRITICAL")
-#         if log:
-#             OutLog._instance.logger.critical(item)
-#
-#
-#
-# # 日志配置
-# log_config = {
-#     'version': 1,
-#     'disable_existing_loggers': False,
-#     'formatters': {
-#         'standard': {
-#             'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-#         },
-#     },
-#     'handlers': {
-#         'console': {
-#             'class': 'logging.StreamHandler',
-#             'formatter': 'standard',
-#             'level': logging.INFO,
-#         },
-#         'file': {
-#             'class': 'logging.FileHandler',
-#             'filename': 'Logger.log',
-#             'formatter': 'standard',
-#             'level': logging.WARNING,
-#         },
-#     },
-#     'loggers': {
-#         '': {
-#             'handlers': ['console', 'file'],
-#             'level': logging.WARNING,
-#             'propagate': True,
-#         },
-#     }
-# }
-#
-# logging.config.dictConfig(log_config)
-#
-# outLog = OutLog()  # 获取单例实例
-
-
-
 import logging
 import logging.config
 import datetime
+import redis
 
 class OutLog:
     _instance = None
@@ -121,35 +12,49 @@ class OutLog:
         if cls._instance is None:
             cls._instance = super(OutLog, cls).__new__(cls)
             cls.logger = logging.getLogger("app")  # 默认logger名称为"app"
-            cls._instance.queue_dict = {}
-            cls._instance.done_dict = {}
+            # cls._instance.queue_dict = {}
+            # cls._instance.done_dict = {}
+                        # 初始化 Redis 连接
+            cls._instance.redis_client = redis.StrictRedis(host='localhost', port=6379, password="root",db=0, decode_responses=True)
         return cls._instance
 
-    def get_queue(self, user_id,producer_name):
-        if user_id not in self.queue_dict:
-            self.queue_dict[user_id] = []
-            self.done_dict[user_id] = {}  # 初始化为未完成的字典
-        if user_id not in self.done_dict:
-            self.done_dict[user_id][producer_name] = False
+    def get_queue(self,user_id,producer_name):
+        # if user_id not in self.queue_dict:
+        #     self.queue_dict[user_id] = []
+        #     self.done_dict[user_id]={}
+        # self.done_dict[user_id][producer_name] = False  # 初始化为未完成的字典
+         # 使用 Redis 进行存储和查询
+        if not self.redis_client.exists(f"queue:{user_id}"):
+            # self.redis_client.rpush(f"queue:{user_id}")
+            self.logger.info(f"queue:{user_id}")
+        self.redis_client.hset(f"done:{user_id}", producer_name, "0")  # 初始化为未完成
         return self.UserLogger(user_id)
     def get_queueData(self, user_id):
-        if user_id in self.queue_dict:
-           return OutLog._instance.queue_dict[self.user_id]
+        """Pop and return the oldest queued log line for user_id, or None if empty."""
+        # LPOP already returns None for a missing key, so a separate EXISTS
+        # round-trip (and the race between the two calls) is unnecessary.
+        return self.redis_client.lpop(f"queue:{user_id}")
     def del_queue(self,user_id):
+        # if self.is_done(user_id):
+        #     del self.queue_dict[user_id]
+        #     del self.done_dict[user_id]
         if self.is_done(user_id):
-            del self.queue_dict[user_id]
-            del self.done_dict[user_id]
+            self.redis_client.delete(f"queue:{user_id}")
+            self.redis_client.delete(f"done:{user_id}")
     class UserLogger:
         def __init__(self, user_id):
             self.user_id = user_id
             self.logger = OutLog._instance.logger
 
         def log(self, item: str, level: str):
+            self._log_to_logger(item, level)
+            if(level != "INFO"):
+                return
             dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             log_entry = f"{dtf}[{level}]: {item}"
-            OutLog._instance.queue_dict[self.user_id].append(log_entry)  # 保存到对应用户的队列
-            self._log_to_logger(item, level)
-
+            # print(log_entry)
+            # OutLog._instance.queue_dict[self.user_id].append(log_entry)  # 保存到对应用户的队列
+            OutLog._instance.redis_client.rpush(f"queue:{self.user_id}", log_entry)  # 保存到对应用户的队列
         def _log_to_logger(self, item: str, level: str):
             if level == "DEBUG":
                 self.logger.debug(item)
@@ -177,11 +82,17 @@ class OutLog:
         def critical(self, item: str):
             self.log(item, "CRITICAL")
 
+    # def mark_done(self, user_id, producer_name):
+    #     self.done_dict[user_id][producer_name] = True
+    # def is_done(self, user_id):
+    #     # print(self.done_dict.get(user_id, {}),self.done_dict.get(user_id, {}).values())
+    #     return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
     def mark_done(self, user_id, producer_name):
-        self.done_dict[user_id][producer_name] = True
+        self.redis_client.hset(f"done:{user_id}", producer_name, "1")
 
     def is_done(self, user_id):
-        return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
+        done_dict = self.redis_client.hgetall(f"done:{user_id}")
+        return all(value == "1" for value in done_dict.values()) if done_dict else False # 检查所有生产者是否完成
 
 
 # 日志配置
@@ -203,13 +114,13 @@ log_config = {
             'class': 'logging.FileHandler',
             'filename': 'Logger.log',
             'formatter': 'standard',
-            'level': logging.WARNING,
+            'level': logging.INFO,
         },
     },
     'loggers': {
         '': {
             'handlers': ['console', 'file'],
-            'level': logging.WARNING,
+            'level': logging.INFO,
             'propagate': True,
         },
     }