更新文件

9 months ago · 6a406ec64e
8 changed files with 963 additions and 609 deletions
--- a/checkCompanyName.py
+++ b/checkCompanyName.py
@@ -8,9 +8,10 @@ import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import requests
-from myLogger import outLog
+# from myLogger import outLog
 import time
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
@@ -31,9 +32,9 @@ _SerializedRelationships.load_from_xml = load_from_xml_v2
 import logging
-outLog.logger = logging.getLogger("checkCompanyName")
+# outLog.logger = logging.getLogger("checkCompanyName")
-userLog=None
+userLog = None
-prompt ='''
+prompt = '''
 .根据上述文本判断，是否为具体的公司或组织名称，你可以使用工具利用互联网查询，
 你只能在[具体的公司或组织名称,公益组织,简称,统称,泛化组织,政府单位,机关单位,学校，行业类型，其他]选项中选择答案,
 回答格式[{“companyName”：“名称”,"回答":"答案"}，{“companyName”：“名称”,"回答":"答案"}]，不做过多的解释,严格按回答格式作答;
@@ -54,8 +55,8 @@ def getDocxToTextAll(name):
    docxPath = name
    loopCount = 0
    while True:
-        loopCount+=1
+        loopCount += 1
-        if(loopCount>=15):
+        if (loopCount >= 60):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
@@ -76,17 +77,16 @@ def getDocxToTextAll(name):
            words.append(text)
    # 将所有段落文本拼接成一个字符串，并用换行符分隔
    text = '\n'.join(words)
    # userLog.info("checkCompanyName----保存文件")
    # 将文本写入txt文件
    with open("checkCompanyName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
 def companyNameTask(text):
-    yield "文档公司或组织名称检查---启动中...."
+    yield "文档公司或组织名称检查---文档解析中...."
-    userLog.info("checkCompanyName----启动中....")
+    userLog.info("文档公司或组织名称检查---任务开始")
-    batchNum = 20
+    batchNum = 5
-    sentences = re.split(r'[。\n]', text)
+    sentences = re.split(r'[、，。\n]', text)
    # 去掉空字符
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    # 计算总字符数
@@ -101,19 +101,19 @@ def companyNameTask(text):
    # 打印每一份的内容
    for i, chunk in enumerate(chunks):
        yield f"文档公司或组织名称检查---文档解析进度:{i + 1}/{num_chunks}"
        userLog.info(f"checkCompanyName----文档解析进度:{i + 1}/{num_chunks}")
        try:
-            wenBen = ".".join(chunk)
+            # wenBen = ".".join(chunk)
-            url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
+            url = "http://0.0.0.0:8191/taskflow/checkPlaceNameServer"
            headers = {"Content-Type": "application/json"}
            data = {
                "data": {
-                    "text": wenBen,
+                    "text": chunk,
                    # "text":wenBen
                }
            }
            r = requests.post(url=url, headers=headers, data=json.dumps(data))
            res = json.loads(r.text)
-            # userLog.info(res)
+            res = res["data"]
            # print(res)
        except Exception as e:
            userLog.warning(chunk)
@@ -121,44 +121,52 @@ def companyNameTask(text):
            userLog.warning(e)
            return
        isplace = False
-        for zuhe in res["result"]:
+
        # for zuhe in res:
        #     # 上一个的地名,这一个还是地名，就和上一个相加代替这个
        #     if isplace:
        #         name = placeList[len(placeList) - 1]
        #         if zuhe[1].find("组织机构类") >= 0:  # or zuhe[1] == "ns"
        #             isplace = True
        #             new_text = zuhe[0].replace("\n", "")
        #             placeList[len(placeList) - 1] = name + new_text
        #             continue
        #     if zuhe[1].find("组织机构类") >= 0:
        #         isplace = True
        #         new_text = zuhe[0].replace("\n", "")
        #         placeList.append(new_text)
        #     else:
        #         isplace = False
        ##案例[[('目前', 'TIME'), ('江北区历史文化档案馆', 'ORG')], [('宁波国研简直，并且在东软', 'ORG'), ('宁波市北仑区教育局', 'ORG'), ('国研信息', 'ORG'), ('浙江省', 'LOC'), ('宁波市金凤区', 'LOC'), ('金凤区', 'LOC')]]
        for zuhe in res:
            # 上一个的地名,这一个还是地名，就和上一个相加代替这个
-            if isplace:
+            for chid in zuhe:
-                name = placeList[len(placeList) - 1]
+                if (chid[1] == "ORG"):
-                if zuhe[1].find("组织机构类") >= 0:  # or zuhe[1] == "ns"
+                    new_text = chid[0].replace("\n", "")
                    isplace = True
                    new_text = zuhe[0].replace("\n", "")
                    placeList[len(placeList) - 1] = name + new_text
                    continue
            if zuhe[1].find("组织机构类") >= 0:
                isplace = True
                new_text = zuhe[0].replace("\n", "")
                    placeList.append(new_text)
            else:
                isplace = False
    # 打印总份数
    yield "文档公司或组织名称检查---文档解析完成"
    userLog.info("checkCompanyName----文档解析完成")
    placeList = list(dict.fromkeys(placeList))
    userLog.debug(placeList)
    yield placeList
    userLog.info(placeList)
-def checkCompanyName(filename,user_id):
+
 def checkCompanyName(filename, user_id, outLog):
    yield f"文档公司或组织名称检查---开始处理文档..."
    global userLog
-    userLog=outLog.get_queue(user_id, "checkCompanyName")
+    userLog = outLog.get_queue(user_id, "checkCompanyName")
    try:
        getDocxToTextAll(filename)
    except Exception as e:
        userLog.warning(e)
        userLog.warning("文档公司或组织名称检查---文档无法打开，请检查文档内容")
-        yield "文档公司或组织名称检查---文档无法打开，请检查文档内容"
+        yield "文档公司或组织名称检查---文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
        outLog.mark_done(user_id, "checkCompanyName")
        return
    with open("checkCompanyName.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档公司或组织名称检查---开始解析文档..."  # 每次生成一个数字就发送
-    userLog.info("checkCompanyName----开始解析文档...")
+    final_list = ""
    for item in companyNameTask(gettext):
        if isinstance(item, str):
            yield item
@@ -174,7 +182,6 @@ def checkCompanyName(filename,user_id):
        if cishu > 3:
            cishu = 0
        yield "文档公司或组织名称检查---结果生成中" + '.' * cishu
        userLog.info(f"checkCompanyName----结果生成中" + '.' * cishu)
        cishu += 1
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
@@ -182,14 +189,15 @@ def checkCompanyName(filename,user_id):
    for place in parsed_data:
        try:
-            if place['回答'] == '非泛化的公司或组织名称':
+            if place['回答'] == '具体的公司或组织名称':
                if (place["companyName"] == "北京国研科技咨询有限公司浙江分公司"):
                    continue
                error_places.append(place)
        except Exception as e:
            userLog.warning(place)
            userLog.warning(e)
            userLog.warning("文档公司或组织名称检查---组织提出出错")
            continue
    userLog.info(error_places)
    returnInfo = "发现异常公司或组织名称<br>"
    if len(error_places) > 0:
        for t in error_places:
@@ -199,9 +207,9 @@ def checkCompanyName(filename,user_id):
            t["yuanwen"] = paragraphs[0]
            yuanwen = paragraphs[0].replace(keyword, f"**{keyword}**").replace("\n", "")
            returnInfo += "原文：" + yuanwen + "<br>异常公司或组织名称：**" + keyword + "**！请注意" + "<br>"
-            userLog.info(returnInfo)
+            userLog.info("文档公司或组织名称检查---原文：" + yuanwen + "异常公司或组织名称：" + keyword + "！请注意")
        yield returnInfo
    else:
        yield "**未发现异常公司或组织名称**<br>"
-        userLog.info("**未发现异常公司或组织名称**<br>")
+        userLog.info("文档公司或组织名称检查---未发现异常公司或组织名称")
    outLog.mark_done(user_id, "checkCompanyName")
--- a/checkDocumentError.py
+++ b/checkDocumentError.py
@@ -8,7 +8,7 @@ import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import requests
-from myLogger import outLog
+# from myLogger import outLog
 import time
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
@@ -27,9 +27,9 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 _SerializedRelationships.load_from_xml = load_from_xml_v2
-import logging
+# import logging
-outLog.logger = logging.getLogger("checkDocumentError")
+# outLog.logger = logging.getLogger("checkDocumentError")
 userLog=None
 llm_cfg = {
    # 'model': 'qwen1.5-72b-chat',
@@ -40,7 +40,7 @@ llm_cfg = {
 bot = Assistant(llm=llm_cfg,
                name='Assistant',
                # description='使用RAG检索并回答，支持文件类型：PDF/Word/PPT/TXT/HTML。'
-
+                system_message="你是一个错别字分析大师"
                )
 # prompt='''
 # 是否存在错别字，若存在请指出，不做其他方面的校验，你只能在[存在，不存在，未知]选项中选择答案,
@@ -48,25 +48,25 @@ bot = Assistant(llm=llm_cfg,
 # '''
 prompt = '''
 请回答以上问题，[是，否]选项中选择答案,原文内容，标点符号保持不变，如果有错请给出详细的解析，没有错则不用给解析
-回答格式请按照以下json格式[{"placeName":"序号","回答":"答案","解析","解析内容"},{"placeName":"序号","回答":"答案","解析","解析内容"}]，不做过多的解释,严格按回答格式作答;
+回答格式请按照以下json格式[{"placeName":"序号值","回答":"答案","解析","解析内容"},{"placeName":"序号值","回答":"答案","解析","解析内容"}]，不做过多的解释,严格按回答格式作答;
 '''
 def getDocxToTextAll(name):
    userLog.info("checkDocumentError----打开文档")
    docxPath = name
    loopCount = 0
    while True:
        loopCount+=1
        if(loopCount>=15):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
    document = Document(docxPath)
-            break
+    # while True:
-        except Exception as e:
+    #     loopCount+=1
-            time.sleep(1)
+    #     if(loopCount>=60):
-            pass
+    #         raise Exception("文档读取超时，或文档存在问题无法读取")
    #         break
    #     try:
    #         document = Document(docxPath)
    #         break
    #     except Exception as e:
    #         time.sleep(1)
    #         pass
    # 逐段读取docx文档的内容
    words = []
    for paragraph in document.paragraphs:
@@ -84,23 +84,21 @@ def getDocxToTextAll(name):
        txt_file.write(text)
-def checkDocumentError(filename,user_id):
+def checkDocumentError(filename,user_id,outLog):
    global userLog
    userLog=outLog.get_queue(user_id,"checkDocumentError")
    yield f"文档纠错---开始处理文档..."
    userLog.info("checkDocumentError----开始处理文档...")
    try:
        getDocxToTextAll(filename)
    except Exception as e:
        userLog.warning(e)
        userLog.warning("文档纠错----文档无法打开，请检查文档内容")
-        yield "文档纠错----文档无法打开，请检查文档内容"
+        yield "文档纠错----文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
        outLog.mark_done(user_id, "checkDocumentError")
        return
    with open("checkDocumentError.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档纠错---开始解析文档..."  # 每次生成一个数字就发送
    userLog.info("checkDocumentError----开始解析文档...")
    final_list = []
    for item in documentErrorTask(gettext):
        if isinstance(item, str):
@@ -113,12 +111,11 @@ def checkDocumentError(filename,user_id):
            yuanwen = i["placeName"].replace("\n", "")
            jianyi = i["jianyi"].replace("\n", "")
            resInfo += "原文：" + yuanwen + "<br>建议：**" + jianyi + "**<br>"
            userLog.info(resInfo)
        yield resInfo
    else:
        yield "**未发现错别字**"
-        userLog.info("未发现错别字")
+        userLog.info("文档纠错---未发现错别字")
    outLog.mark_done(user_id,"checkDocumentError")
@@ -129,27 +126,33 @@ def documentErrorTask(text):
    :param batch_size: 每批处理的字符数
    :return: 生成器，每次返回一批文本
    """
-    yield "文档纠错---启动中...."
+    yield "文档纠错---文档解析中...."
-    userLog.info("checkDocumentError----启动中....")
+    userLog.info("文档纠错---任务开始")
    batchNum = 20
    sentences = re.split(r'[。\n]', text)
    # 去掉空字符
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    # 计算总字符数
    total_chars = len(sentences)
    # 计算有多少份
    num_chunks = math.ceil(total_chars / batchNum)
    # 按batchNum字为一份进行处理
    chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
    # 打印每一份的内容
    err = []
    for i, chunk in enumerate(chunks):
        yield f"文档纠错---文档解析进度:{i + 1}/{num_chunks}"
        userLog.info(f"checkDocumentError----文档解析进度:{i + 1}/{num_chunks}")
        try:
-            url = "http://0.0.0.0:8190/taskflow/checkDocumentError"
+            # url = "http://0.0.0.0:8190/taskflow/checkDocumentError"
            # headers = {"Content-Type": "application/json"}
            # data = {
            #     "data": {
            #         "text": chunk,
            #     }
            # }
            # r = requests.post(url=url, headers=headers, data=json.dumps(data))
            # res = json.loads(r.text)
            url = "http://127.0.0.1:5001/taskflow/checkDocumentError"
            headers = {"Content-Type": "application/json"}
            data = {
                "data": {
@@ -158,12 +161,13 @@ def documentErrorTask(text):
            }
            r = requests.post(url=url, headers=headers, data=json.dumps(data))
            res = json.loads(r.text)
            # print(res)
        except Exception as e:
            userLog.warning(chunk)
-            userLog.warning("文档纠错--错别字识别出错\n", e)
+            userLog.warning("文档纠错--错别字识别出错\n")
            userLog.warning(e)
            continue
-        lines_with_greeting = [place for place in res["result"] if len(place['errors']) > 0]
+        lines_with_greeting = [place for place in res["data"] if len(place['errors']) > 0]
        userLog.debug(lines_with_greeting)
        if len(lines_with_greeting) > 0:
            num = 0
            wenti = []  # 记录问题的数组
@@ -173,26 +177,28 @@ def documentErrorTask(text):
                keyword = t['source']
                keyword_list.append(keyword)
                for item in t["errors"]:
-                    for key, value in item['correction'].items():
+                    # for key, value in item['correction'].items():
-                        temp_errorWords.append(key)
+                    #     temp_errorWords.append(key)
                    temp_errorWords.append(item[0])
                wenti.append(
-                    "序号：{}，原文：{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
+                    # "{}：原文是{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
                    "{}：原文是{}。问题：当前原文是否存在错别字,只检查错被子，其他不做分析".format(num, keyword))
                num += 1
            words = "\n".join(wenti)
            userLog.debug(words)
            messages = [{'role': 'user', 'content': [{'text': words + prompt}]}]
            runList = []
            yield f"文档纠错---内容解析中..."  # 每次生成一个数字就发送
            userLog.info(f"checkDocumentError----内容解析中...")
            cishu = 0
            for rsp in bot.run(messages):
                runList.append(rsp)
                if cishu > 3:
                    cishu = 0
                yield "文档纠错---内容解析中" + '.' * cishu
                userLog.info(f"checkDocumentError----内容解析中内容解析中" + '.' * cishu)
                cishu += 1
            data = runList[len(runList) - 1][0]["content"]
            parsed_data = json_repair.loads(data.replace("\\", "").replace('`', ''))
            userLog.debug(parsed_data)
            resListerr = []
            for place in parsed_data:
                try:
@@ -200,14 +206,16 @@ def documentErrorTask(text):
                        place["placeName"] = keyword_list[int(place["placeName"])]
                        place["jianyi"] = place["解析"]
                        resListerr.append(place)
                        userLog.info("文档纠错---原文：" + place["placeName"] + "<br>建议：" + place["jianyi"])
                except Exception as e:
                    userLog.warning(parsed_data)
                    userLog.warning(place)
-                    userLog.warning("文档纠错--错别字提取出错\n", e)
+                    userLog.warning("文档纠错--错别字提取出错\n")
                    userLog.warning(e)
                    continue
            if (len(resListerr) > 0):
                err.extend(resListerr)
    # 打印总份数
-    yield "文档地名检查---文档解析完成"
+    yield "文档纠错---文档解析完成"
-    userLog.info(err)
+    userLog.info("文档纠错---任务结束")
    yield err
--- a/checkPlaceName.py
+++ b/checkPlaceName.py
@@ -87,7 +87,6 @@ def getDocxToTextAll(docxPath):
 #得到全文和地名有关的内容
 def placeNameTask(text):
    yield "文档地名检查---启动中...."
    userLog.info("checkPlaceName----启动中....")
    batchNum=20
    sentences = re.split(r'[。\n]', text)
    # 去掉空字符
@@ -104,7 +103,6 @@ def placeNameTask(text):
    # 打印每一份的内容
    for i, chunk in enumerate(chunks):
        yield f"文档地名检查---文档解析进度:{i + 1}/{num_chunks}"
        userLog.info(f"checkPlaceName----文档解析进度:{i + 1}/{num_chunks}")
        wenBen=".".join(chunk)
        try:
            url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
@@ -139,7 +137,6 @@ def placeNameTask(text):
                isplace = False
    # 打印总份数
    yield "文档地名检查---文档解析完成"
    userLog.info("checkPlaceName---文档解析完成")
    placeList=list(dict.fromkeys(placeList))
    yield placeList
@@ -175,7 +172,6 @@ def checkPlaceName(filename,user_id):
        if cishu>3:
            cishu=0
        yield "文档地名检查---结果生成中"+'.'*cishu
        userLog.info("checkPlaceName---结果生成中"+'.'*cishu)
        cishu+=1
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
@@ -186,12 +182,11 @@ def checkPlaceName(filename,user_id):
            if place['回答'] == '错误':
                error_places.append(place)
        except Exception as e:
            userLog.warning(parsed_data)
            userLog.warning(place)
            userLog.warning(parsed_data)
            userLog.warning("文档地名检查---组织提出出错")
            userLog.warning(e)
            continue
    userLog.info(error_places)
    returnInfo = "发现异常地名<br>"
    if len(error_places)>0:
        for t in error_places:
@@ -200,9 +195,9 @@ def checkPlaceName(filename,user_id):
            paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
            yuanwen= paragraphs[0].replace(keyword,f"**{keyword}**").replace("\n","")
            returnInfo+="原文：" + yuanwen + "<br>出现异常地名：**" + keyword + "**！请注意" + "<br>"
-            userLog.info(returnInfo)
+            userLog.info("文档地名检查---原文：" + yuanwen + "出现异常地名：" + keyword + "！请注意")
        yield returnInfo
    else:
        yield "**未发现发现异常地名**"
-        userLog.info("未发现发现异常地名")
+        userLog.info("文档地名检查---未发现发现异常地名")
    outLog.mark_done(user_id, "checkPlaceName")
--- a/checkRepeatText.py
+++ b/checkRepeatText.py
@@ -7,6 +7,7 @@ from qwen_agent.agents import Assistant
 import json_repair
 import json
 embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
 # embeddings = HuggingFaceEmbeddings(model_name="shibing624/text2vec-base-chinese",model_kwargs={"device":"npu:5"})
 device_id=0
 import re
 import time
@@ -17,9 +18,9 @@ from docx.opc.oxml import parse_xml
 import logging
 import logging.config
 import requests
-from myLogger import outLog
+# from myLogger import outLog
-outLog.logger = logging.getLogger("checkRepeatText")
+# outLog.logger = logging.getLogger("checkRepeatText")
 userLog=None
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
@@ -79,11 +80,10 @@ def isTitle(paragraph):
 #寻找标题名称
 def findTitleName(docxPath):
    yield '文档相似性检查----检查是否存在详细设计方案'
    loopCount = 0
    while True:
        loopCount+=1
-        if(loopCount>=15):
+        if(loopCount>=60):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
@@ -95,9 +95,19 @@ def findTitleName(docxPath):
    # 逐段读取docx文档的内容
    titleWords=[]
    firstTitle = 0
    firstTitleName=""
    secondTitle = 0
    sanjiTitle = 0
    levelText=""
    count = 0
    numid =0
    wordContent={}
    total = len(document.paragraphs)
    addStart = False#是否重新添加
    yield "文档相似性检查----文档内容解析中",str(count),str(total)
    for paragraph in document.paragraphs:
        count+=1
        yield "文档相似性检查----文档内容解析中",str(count),str(total)
        # 判断该段落的标题级别
        # 这里用isTitle()临时代表，具体见下文介绍的方法
        text = paragraph.text
@@ -109,6 +119,8 @@ def findTitleName(docxPath):
                if(text.find("附件")>=0):
                    continue
                titleWords.append("一级标题:".format(firstTitle)+text)
                addStart=True
                firstTitleName=text
            elif level=="1":
                secondTitle+=1
                sanjiTitle=0
@@ -118,15 +130,28 @@ def findTitleName(docxPath):
                sanjiTitle += 1
                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                # titleWords.append("第{}章的三级标题".format(firstTitle, secondTitle,firstTitle, secondTitle,sanjiTitle) + text)
            ##先判断是不是一级标题
            if addStart:
                wordContent[firstTitleName]=[]
                addStart=False
            if level:
                levelText=f"{int(level)+1}级标题-"+text
            else:
                if(text.startswith("图") or text.startswith("注：")):
                    continue
                if (len(text)>30 and firstTitleName):
                    numid+=1
                    wordContent[firstTitleName].append("{}：".format(levelText)+text)
    findTitleName_llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b",
    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
    # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    yield '文档相似性检查----检查是否存在详细设计方案'
    findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
                                    name='Assistant',
-                                    # system_message='1：这样的是一级标题。1.1：这样的是二级标题。1.1.1：这样的是三级标题'
+                                    system_message='按照要求选择最合适的，是唯一的'
                                )
    prompt='''\n是文档的大纲，一级标题组成，哪一章存在与方案相关的内容
    类似详细设计方案,详细服务方案，详细建设方案为最相关的，优先选择
@@ -142,60 +167,78 @@ def findTitleName(docxPath):
        runList.append(rsp)
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    try:
        if(parsed_data["answer"]=="存在"):
-        yield parsed_data["name"]
+            yield parsed_data["name"],wordContent
        else:
            yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
 #获取文档中 详细设计方案 章节的所有内容
 def getDocxToText(docxPath,titleName,vector_store_path):
    loopCount = 0
    while True:
        loopCount+=1
        if(loopCount>=15):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
            document = Document(docxPath)
            break
    except Exception as e:
-            time.sleep(1)
+        userLog.warning(e)
-            pass
+        userLog.warning(data)
-    # 逐段读取docx文档的内容
+        userLog.warning(parsed_data)
-    levelList=[]
+        yield "文档相似性检查----检查遇到问题，请联系管理员"
 #获取文档中 详细设计方案 章节的所有内容
 # def getDocxToText(docxPath,titleName,vector_store_path):
 def getDocxToText(titleName,wordContent,vector_store_path):
    # loopCount = 0
    # while True:
    #     loopCount+=1
    #     if(loopCount>=15):
    #         raise Exception("文档读取超时，或文档存在问题无法读取")
    #         break
    #     try:
    #         document = Document(docxPath)
    #         break
    #     except Exception as e:
    #         time.sleep(1)
    #         pass
    # # 逐段读取docx文档的内容
    # levelList=[]
    words=[]
-    addStart = False
+    # addStart = False
-    levelText=""
+    # levelText=""
-    i = 0
+    # i = 0
-    for paragraph in document.paragraphs:
+    # count = 0
-        # 判断该段落的标题级别
+    # total = len(document.paragraphs)
-        # 这里用isTitle()临时代表，具体见下文介绍的方法
+    # yield "文档相似性检查----文档内容解析中",count,total
-        text = paragraph.text
+    # for paragraph in document.paragraphs:
-        if text.strip():#非空判断
+    #     count+=1
-            if titleName:
+    #     yield "文档相似性检查----文档内容解析中",count,total
-                level = isTitle(paragraph)
+    #     # 判断该段落的标题级别
-                if(addStart and level=="0"):
+    #     # 这里用isTitle()临时代表，具体见下文介绍的方法
-                    addStart=False
+    #     text = paragraph.text
-                if(level=="0" and (titleName.find(text)>=0 or text.find(titleName)>=0)):
+    #     if text.strip():#非空判断
-                    addStart=True
+    #         if titleName:
-                if level:
+    #             level = isTitle(paragraph)
-                    levelList.append("{}：".format(level)+paragraph.text)
+    #             if(addStart and level=="0"):
-                    levelText=f"{int(level)+1}级标题-"+text
+    #                 addStart=False
-                else:
+    #             if(level=="0" and (titleName.find(text)>=0 or text.find(titleName)>=0)):
-                    if addStart:
+    #                 addStart=True
-                        if(text.startswith("图") or text.startswith("注：")):
+    #             if level:
-                            continue
+    #                 levelList.append("{}：".format(level)+paragraph.text)
-                        if(len(text)>30):
+    #                 levelText=f"{int(level)+1}级标题-"+text
-                            i=i+1
+    #             else:
-                            words.append("{}：".format(levelText)+text)
+    #                 if addStart:
    #                     if(text.startswith("图") or text.startswith("注：")):
    #                         continue
    #                     if(len(text)>30):
    #                         i=i+1
    #                         words.append("{}：".format(levelText)+text)
    # 将所有段落文本拼接成一个字符串，并用换行符分隔
    # 遍历字典，查找包含 "标题的" 的键
    for key, value in wordContent.items():
        if (titleName.find(key)>=0 or key.find(titleName)>=0):
            words.extend(value)  # 将对应的值添加
    if len(words)==0:
        raise Exception("checkRepeatText，获取长度为0")
    text = '\n'.join(words)
-
+    userLog.info(f"文档相似性检查----需要处理的总数是{len(words)}")
    # 将文本写入txt文件
    with open("checkRepeatText.txt", 'w', ) as txt_file:
        txt_file.write(text)
-    time.sleep(3)
+    time.sleep(1)
    yield "文档相似性检查----文档内容转换中",".","."
    loader = TextLoader(file_path='checkRepeatText.txt')
    docs = loader.load()
    # print(docs)
@@ -204,34 +247,46 @@ def getDocxToText(docxPath,titleName,vector_store_path):
    splits = text_splitter.split_documents(docs)
    uuids = []
    yield "文档相似性检查----文档保存中",".","."
    global embeddings
    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
    for i in range(len(splits)):
-        uuids.append(str(uuid.uuid4()))
+        uuidStr=str(uuid.uuid4())
        uuids.append(uuidStr)
    logging.info(f"checkRepeatTextuuidLen{len(uuids)}")
    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
    vectorstore.add_documents(documents=splits, ids=uuids)
    yield "文档相似性检查----校验文档是否已经完成保存",".","."
    while True:
        time.sleep(0.3)
        ress = vectorstore.similarity_search(words[0])
        if (len(ress) > 0):
            break
-    return words,uuids,vectorstore
+    yield words,uuids,vectorstore
 # @app.route('/checkRepeatText/<filename>', methods=['GET'])
-def checkRepeatText(filename,user_id):
+def checkRepeatText(filename,user_id,outLog):
    global userLog
    userLog=outLog.get_queue(user_id,"checkRepeatText")
    yield "文档相似性检查---启动中...."
    userLog.info("文档相似性检查---任务开始")
    vector_store_path="vector_store"+str(uuid.uuid4())
    for titleName in findTitleName(filename):
        if(isinstance(titleName ,tuple)):
            if(len(titleName)==3):
                yield titleName[0]+titleName[1]+"/"+titleName[2]
        else:
            yield titleName
-    if(titleName!="文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"):
+    if(isinstance(titleName ,tuple)):
        # try:
        yield "文档相似性检查----文档内容转换中"
        try:
-            yield "文档相似性检查----文档内容解析中"
+            for words,uuids,vectorstore in getDocxToText(titleName[0],titleName[1],vector_store_path):
-            words,uuids,vectorstore=getDocxToText(filename,titleName,vector_store_path)
+                if isinstance(words, str):
                    yield words+uuids+vectorstore
        except Exception as e:
-            yield f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败"
+            yield f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
            userLog.warning(e)
            userLog.warning(f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败")
            outLog.mark_done(user_id, "checkRepeatText")
@@ -241,7 +296,7 @@ def checkRepeatText(filename,user_id):
        count = 0
        for i in words:
            count += 1
-            yield f"文档相似性检查--对{titleName}章节，进行文档内容检查中{count}/{len(words)}"
+            yield f"文档相似性检查--对{titleName[0]}章节，进行文档内容检查中{count}/{len(words)}"
            result = vectorstore.similarity_search(i)
            textTag = i.split("：")[0]
            for content in result:
@@ -259,6 +314,7 @@ def checkRepeatText(filename,user_id):
                    }
                    r = requests.post(url=url, headers=headers, data=json.dumps(data))
                    res = json.loads(r.text)
                    res=res["data"]
                    # res = similarity([[i[i.find('：') + 1:], text[text.find('：') + 1:]]])
                except Exception as e:
                    userLog.warning("文档相似性检查--发生异常:")
@@ -266,7 +322,7 @@ def checkRepeatText(filename,user_id):
                    userLog.warning(i)
                    userLog.warning(text)
                    continue
-                if (res["result"][0]["similarity"] > 0.90):
+                if (res[0]["similarity"] >= 0.96):
                    # 判断重复内容是否被放入
                    if (len(reslist) > 0):
                        isExist = False
@@ -276,15 +332,15 @@ def checkRepeatText(filename,user_id):
                                break
                        if not isExist:
                            # reslist.append({"yuanwen1":i[i.find('：') + 1:],"yuanwen2":text[text.find('：') + 1:],"similarity":res[0]["similarity"]})
-                            userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
+                            userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res[0]["similarity"])+"】")
-                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
+                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
                    else:
-                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
+                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
                        # print(i.split("：")[1] + "\n" + text.split("：")[1])
-                        userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
+                        userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res[0]["similarity"])+"】")
        # vectorstore.delete(ids=uuids)
        shutil.rmtree(vector_store_path)
-        resInfo=f"对{titleName}章节，发现相似内容：<br>"
+        resInfo=f"对{titleName[0]}章节，发现相似内容：<br>"
        if(len(reslist)>0):
            for res in reslist:
                resInfo+="【在**"+res["yuanwen1"][:res["yuanwen1"].find('：')]+"**下包含："+res["yuanwen1"][res["yuanwen1"].find('：') + 1:]+"<br>在**"+res["yuanwen2"][:res["yuanwen2"].find('：')]+"**下包含："+res["yuanwen2"][res["yuanwen2"].find('：') + 1:]+"<br>以上两段内容***相似度***："+'{:.2f}'.format(res['similarity'])+"】<br>"
--- a/checkTitleName.py
+++ b/checkTitleName.py
@ -8,7 +8,9 @@ import json_repair
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-from myLogger import outLog
+
 # from myLogger import outLog
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
@ -29,11 +31,11 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 _SerializedRelationships.load_from_xml = load_from_xml_v2
 import logging
-outLog.logger = logging.getLogger("checkTitleName")
+# outLog.logger = logging.getLogger("checkTitleName")
-userLog=None
+userLog = None
 llm_cfg = {
-    #'model': 'qwen1.5-72b-chat',
+    # 'model': 'qwen1.5-72b-chat',
-    'model':"qwen2-72b-instruct",
+    'model': "qwen2-72b-instruct",
    'model_server': 'DashScope',  # base_url, also known as api_base
    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
 }
@ -81,12 +83,13 @@ def isTitle(paragraph):
    # 如果在段落、样式里都没有找到大纲级别，返回None
    return None
-#获取文档中 详细设计方案 章节的所有内容
+
 # 获取文档中 详细设计方案 章节的所有内容
 def getDocxToTitleName(docxPath):
    loopCount = 0
    while True:
-        loopCount+=1
+        loopCount += 1
-        if(loopCount>=15):
+        if (loopCount >= 60):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
@ -96,64 +99,72 @@ def getDocxToTitleName(docxPath):
            time.sleep(1)
            pass
    # 逐段读取docx文档的内容
-    levelList=[]
+    levelList = []
-    words=[]
+    words = []
    addStart = False
-    levelText=""
+    levelText = ""
-    i = 0
+    count = 0
    total = len(document.paragraphs)
    yield f"文档结构检查----文档内容解析中{str(count)}/{str(total)}"
    for paragraph in document.paragraphs:
        count += 1
        yield f"文档结构检查----文档内容解析中{str(count)}/{str(total)}"
        # 判断该段落的标题级别
        # 这里用isTitle()临时代表，具体见下文介绍的方法
        text = paragraph.text
-        if text.strip():#非空判断
+        if text.strip():  # 非空判断
            level = isTitle(paragraph)
-            if level=="0":
+            if level == "0":
                words.append(text)
-    return words
+    yield words
-def checkTitleName(filename,user_id):
+
 def checkTitleName(filename, user_id, outLog):
    global userLog
-    userLog=outLog.get_queue(user_id,"checkTitleName")
+    userLog = outLog.get_queue(user_id, "checkTitleName")
    yield '文档结构检查----启动中'
-    userLog.info("checkTitleName----启动中")
+    userLog.info("文档结构检查---任务开始")
-    with open("ce模板.txt", "r",encoding='utf-8') as f:
+    with open("ce模板.txt", "r", encoding='utf-8') as f:
        gettext = f.readlines()
-    count=0
+    count = 0
    reserr = []
    try:
-        word = getDocxToTitleName(filename)
+        for i in getDocxToTitleName(filename):
            word = i
            if (isinstance(word, str)):
                yield word
                continue
    except Exception as e:
        userLog.warning(e)
-        yield "文档结构检查----文档无法打开，请检查文档内容"
+        yield "文档结构检查----文件无法正常打开。可以尝试用WORD或WPS打开文件，进行修复并另存，用另存的文件再做一次尝试。"
        outLog.mark_done(user_id, "checkTitleName")
        userLog.warning("checkTitleName----文档无法打开，请检查文档内容")
        outLog.mark_done(user_id, "checkTitleName")
        return
    for text in gettext:
-        count+=1
+        count += 1
        prompt = f'''
        \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
        '''
-        xushang="回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
+        xushang = "回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
        yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
-        userLog.info(f"checkTitleName----结构分析中{count}/{len(gettext)}")
+        strword = "\n".join(word) + prompt + xushang
-        strword = "\n".join(word)+prompt+xushang
+        messages = [{'role': 'user', 'content': [{'text': strword}]}]
        messages = [{'role': 'user', 'content': [{'text':strword}]}]
        runList = []
        for rsp in bot.run(messages):
            runList.append(rsp)
            # print(rsp)
        data = runList[len(runList) - 1][0]["content"]
        parsed_data = json_repair.loads(data.replace('`', ''))
-        if(parsed_data["answer"]=="不存在"):
+        if (parsed_data["answer"] == "不存在"):
            reserr.append(text)
-
+            userLog.info("文档结构检查----文档结构存在异常：" + text.replace('\n', ''))
-    resInfo="文档结构存在异常：<br>"
+    resInfo = "文档结构存在异常：<br>"
-    if(len(reserr)>0):
+    if (len(reserr) > 0):
        for i in reserr:
-            resInfo+="**"+i.replace('\n','')+"**<br>"
+            resInfo += "**" + i.replace('\n', '') + "**<br>"
-            userLog.info(resInfo)
+
        yield resInfo
    else:
-        yield "文档结构未发现异常"
+        yield "**文档结构未发现异常**"
-        userLog.info("文档结构未发现异常")
+        userLog.info("文档结构检查----文档结构未发现异常")
    outLog.mark_done(user_id, "checkTitleName")
--- a/daijian方案.py
+++ b/daijian方案.py
@ -1,11 +1,24 @@
-from docx import Document
+import uuid
-from pprint import pprint
+from langchain_community.embeddings import DashScopeEmbeddings
 from langchain_community.document_loaders import TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from qwen_agent.agents import Assistant
 import re
 import json_repair
-import math
+import json
 embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
 device_id=0
 import re
 import time
 from docx import Document
 import shutil
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import logging
 import logging.config
 import requests
 from collections import defaultdict
 userLog=None
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
@ -23,17 +36,6 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 _SerializedRelationships.load_from_xml = load_from_xml_v2
 llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b-instruct",
    'model_server': 'DashScope',  # base_url, also known as api_base
    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
 }
 bot = Assistant(llm=llm_cfg,
                name='Assistant',
                )
 # 记录程序开始的时间戳
 def getOutlineLevel(inputXml):
    """
@ -73,15 +75,26 @@ def isTitle(paragraph):
    # 如果在段落、样式里都没有找到大纲级别，返回None
    return None
-#获取文档中 详细设计方案 章节的所有内容
+#寻找标题名称
-def getDocxToTitleName(docxPath):
+def findTitleName(docxPath):
    yield '文档相似性检查----检查是否存在详细设计方案'
    loopCount = 0
    while True:
        loopCount+=1
        if(loopCount>=15):
            raise Exception("文档读取超时，或文档存在问题无法读取")
            break
        try:
            document = Document(docxPath)
            break
        except Exception as e:
            time.sleep(1)
            pass
    # 逐段读取docx文档的内容
-    levelList=[]
+    titleWords=[]
-    words=[]
+    firstTitle = 0
-    addStart = False
+    secondTitle = 0
-    levelText=""
+    sanjiTitle = 0
    i = 0
    for paragraph in document.paragraphs:
        # 判断该段落的标题级别
        # 这里用isTitle()临时代表，具体见下文介绍的方法
@ -89,88 +102,360 @@ def getDocxToTitleName(docxPath):
        if text.strip():#非空判断
            level = isTitle(paragraph)
            if level=="0":
-                words.append(text)
+                firstTitle+=1
-    return words
+                secondTitle = 0
-
+                if(text.find("附件")>=0):
-def checkTitleName(filename):
+                    continue
-    prompt = f'''
+                titleWords.append("一级标题:".format(firstTitle)+text)
-            \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
+            elif level=="1":
                secondTitle+=1
                sanjiTitle=0
                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                # titleWords.append("第{}章的二级标题:".format(firstTitle,firstTitle,secondTitle)+text)
            elif level=="2":
                sanjiTitle += 1
                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                # titleWords.append("第{}章的三级标题".format(firstTitle, secondTitle,firstTitle, secondTitle,sanjiTitle) + text)
    findTitleName_llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b",
    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
    # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
                                    name='Assistant',
                                    # system_message='1：这样的是一级标题。1.1：这样的是二级标题。1.1.1：这样的是三级标题'
                                )
    prompt='''\n是文档的大纲，一级标题组成，哪一章存在与方案相关的内容
    类似详细设计方案,详细服务方案，详细建设方案为最相关的，优先选择
    类似设计方案，服务方案，建设方案为次相关，次级选择
    类似方案是最后选择
    按照这样的顺序选择最合适的
    你只能从这两个答案中选择一个：{"name":"一级标题名称","answer":"存在"}或{"name":"","answer":"不存在"}，不做过多的解释,严格按回答格式作答
    '''
-    xushang = "回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
+    # print("\n".join(titleWords)+prompt)
-    yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
+    messages = [({'role': 'user', 'content': "\n".join(titleWords)+prompt})]
-    strword = "\n".join(word) + prompt + xushang
+    runList=[]
-    # print(strword)
+    for rsp in findTitleName_bot.run(messages):
    messages = [{'role': 'user', 'content': [{'text': strword}]}]
    runList = []
    cishu = 0
    for rsp in bot.run(messages):
        runList.append(rsp)
        # print(rsp)
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
-    print(parsed_data)
+    if(parsed_data["answer"]=="存在"):
-    # yield '文档结构检查----启动中'
+        yield parsed_data["name"]
-    # with open("ce模板.txt", "r",encoding='utf-8') as f:
+    else:
-    #     gettext = f.readlines()
+        yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
    # count=0
    # reserr = []
    # try:
    #     word = getDocxToTitleName(filename)
    # except Exception as e:
    #     print(e)
    #     yield "文档无法打开，请检查文档内容"
    #     return
    # for text in gettext:
    #     count+=1
    #     prompt = f'''
    #     \n 这些是文章的标题，请问【{text}】在标题中是否可以配对的，若有请指出是哪个标题，若没有请回到不存在
    #     '''
    #     xushang="回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
    #     yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
    #     strword = "\n".join(word)+prompt+xushang
    #     # print(strword)
    #     messages = [{'role': 'user', 'content': [{'text':strword}]}]
    #     runList = []
    #     cishu = 0
    #     for rsp in bot.run(messages):
    #         runList.append(rsp)
    #         # print(rsp)
    #     data = runList[len(runList) - 1][0]["content"]
    #     parsed_data = json_repair.loads(data.replace('`', ''))
    #     print(parsed_data)
    #     if(parsed_data["answer"]=="不存在"):
    #         reserr.append(text)
    # resInfo="文档结构存在异常：<br>"
    # if(len(reserr)>0):
    #     for i in reserr:
    #         resInfo+=f"**{i}**<br>"
    #     yield resInfo
    # else:
    #     yield "文档结构未发现异常"
 def merge_chapters(words):
    """Merge lines that share the same heading key into one line per key.

    Each entry of ``words`` is expected to have the form ``"<key>：<value>"``
    using a full-width colon. Values belonging to the same key are stripped
    and joined with a full-width comma, in order of first appearance of the
    key. Lines without a full-width colon are skipped with a warning.

    Args:
        words: iterable of ``"<key>：<value>"`` strings.

    Returns:
        A list with one ``"<key>：<value1>，<value2>…"`` string per distinct key.
    """
    merged_text = {}
    for line in words:
        # Split on the first full-width colon only, so colons inside the
        # value are preserved.
        key, sep, value = line.partition("：")
        if not sep:
            logging.warning(f"Skipping line without key-value pair: {line}")
            continue
        merged_text.setdefault(key, []).append(value.strip())
    # Format the merged result as a list of "key：joined values" strings.
    return [f"{key}：{'，'.join(values)}" for key, values in merged_text.items()]
 #获取文档中 详细设计方案 章节的所有内容
 def getDocxToText(docxPath, titleName, vector_store_path):
    """Collect the body text located under the chapter matching ``titleName``.

    The document is walked paragraph by paragraph. Heading paragraphs (as
    reported by ``isTitle``) update a running section counter and the current
    heading text; collection is switched on while the most recent level-0
    heading contains ``titleName`` and off otherwise. Non-heading paragraphs
    longer than 30 characters are recorded as
    ``"<section number>-<heading>：<text>"`` and then merged per heading with
    ``merge_chapters``. The merged lines are also written to
    ``checkRepeatText.txt`` in the working directory.

    Args:
        docxPath: path of the .docx file to read.
        titleName: text that a level-0 heading must contain to be selected.
        vector_store_path: accepted for interface compatibility; not used here.

    Returns:
        ``(merged_words, uuids)`` where ``uuids`` holds one freshly generated
        UUID string per merged line.

    Raises:
        Exception: if the document cannot be opened after all retries, or if
            no qualifying body text was found.
    """
    # Retry opening the document: 14 attempts, one second apart. The last
    # failure is chained so the real cause is not lost.
    last_error = None
    for _ in range(14):
        try:
            document = Document(docxPath)
            break
        except Exception as e:
            last_error = e
            time.sleep(1)
    else:
        raise Exception("文档读取超时，或文档存在问题无法读取") from last_error

    words = []
    addStart = False
    title_counter = []  # running count per heading level
    title_texts = []    # current heading text per heading level
    for paragraph in document.paragraphs:
        text = paragraph.text.strip()
        if not text:
            continue
        level = isTitle(paragraph)
        current_level = int(level) if level is not None else -1
        if current_level >= 0:
            # Heading paragraph: grow the trackers up to this level.
            while len(title_counter) <= current_level:
                title_counter.append(0)
                title_texts.append('')
            # Bump this level and drop every deeper level, so deeper
            # counters restart from zero under the new heading.
            title_counter[current_level] += 1
            title_counter = title_counter[:current_level + 1]
            title_texts[current_level] = text
            title_texts = title_texts[:current_level + 1]
            # Only a level-0 heading toggles collection on or off.
            if current_level == 0:
                addStart = titleName in text
        elif addStart and len(text) > 30:
            # Body paragraph inside the selected chapter; short ones are ignored.
            levelText = ".".join(map(str, title_counter))
            current_title = title_texts[-1] if title_texts else ''
            words.append(f"{levelText}-{current_title}：{text}")

    if not words:
        raise Exception("checkRepeatText，获取长度为0")

    merged_words = merge_chapters(words)

    # Explicit UTF-8 so the Chinese text round-trips regardless of the
    # platform's default encoding.
    with open("checkRepeatText.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.writelines(f"{line}\n" for line in merged_words)
    # NOTE(review): the purpose of this pause is not visible in this file;
    # kept to preserve the original timing.
    time.sleep(3)
    # The loaded documents are not used here; the call is kept so an
    # unreadable file still fails at this point.
    loader = TextLoader(file_path='checkRepeatText.txt', encoding='utf-8')
    loader.load()

    # One unique identifier per merged line.
    uuids = [str(uuid.uuid4()) for _ in merged_words]
    logging.info(f"checkRepeatTextuuidLen{len(uuids)}")
    return merged_words, uuids
 # @app.route('/checkRepeatText/<filename>', methods=['GET'])
 def checkRepeatText(filename):
    """Generator that scores and rewrites each section of the design chapter.

    Progress messages are yielded as strings. The chapter title is obtained
    from ``findTitleName``; every value it yields is forwarded to the caller
    and the last one is taken as the chapter title (or as the "not found"
    sentinel message, in which case nothing else is done). For each merged
    section returned by ``getDocxToText`` the local model is asked to grade
    the description on four criteria and then to rewrite it once per
    criterion, feeding each rewrite into the next.

    NOTE(review): the per-section ``result`` dict is built but never stored
    or yielded, and ``reslist`` is never filled, so the final message is
    always the "no similar content" one — confirm the intended output.

    Args:
        filename: path of the .docx file to analyse.

    Yields:
        str: progress messages followed by the final result message.
    """
    yield "文档相似性检查---启动中...."
    vector_store_path="vector_store"+str(uuid.uuid4())
    for titleName in findTitleName(filename):
        yield titleName
    if(titleName!="文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"):
        yield "文档相似性检查----文档内容解析中"
        words,uuids=getDocxToText(filename,titleName,vector_store_path)
        reslist = []
        # Grading rubric sent verbatim to the model, one entry per criterion.
        standard = {
            "清晰性": """对软件功能描述的完整性主要体现在以下两个方面：
                        a. 功能描述是否简洁明了，避免使用过于复杂或专业的术语，使得用户能够轻松理解。
                        b. 是否明确指出了功能的具体作用，没有模糊不清或含糊其辞的表述。
                        如果要将软件功能描述的清晰性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
                        将软件功能描述的清晰性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
                        优秀（90~100分）
                        简洁明了：功能描述极其精炼，没有多余的词汇，每个字都承载着必要的信息。
                        通俗易懂：完全避免了专业术语或行业黑话，即使是非专业用户也能轻松理解。
                        具体明确：功能的作用、范围、限制以及用户期望的结果都被清晰、准确地阐述，没有任何模糊或含糊的表述。
                        良好（70分~90分，不包含90分）
                        较为简洁：功能描述相对简短，但可能包含一些必要的细节或背景信息。
                        易于理解：大部分术语都是通俗易懂的，对于少数专业术语，提供了简短的解释或上下文。
                        明确具体：功能的主要作用、范围和用户期望的结果都被明确阐述，但可能在某些细节上稍显模糊。
                        一般（60~70分，不包含70分）
                        稍显冗长：功能描述可能包含一些不必要的细节或重复信息，导致用户需要花费更多时间来理解。
                        有一定难度：使用了一些专业术语或行业黑话，但没有提供足够的解释或上下文，导致非专业用户可能难以理解。
                        基本明确：功能的主要作用被阐述，但在范围、限制或用户期望的结果上可能存在一些模糊或含糊的表述。
                        差（60分以下，不包含60分）
                        冗长复杂：功能描述过于详细和复杂，包含大量不必要的细节和背景信息，导致用户难以抓住重点。
                        难以理解：大量使用专业术语或行业黑话，且没有提供任何解释或上下文，使得大部分用户都难以理解。
                        模糊不清：功能的作用、范围、限制以及用户期望的结果都没有被明确阐述，存在大量的模糊和含糊表述。
                        评估的提示词举例：
                        根据这些评判标准，对下面的软件功能描述的清晰性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价，并给出具体得分。并在此基础上润色和完善，使之达到优秀的等级。
                        """,
            "完整性": """对软件功能描述的完整性主要体现在以下两个方面：
                        a. 是否涵盖了功能的所有重要方面，包括输入、输出、处理过程等。
                        b. 是否提供了足够的信息，以便用户能够全面了解功能的工作原理和用途。
                        如果要将软件功能描述的完整性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
                        将软件功能描述的完整性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
                        优秀：（90~100分）
                        描述全面涵盖了功能的所有重要方面，包括但不限于输入、输出、处理过程、异常处理等。
                        提供了详尽的信息，用户能够清晰地了解功能的工作原理、用途以及在不同场景下的表现。
                        包含了必要的示例、图表或流程图，以直观展示功能的工作流程和效果。
                        没有遗漏任何对用户理解和使用功能至关重要的信息。
                        良好：（70分~90分，不包含90分）
                        描述基本涵盖了功能的主要方面，但可能有个别不太重要的细节未提及。
                        提供了足够的信息，用户能够较好地理解功能的工作原理和用途，但在某些复杂场景下可能需要额外说明。
                        可能包含一些示例或图表，但可能不如优秀等级那么全面或详细。
                        一般：（60~70分，不包含70分）
                        描述涵盖了功能的一部分重要方面，但存在较明显的遗漏或不足。
                        提供的信息有限，用户可能只能对功能有一个大致的了解，无法深入了解其工作原理和详细用途。
                        可能缺乏示例、图表或流程图等辅助材料，导致用户难以理解功能的某些复杂部分。
                        差：（60分以下，不包含60分）
                        描述严重缺失，未涵盖功能的关键方面，甚至可能误导用户。
                        提供的信息极少，用户无法全面了解功能的工作原理和用途。
                        可能存在错误或矛盾的信息，导致用户无法准确理解功能。
                        根据这些评判标准，对下面的软件功能描述的完整性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
                        """,
            "可测试性": """软件功能描述的可测试性主要体现为以下方面：
                        a. 功能描述是否具体、明确，以便能够进行功能测试和验证。
                        b. 是否提供了足够的细节，以便开发人员和测试人员能够准确理解和实现功能。
                        如果要将软件功能描述的可测试性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
                        将软件功能描述的可测试性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
                        优秀：（90~100分）
                        功能描述非常具体和明确，能够直接转化为测试用例。
                        提供了详尽的细节，包括输入、输出、边界条件、异常处理等。
                        开发人员和测试人员能够轻松理解和实现功能，无需额外澄清或假设。
                        功能描述中包含了预期的行为和非预期的行为，有助于全面覆盖测试场景。
                        良好：（70分~90分，不包含90分）
                        功能描述相对具体和明确，大部分内容可以直接用于测试。
                        提供了足够的细节，但可能需要一些额外的解释或澄清才能完全理解。
                        开发人员和测试人员能够基于描述实现和测试功能，但可能需要一些额外的沟通和协调。
                        功能描述中基本涵盖了主要的行为和边界条件，但可能缺少对某些异常情况的详细描述。
                        一般：（60~70分，不包含70分）
                        功能描述较为笼统，需要较多的解释和澄清才能用于测试和开发。
                        细节不够充分，可能导致开发人员和测试人员在实现和测试过程中产生误解或遗漏。
                        需要较多的沟通和协调来确保功能的正确实现和测试。
                        功能描述中可能只涵盖了主要的行为，对边界条件和异常情况的描述较为模糊或缺失。
                        差：（60分以下，不包含60分）
                        功能描述非常模糊和笼统，无法直接用于测试和开发。
                        缺乏必要的细节，导致开发人员和测试人员无法准确理解和实现功能。
                        需要大量的沟通和协调，甚至可能需要重新编写功能描述才能进行有效的测试和开发。
                        功能描述中可能只提到了大致的目标或意图，没有具体的行为描述、边界条件或异常处理。
                        根据这些评判标准，对下面的软件功能描述的可测试性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
                        """,
            "详细性": """软件功能详细性主要体现在：
                        a. 功能描述是否详细，可以根据功能描述进行功能点评价，计算出ILF、EIF、EI、EO、EQ的数量；
                        如果要将软件功能描述的详细性划分为优秀、良好、一般、差四个从高到低的等级，每个等级的评判标准是什么？
                        将软件功能描述的详细性划分为优秀、良好、一般、差四个等级时，每个等级的评判标准可以如下定义：
                        优秀：（90~100分）
                        功能描述非常详尽，包含了所有必要的信息，使得评估者能够轻松地根据描述进行功能点评价。
                        ILF、EIF、EI、EO、EQ的数量可以明确且无误地计算出来，没有遗漏或模糊之处。
                        描述中不仅包含了功能的正常操作，还涵盖了异常处理、边界条件等特殊情况。
                        使用了具体的例子、流程图或伪代码来进一步阐明功能。
                        良好：（70分~90分，不包含90分）
                        功能描述相对详细，提供了足够的信息来进行功能点评价。
                        ILF、EIF、EI、EO、EQ的数量可以大致计算出来，但可能需要一些额外的解释或澄清。
                        描述中基本涵盖了功能的各个方面，但对某些细节或特殊情况可能描述不够充分。
                        整体而言，描述是清晰和准确的，但还有改进的空间。
                        一般：（60~70分，不包含70分）
                        功能描述较为笼统，缺乏具体的细节。
                        ILF、EIF、EI、EO、EQ的数量计算可能存在一定的困难或不确定性，需要较多的假设或推测。
                        描述中只涵盖了功能的主要方面，对细节和特殊情况的处理描述不足。
                        可能需要额外的沟通或澄清才能准确理解功能需求。
                        差：（60分以下，不包含60分）
                        功能描述非常模糊，缺乏必要的信息和细节。
                        无法根据描述进行准确的功能点评价，ILF、EIF、EI、EO、EQ的数量无法确定。
                        描述中可能只提到了功能的大致目标或意图，没有具体的实现细节或操作步骤。
                        需要大量的额外信息或澄清才能理解功能需求，甚至可能需要重新编写功能描述。
                        根据这些评判标准，对下面的软件功能描述的详细性进行客观的评价，给出优秀、良好、一般、差四个等级之一的评价。并在此基础上润色和完善，使之达到优秀的等级。
                        """,
        }
        # Relative weight of each criterion in the combined score.
        weight = {
            "清晰性" : 0.4,
            "完整性" : 0.3,
            "可测试性" : 0.2,
            "详细性" : 0.1,
        }
        findTitleName_llm_cfg = {
            'model': "qwen2-72b",
            'model_server': 'http://127.0.0.1:1025/v1',
        }
        findTitleName_bot = Assistant(llm=findTitleName_llm_cfg, name='Assistant')
        for count, i in enumerate(words, 1):
            yield f"文档相似性检查--对{titleName}章节，进行文档内容检查中{count}/{len(words)}"
            # Each entry looks like "<section number>-<heading>：<text>".
            chapter, rest = i.split('-', 1)
            title, text = rest.split('：', 1)
            example = {
                "chapter": chapter.strip(),
                "title": title.strip(),
                "text": text.strip()
            }
            result = {
                "title": title.strip(),
                "text": text.strip()
            }
            # Pass 1: grade the description on every criterion and
            # accumulate the weighted score.
            weighted_score = 0
            for key, value in standard.items():
                prompt_score = f"""对软件功能{key}的定义：
                                    {value}
                                    模块名称：【{example['title']}】
                                    模块描述：【{example['text']}】
                                    回答格式为：{{"模块名称"："{example['text']}",
                                                "等级":"优秀/良好/一般/差",
                                                "得分":"0~100",
                                                "理由及扣分原因":"理由及扣分原因",
                                                }}，不做过多的解释,严格按回答格式作答,只给出一个回答。
                                    """
                messages = [({'role': 'user', 'content': prompt_score})]
                runList = []
                for rsp in findTitleName_bot.run(messages):
                    runList.append(rsp)
                # Only the last streamed response is used.
                data = runList[len(runList) - 1][0]["content"]
                parsed_data = json_repair.loads(data.replace('`', ''))
                if isinstance(parsed_data, list):
                    # The model sometimes answers with a list; use its first item.
                    parsed_data = parsed_data[0]
                result[f"{key}等级"] = parsed_data['等级']
                result[f"{key}得分"] = parsed_data['得分']
                score = int(parsed_data['得分'])  # presumably a numeric string — TODO confirm
                key_weight = weight.get(key, 0)  # criteria without a weight count as 0
                weighted_score += score * key_weight
            result["加权得分"] = round(weighted_score, 2)
            # Pass 2: rewrite the description once per criterion, feeding each
            # rewrite into the next prompt.
            answer = f"{example['text']}"
            for key, value in standard.items():
                prompt_answer = f"""对软件功能{key}的定义：\n 
                            {value}\n
                            模块名称：【{example['title']}】\n
                            模块描述：f【{answer}】\n
                            回答格式为：{{"模块名称"："{example['text']}",
                                        "改进后的描述":"改进后的描述",
                                        }}，不做过多的解释,严格按回答格式作答。
                            """
                messages = [({'role': 'user', 'content': prompt_answer})]
                runList = []
                for rsp in findTitleName_bot.run(messages):
                    runList.append(rsp)
                data = runList[len(runList) - 1][0]["content"]
                parsed_data = json_repair.loads(data.replace('`', ''))
                answer = parsed_data['改进后的描述']
            result["改进后的描述"] = answer
        # Nothing visible in this function creates the directory, so a
        # missing path must not abort the run.
        shutil.rmtree(vector_store_path, ignore_errors=True)
        resInfo=f"对{titleName}章节，发现相似内容：<br>"
        if(len(reslist)>0):
            for res in reslist:
                resInfo+="【在**"+res["yuanwen1"][:res["yuanwen1"].find('：')]+"**下包含："+res["yuanwen1"][res["yuanwen1"].find('：') + 1:]+"<br>在**"+res["yuanwen2"][:res["yuanwen2"].find('：')]+"**下包含："+res["yuanwen2"][res["yuanwen2"].find('：') + 1:]+"<br>以上两段内容***相似度***："+'{:.2f}'.format(res['similarity'])+"】<br>"
            yield resInfo
        else:
            yield "**未发现相似内容**"
            # userLog is a module-level placeholder initialised to None;
            # only log when a logger has been installed.
            if userLog is not None:
                userLog.info("文档相似性检查----未发现相似内容**")
-# 创建一个记录器
+for i  in checkRepeatText("./北仑区综合行政执法局协同监管系统项目建设方案_20240824.docx"):
-logger = logging.getLogger('my_logger')
+ print(i)
 logger.setLevel(logging.DEBUG)
 # 创建一个处理器
 ch = logging.StreamHandler()
 ch.setLevel(logging.DEBUG)
 # 创建一个格式化器并将其添加到处理器中
 formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 ch.setFormatter(formatter)
 # 将处理器添加到记录器中
 logger.addHandler(ch)
 try:
 # 记录一些日志消息
    logger.debug('这是一个调试消息')
    logger.info('这是一个信息消息')
    logger.warning('这是一个警告消息')
    logger.error('这是一个错误消息')
    logger.critical('这是一个致命错误消息')
 except Exception as e:
    logger.warning(e)
--- a/main.py
+++ b/main.py
@ -1,206 +1,286 @@
-from flask import Flask, request, jsonify, Response
+# from flask import Flask, request, jsonify, Response
 import os
 from checkPlaceName import checkPlaceName
 from checkRepeatText import checkRepeatText
 from checkCompanyName import checkCompanyName
 from checkDocumentError import checkDocumentError
 from checkTitleName import checkTitleName
-from flask_cors import CORS
+# from flask_cors import CORS
 import qwen_agenttext
 from myLogger import outLog
 import time
-app = Flask(__name__)
+# app = Flask(__name__)
-cros = CORS(app)
+# cros = CORS(app)
 import uvicorn
 from fastapi import FastAPI, Request, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from sse_starlette.sse import EventSourceResponse
 import asyncio
 app = FastAPI()
 # 允许所有来源的跨域请求
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
 )
 UPLOAD_FOLDER = 'uploads'
 if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
-@app.route('/upload', methods=['POST'])
+# @app.route('/upload', methods=['POST'])
-def upload_file():
+# def upload_file():
-    if 'file' not in request.files:
+#     if 'file' not in request.files:
-        return jsonify({"error": "No file part"}), 400
+#         return jsonify({"error": "No file part"}), 400
-    file = request.files['file']
+#     file = request.files['file']
-    if file.filename == '':
+#     if file.filename == '':
-        return jsonify({"error": "No selected file"}), 400
+#         return jsonify({"error": "No selected file"}), 400
-    if file:
+#     if file:
-        filename = file.filename
+#         filename = file.filename
-        file.save(os.path.join(UPLOAD_FOLDER, filename))
+#         file.save(os.path.join(UPLOAD_FOLDER, filename))
-        return jsonify({"message": "File uploaded successfully"}), 200
+#         return jsonify({"message": "File uploaded successfully"}), 200
-
+@app.post("/sse/upload")
-
+async def upload_file(file: UploadFile = File(...)):
-@app.route('/stream', methods=["GET", "POST"])
+    if not file.filename:
-def stream_numbers():
+        raise HTTPException(status_code=400, detail="No selected file")
-    context = request.args.get('context')
+
-    # def generate_numbers():
+    # 保存文件
-    #     event_id=0
+    try:
-    #     for number in range(1, 10):
+        file_location = os.path.join(UPLOAD_FOLDER, file.filename)
-    #         json_data = json.dumps({"number": number})
+        with open(file_location, "wb") as f:
-    #         print(json_data)
+            content = await file.read()
-    #         event_id += 1
+            f.write(content)
-    #         yield f"id: {event_id}\n"
+        return JSONResponse(content={"message": "文件上传成功"}, status_code=200)
-    #         yield f"event: time-update\n"
+    except Exception as e:
-    #         yield f"data: {json_data}\n\n"  # 每次生成一个数字就发送
+        raise HTTPException(status_code=500, detail="文件上传失败，错误信息：" + str(e))
-    #         time.sleep(0.5)  # 为了演示，加入短暂延迟
+
-    #     json_data = json.dumps({"number": "done"})
+
-    #     yield f"id: {1}\n"
+@app.get("/sse")
-    #     yield f"event: time-update\n"
+async def root(request: Request):
-    #     yield f"data: {json_data}\n\n"  # 发送完成信号
+    async def event_generator(request: Request):
-
+        res_str = "七夕情人节即将来临，我们为您准备了精美的鲜花和美味的蛋糕"
-    headers = {
+        for i in res_str:
-        "Content-Type": "text/event-stream",
+            if await request.is_disconnected():
-        "Cache-Control": "no-cache",
+                print("连接已中断")
-        "X-Accel-Buffering": "no",
+                break
-        "Access-Control-Allow-Origin": "*",
+            yield {
-        "Access-Control-Allow-Methods": "GET,POST",
+                "event": "message",
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+                "id": "7",
                "data": f"{i}"
            }
-    return Response(qwen_agenttext.getxinx(context), headers=headers)
+
            await asyncio.sleep(0.1)
    g = event_generator(request)
    return EventSourceResponse(g)
-@app.route('/sse/checkRepeatText', methods=['GET'])
+# def stream_numbers():
-def checkRepeatTextWeb():
+#     context = request.args.get('context')
-    filename = request.args.get('filename')
+#     # def generate_numbers():
-    userId = request.args.get("userId")
+#     #     event_id=0
 #     #     for number in range(1, 10):
 #     #         json_data = json.dumps({"number": number})
 #     #         print(json_data)
 #     #         event_id += 1
 #     #         yield f"id: {event_id}\n"
 #     #         yield f"event: time-update\n"
 #     #         yield f"data: {json_data}\n\n"  # 每次生成一个数字就发送
 #     #         time.sleep(0.5)  # 为了演示，加入短暂延迟
 #     #     json_data = json.dumps({"number": "done"})
 #     #     yield f"id: {1}\n"
 #     #     yield f"event: time-update\n"
 #     #     yield f"data: {json_data}\n\n"  # 发送完成信号
-    def generate_checkRepeatText(filename,userId):
+#     headers = {
 #         "Content-Type": "text/event-stream",
 #         "Cache-Control": "no-cache",
 #         "X-Accel-Buffering": "no",
 #         "Access-Control-Allow-Origin": "*",
 #         "Access-Control-Allow-Methods": "GET,POST",
 #         "Access-Control-Allow-Headers": "x-requested-with,content-type",
 #     }
 #     return Response(qwen_agenttext.getxinx(context), headers=headers)
@app.get("/sse/checkRepeatText")
 async def checkRepeatTextWeb(filename, userId, request: Request):
    async def generate_checkRepeatText(filename, userId, request: Request):
        global outLog
        id = 0
-        for i in checkRepeatText(filename,userId):
+        for i in checkRepeatText(filename, userId, outLog):
-            yield f"id: {id + 1}\n"
+            id += 1
-            yield f"event: checkRepeatText\n"
+            if await request.is_disconnected():
-            yield f"data: {i}\n\n"  # 发送完成信号
+                yield {
-        # except Exception as e:
+                    "id": f"{id}",
-
+                    "event": "checkRepeatText",
-        #     yield f"id: {id+1}\n"
+                    "data": "checkRepeatText连接已中断"
-        #     yield f"event: checkRepeatText\n"
+                }
-        #     yield f"data: **程序出现异常**\n\n"  # 发送完成信号
+                break
-
+            yield {
-    headers = {
+                "id": f"{id}",
-        "Content-Type": "text/event-stream",
+                "event": "checkRepeatText",
-        "Cache-Control": "no-cache",
+                "data": i
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
            }
-    return Response(generate_checkRepeatText(filename,userId), headers=headers)
+
    g = generate_checkRepeatText(filename, userId, request)
    return EventSourceResponse(g)
-@app.route('/sse/checkPlaceName', methods=['GET'])
+@app.get('/sse/checkPlaceName')
-def checkPlaceNameWebSse():
+def checkPlaceNameWebSse(filename, userId, request: Request):
-    filename = request.args.get('filename')
+    async def generate_checkPlaceName(filename, userId, request: Request):
    userId = request.args.get("userId")
    def generate_checkPlaceName(filename,userId):
        id = 0
-        for i in checkPlaceName(filename,userId):
+        global outLog
-            yield f"id: {id + 1}\n"
+        for i in checkPlaceName(filename, userId, outLog):
-            yield f"event: checkPlaceName\n"
+            id += 1
-            yield f"data: {i}\n\n"  # 发送完成信号
+            if await request.is_disconnected():
-
+                yield {
-    headers = {
+                    "id": f"{id}",
-        "Content-Type": "text/event-stream",
+                    "event": "checkPlaceName",
-        "Cache-Control": "no-cache",
+                    "data": "checkPlaceName连接已中断"
-        "X-Accel-Buffering": "no",
+                }
-        "Access-Control-Allow-Origin": "*",
+                break
-        "Access-Control-Allow-Methods": "GET,POST",
+            yield {
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+                "id": f"{id}",
                "event": "checkPlaceName",
                "data": i
            }
    return Response(generate_checkPlaceName(filename,userId), headers=headers)
    g = generate_checkPlaceName(filename, userId, request)
    return EventSourceResponse(g)
-@app.route('/sse/checkCompanyName', methods=['GET'])
+
-def checkCompanyNameWebSse():
+@app.get('/sse/checkCompanyName')
-    filename = request.args.get('filename')
+def checkCompanyNameWebSse(filename, userId, request: Request):
-    userId = request.args.get("userId")
+    async def generate_checkCompanyName(filename, userId, request: Request):
    def generate_checkCompanyName(filename,userId):
        id = 0
-        for i in checkCompanyName(filename,userId):
+        global outLog
-            yield f"id: {id + 1}\n"
+        for i in checkCompanyName(filename, userId, outLog):
-            yield f"event: checkCompanyName\n"
+            id += 1
-            yield f"data: {i}\n\n"  # 发送完成信号
+            if await request.is_disconnected():
-
+                yield {
-    headers = {
+                    "id": f"{id}",
-        "Content-Type": "text/event-stream",
+                    "event": "checkCompanyName",
-        "Cache-Control": "no-cache",
+                    "data": "checkCompanyName连接已中断"
-        "X-Accel-Buffering": "no",
+                }
-        "Access-Control-Allow-Origin": "*",
+                break
-        "Access-Control-Allow-Methods": "GET,POST",
+            yield {
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+                "id": f"{id}",
                "event": "checkCompanyName",
                "data": i
            }
-    return Response(generate_checkCompanyName(filename,userId), headers=headers)
+
    g = generate_checkCompanyName(filename, userId, request)
    return EventSourceResponse(g)
-@app.route('/sse/checkDocumentErrorWeb', methods=['GET'])
+@app.get('/sse/checkDocumentErrorWeb')
-def checkDocumentErrorWebSse():
+def checkDocumentErrorWebSse(filename, userId, request: Request):
-    filename = request.args.get('filename')
+    async def generate_checkDocumentError(filename, userId, request: Request):
    userId = request.args.get("userId")
    def generate_checkDocumentError(filename,userId):
        id = 0
-        for i in checkDocumentError(filename,userId):
+        global outLog
-            yield f"id: {id + 1}\n"
+        for i in checkDocumentError(filename, userId, outLog):
-            yield f"event: checkDocumentError\n"
+            id += 1
-            yield f"data: {i}\n\n"  # 发送完成信号
+            if await request.is_disconnected():
-
+                yield {
-    headers = {
+                    "id": f"{id}",
-        "Content-Type": "text/event-stream",
+                    "event": "checkDocumentError",
-        "Cache-Control": "no-cache",
+                    "data": "checkDocumentError连接已中断"
-        "X-Accel-Buffering": "no",
+                }
-        "Access-Control-Allow-Origin": "*",
+                break
-        "Access-Control-Allow-Methods": "GET,POST",
+            yield {
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+                "id": f"{id}",
                "event": "checkDocumentError",
                "data": i
            }
    return Response(generate_checkDocumentError(filename,userId), headers=headers)
    g = generate_checkDocumentError(filename, userId, request)
    return EventSourceResponse(g)
-@app.route('/sse/checkTitleName', methods=['GET'])
+
-def checkTitleNameWebSse():
+@app.get('/sse/checkTitleName')
-    filename = request.args.get('filename')
+def checkTitleNameWebSse(filename, userId, request: Request):
-    userId = request.args.get("userId")
+    async def generate_checkTitleName(filename, userId, request: Request):
    def generate_checkTitleName(filename,userId):
        id = 0
-        for i in checkTitleName(filename,userId):
+        global outLog
-            yield f"id: {id + 1}\n"
+        for i in checkTitleName(filename, userId, outLog):
-            yield f"event: checkTitleName\n"
+            id += 1
-            yield f"data: {i}\n\n"  # 发送完成信号
+            if await request.is_disconnected():
-
+                yield {
-    headers = {
+                    "id": f"{id}",
-        "Content-Type": "text/event-stream",
+                    "event": "checkTitleName",
-        "Cache-Control": "no-cache",
+                    "data": "checkTitleName连接已中断"
-        "X-Accel-Buffering": "no",
+                }
-        "Access-Control-Allow-Origin": "*",
+                break
-        "Access-Control-Allow-Methods": "GET,POST",
+            yield {
-        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+                "id": f"{id}",
                "event": "checkTitleName",
                "data": i
            }
    return Response(generate_checkTitleName(filename,userId), headers=headers)
-@app.route('/sse/getLog', methods=['GET'])
+    g = generate_checkTitleName(filename, userId, request)
-def getlog():
+    return EventSourceResponse(g)
-    userId = request.args.get("userId")
+
-    def generate_getLog(userId):
+
-        time.sleep(1)
+@app.get("/sse/getLog")
 # @app.route('/sse/getLog', methods=['GET'])
 async def getlog(userId, request: Request):
    # userId = request.args.get("userId")
    async def generate_getLog(userId):
        id = 0
        global outLog
        await asyncio.sleep(5)
        while True:
-            if outLog.is_done(userId):
+            isbreak = outLog.is_done(userId)
            if isbreak:
                break  # 完成了
            text = outLog.get_queueData(userId)
            if await request.is_disconnected():
                yield {
                    "id": f"{id}",
                    "event": "checkTitleName",
                    "data": "checkTitleName连接已中断"
                }
                break
-            q = outLog.get_queueData(userId)
+            if text:
-            if q:
+                id += 1
-                id+=1
+                yield {
-                text = q.pop(0)
+                    "id": id,
-                yield f"id: {id}\n"
+                    "event": "getlog",
-                yield f"event: getlog\n"
+                    "data": text
-                yield f"data: {text}\n\n"  # 发送完成信号
+                }
-        yield f"id: {id}\n"
+                # yield f"id: {id}\n"
-        yield f"event: getlog\n"
+                # yield f"event: getlog\n"
-        yield f"data: 任务结束！！！！！\n\n"  # 发送完成信号
+                # yield f"data: {text}\n\n"  # 发送完成信号
-        outLog.del_queue(userId)
+        # yield f"id: {id}\n"
-    headers = {
+        # yield f"event: getlog\n"
-        "Content-Type": "text/event-stream",
+        # yield f"data: 任务结束！！！！！\n\n"  # 发送完成信号
-        "Cache-Control": "no-cache",
+        yield {
-        "X-Accel-Buffering": "no",
+            "id": id,
-        "Access-Control-Allow-Origin": "*",
+            "event": "getlog",
-        "Access-Control-Allow-Methods": "GET,POST",
+            "data": "任务结束！！！！"
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
        }
-    return Response(generate_getLog(userId), headers=headers)
+        outLog.del_queue(userId)
    # headers = {
    #     "Content-Type": "text/event-stream",
    #     "Cache-Control": "no-cache",
    #     "X-Accel-Buffering": "no",
    #     "Access-Control-Allow-Origin": "*",
    #     "Access-Control-Allow-Methods": "GET,POST",
    #     "Access-Control-Allow-Headers": "x-requested-with,content-type",
    # }
    g = generate_getLog(userId)
    return EventSourceResponse(g)
    # return Response(generate_getLog(userId), headers=headers)
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=80)
+    # app.run(host="0.0.0.0", port=80,threaded=True)
    # uvicorn.run(app='main:app', host="0.0.0.0", port=80,workers=1)
    app.run()
--- a/myLogger.py
+++ b/myLogger.py
@@ -1,117 +1,8 @@
 # -*- coding: utf-8 -*-
 """
@author:  bingyl123@163.com
@version: 1.0.0
@file:    OutLog.py
@time:    2023/2/23 20:25
 """
 # import logging
 # import logging.config
 # import re
 # import datetime
 # import queue
 #
 #
 # class OutLog:
 #     _instance = None
 #     logger = None
 #
 #     def __new__(cls):
 #         if cls._instance is None:
 #             cls._instance = super(OutLog, cls).__new__(cls)
 #             cls.logger = logging.getLogger("app")  # 默认logger名称为"app"
 #             cls._instance.queue_dict = {}
 #             cls._instance.done_dict = {}
 #         return cls._instance
 #
 #     def get_queue(self, user_id):
 #         if user_id not in self.queue_dict:
 #             self.queue_dict[user_id] = []
 #             self.done_dict[user_id] = {}  # 初始化为未完成的字典
 #         return self.queue_dict[user_id]
 #
 #     def mark_done(self, user_id, producer_name):
 #         self.done_dict[user_id][producer_name] = True
 #
 #     def is_done(self, user_id):
 #         return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
 #     @staticmethod
 #     def put(item: str, level="INFO"):
 #         dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 #         mq.put(f"{dtf}[{level}]: {item}")
 #
 #     @staticmethod
 #     def debug(item, log=True):
 #         OutLog.put(item, level="DEBUG")
 #         if log:
 #             OutLog._instance.logger.debug(item)
 #
 #     @staticmethod
 #     def info(item, log=True):
 #         OutLog.put(item, level="INFO")
 #         if log:
 #             OutLog._instance.logger.info(item)
 #
 #     @staticmethod
 #     def warning(item, log=True):
 #         OutLog.put(item, level="WARNING")
 #         if log:
 #             OutLog._instance.logger.warning(item)
 #
 #     @staticmethod
 #     def error(item, log=True):
 #         OutLog.put(item, level="ERROR")
 #         if log:
 #             OutLog._instance.logger.error(item)
 #
 #     @staticmethod
 #     def critical(item, log=True):
 #         OutLog.put(item, level="CRITICAL")
 #         if log:
 #             OutLog._instance.logger.critical(item)
 #
 #
 #
 # # 日志配置
 # log_config = {
 #     'version': 1,
 #     'disable_existing_loggers': False,
 #     'formatters': {
 #         'standard': {
 #             'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 #         },
 #     },
 #     'handlers': {
 #         'console': {
 #             'class': 'logging.StreamHandler',
 #             'formatter': 'standard',
 #             'level': logging.INFO,
 #         },
 #         'file': {
 #             'class': 'logging.FileHandler',
 #             'filename': 'Logger.log',
 #             'formatter': 'standard',
 #             'level': logging.WARNING,
 #         },
 #     },
 #     'loggers': {
 #         '': {
 #             'handlers': ['console', 'file'],
 #             'level': logging.WARNING,
 #             'propagate': True,
 #         },
 #     }
 # }
 #
 # logging.config.dictConfig(log_config)
 #
 # outLog = OutLog()  # 获取单例实例
 import logging
 import logging.config
 import datetime
 import redis
 class OutLog:
    _instance = None
@@ -121,35 +12,49 @@ class OutLog:
        if cls._instance is None:
            cls._instance = super(OutLog, cls).__new__(cls)
            cls.logger = logging.getLogger("app")  # 默认logger名称为"app"
-            cls._instance.queue_dict = {}
+            # cls._instance.queue_dict = {}
-            cls._instance.done_dict = {}
+            # cls._instance.done_dict = {}
                        # 初始化 Redis 连接
            cls._instance.redis_client = redis.StrictRedis(host='localhost', port=6379, password="root",db=0, decode_responses=True)
        return cls._instance
-    def get_queue(self, user_id,producer_name):
+    def get_queue(self,user_id,producer_name):
-        if user_id not in self.queue_dict:
+        # if user_id not in self.queue_dict:
-            self.queue_dict[user_id] = []
+        #     self.queue_dict[user_id] = []
-            self.done_dict[user_id] = {}  # 初始化为未完成的字典
+        #     self.done_dict[user_id]={}
-        if user_id not in self.done_dict:
+        # self.done_dict[user_id][producer_name] = False  # 初始化为未完成的字典
-            self.done_dict[user_id][producer_name] = False
+         # 使用 Redis 进行存储和查询
        if not self.redis_client.exists(f"queue:{user_id}"):
            # self.redis_client.rpush(f"queue:{user_id}")
            self.logger.info(f"queue:{user_id}")
        self.redis_client.hset(f"done:{user_id}", producer_name, "0")  # 初始化为未完成
        return self.UserLogger(user_id)
    def get_queueData(self, user_id):
-        if user_id in self.queue_dict:
+        # if user_id in self.queue_dict:
-           return OutLog._instance.queue_dict[self.user_id]
+        #     return self.queue_dict[user_id]
        if self.redis_client.exists(f"queue:{user_id}"):
            return self.redis_client.lpop(f"queue:{user_id}")  # 获取队列首个并删除数据
    def del_queue(self,user_id):
        # if self.is_done(user_id):
        #     del self.queue_dict[user_id]
        #     del self.done_dict[user_id]
        if self.is_done(user_id):
-            del self.queue_dict[user_id]
+            self.redis_client.delete(f"queue:{user_id}")
-            del self.done_dict[user_id]
+            self.redis_client.delete(f"done:{user_id}")
    class UserLogger:
        def __init__(self, user_id):
            self.user_id = user_id
            self.logger = OutLog._instance.logger
        def log(self, item: str, level: str):
            self._log_to_logger(item, level)
            if(level != "INFO"):
                return
            dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_entry = f"{dtf}[{level}]: {item}"
-            OutLog._instance.queue_dict[self.user_id].append(log_entry)  # 保存到对应用户的队列
+            # print(log_entry)
-            self._log_to_logger(item, level)
+            # OutLog._instance.queue_dict[self.user_id].append(log_entry)  # 保存到对应用户的队列
-
+            OutLog._instance.redis_client.rpush(f"queue:{self.user_id}", log_entry)  # 保存到对应用户的队列
        def _log_to_logger(self, item: str, level: str):
            if level == "DEBUG":
                self.logger.debug(item)
@@ -177,11 +82,17 @@ class OutLog:
        def critical(self, item: str):
            self.log(item, "CRITICAL")
    # def mark_done(self, user_id, producer_name):
    #     self.done_dict[user_id][producer_name] = True
    # def is_done(self, user_id):
    #     # print(self.done_dict.get(user_id, {}),self.done_dict.get(user_id, {}).values())
    #     return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
    def mark_done(self, user_id, producer_name):
-        self.done_dict[user_id][producer_name] = True
+        self.redis_client.hset(f"done:{user_id}", producer_name, "1")
    def is_done(self, user_id):
-        return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
+        done_dict = self.redis_client.hgetall(f"done:{user_id}")
        return all(value == "1" for value in done_dict.values()) if done_dict else False # 检查所有生产者是否完成
 # 日志配置
@@ -203,13 +114,13 @@ log_config = {
            'class': 'logging.FileHandler',
            'filename': 'Logger.log',
            'formatter': 'standard',
-            'level': logging.WARNING,
+            'level': logging.INFO,
        },
    },
    'loggers': {
        '': {
            'handlers': ['console', 'file'],
-            'level': logging.WARNING,
+            'level': logging.INFO,
            'propagate': True,
        },
    }