文件优化

9 months ago · a1ea54d7f9
9 changed files with 651 additions and 436 deletions
--- a/UserQueue.py
+++ b/UserQueue.py
--- a/checkCompanyName.py
+++ b/checkCompanyName.py
@ -1,14 +1,15 @@
 # -*- coding:utf-8 -*-
-import time
 from docx import Document
-from paddlenlp import Taskflow
 from qwen_agent.agents import Assistant
 import re
 import json_repair
+import json
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-
+import requests
+from myLogger import outLog
+import time

 def load_from_xml_v2(baseURI, rels_item_xml):
    """
@ -28,43 +29,10 @@ def load_from_xml_v2(baseURI, rels_item_xml):

 _SerializedRelationships.load_from_xml = load_from_xml_v2

-
 import logging
-import logging.config
-
-log_config = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'standard': {
-            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        },
-    },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-        'file': {
-            'class': 'logging.FileHandler',
-            'filename': 'Logger.log',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-    },
-    'loggers': {
-        '': {
-            'handlers': ['console', 'file'],
-            'level': logging.INFO,
-            'propagate': True,
-        },
-    }
-}
-
-logging.config.dictConfig(log_config)

-logger = logging.getLogger("checkCompanyName")
+outLog.logger = logging.getLogger("checkCompanyName")
+userLog=None
 prompt ='''
 .根据上述文本判断，是否为具体的公司或组织名称，你可以使用工具利用互联网查询，
 你只能在[具体的公司或组织名称,公益组织,简称,统称,泛化组织,政府单位,机关单位,学校，行业类型，其他]选项中选择答案,
@ -81,14 +49,23 @@ bot = Assistant(llm=llm_cfg,
                # system_message="你是一个地理专家，可以准确的判断地理位置，如果你不确定，可以使用工具"
                )

+
 def getDocxToTextAll(name):
    docxPath = name
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
-    levelList=[]
    words = []
-    addStart = False
-    levelText=""
    i = 0
    for paragraph in document.paragraphs:
        # 判断该段落的标题级别
@ -99,13 +76,15 @@ def getDocxToTextAll(name):
            words.append(text)
    # 将所有段落文本拼接成一个字符串，并用换行符分隔
    text = '\n'.join(words)
-
+    # userLog.info("checkCompanyName----保存文件")
    # 将文本写入txt文件
    with open("checkCompanyName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
+
+
 def companyNameTask(text):
    yield "文档公司或组织名称检查---启动中...."
-    wordtag  = Taskflow("knowledge_mining",device_id=0)
+    userLog.info("checkCompanyName----启动中....")
    batchNum = 20
    sentences = re.split(r'[。\n]', text)
    # 去掉空字符
@ -122,46 +101,64 @@ def companyNameTask(text):
    # 打印每一份的内容
    for i, chunk in enumerate(chunks):
        yield f"文档公司或组织名称检查---文档解析进度:{i + 1}/{num_chunks}"
-
-        wenBen=".".join(chunk)
+        userLog.info(f"checkCompanyName----文档解析进度:{i + 1}/{num_chunks}")
        try:
-            res = wordtag(wenBen)
+            wenBen = ".".join(chunk)
+            url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
+            headers = {"Content-Type": "application/json"}
+            data = {
+                "data": {
+                    "text": wenBen,
+                }
+            }
+            r = requests.post(url=url, headers=headers, data=json.dumps(data))
+            res = json.loads(r.text)
+            # userLog.info(res)
+            # print(res)
        except Exception as e:
-            logging.warning(chunk)
-            logging.warning("文档公司或组织名称检查---词类分析出错",e)
-            continue
+            userLog.warning(chunk)
+            userLog.warning("文档公司或组织名称检查--错别字识别出错\n")
+            userLog.warning(e)
+            return
        isplace = False
-        for zuhe in res[0]['items']:
+        for zuhe in res["result"]:
            # 上一个的地名,这一个还是地名，就和上一个相加代替这个
-            zhi = zuhe.get("wordtag_label")
            if isplace:
                name = placeList[len(placeList) - 1]
-                if zhi.find("组织机构类") >= 0:  # or zuhe[1] == "ns"
+                if zuhe[1].find("组织机构类") >= 0:  # or zuhe[1] == "ns"
                    isplace = True
-                    new_text = zuhe['item'].replace("\n", "")
+                    new_text = zuhe[0].replace("\n", "")
                    placeList[len(placeList) - 1] = name + new_text
                    continue
-            if zhi.find("组织机构类") >= 0:
+            if zuhe[1].find("组织机构类") >= 0:
                isplace = True
-                new_text = zuhe['item'].replace("\n", "")
+                new_text = zuhe[0].replace("\n", "")
                placeList.append(new_text)
            else:
                isplace = False
    # 打印总份数
    yield "文档公司或组织名称检查---文档解析完成"
+    userLog.info("checkCompanyName----文档解析完成")
    placeList = list(dict.fromkeys(placeList))
    yield placeList
-def checkCompanyName(filename):
+    userLog.info(placeList)
+
+def checkCompanyName(filename,user_id):
    yield f"文档公司或组织名称检查---开始处理文档..."
+    global userLog
+    userLog=outLog.get_queue(user_id, "checkCompanyName")
    try:
        getDocxToTextAll(filename)
    except Exception as e:
-        logging.warning(e)
+        userLog.warning(e)
+        userLog.warning("文档公司或组织名称检查---文档无法打开，请检查文档内容")
        yield "文档公司或组织名称检查---文档无法打开，请检查文档内容"
+        outLog.mark_done(user_id, "checkCompanyName")
        return
    with open("checkCompanyName.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档公司或组织名称检查---开始解析文档..."  # 每次生成一个数字就发送
+    userLog.info("checkCompanyName----开始解析文档...")
    for item in companyNameTask(gettext):
        if isinstance(item, str):
            yield item
@ -177,19 +174,22 @@ def checkCompanyName(filename):
        if cishu > 3:
            cishu = 0
        yield "文档公司或组织名称检查---结果生成中" + '.' * cishu
+        userLog.info(f"checkCompanyName----结果生成中" + '.' * cishu)
        cishu += 1
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    error_places = []
+
    for place in parsed_data:
        try:
            if place['回答'] == '非泛化的公司或组织名称':
                error_places.append(place)
        except Exception as e:
-            logging.warning(place)
-            logging.warning("文档公司或组织名称检查---组织提出出错",e)
+            userLog.warning(place)
+            userLog.warning(e)
+            userLog.warning("文档公司或组织名称检查---组织提出出错")
            continue
-    logging.info(error_places)
+    userLog.info(error_places)
    returnInfo = "发现异常公司或组织名称<br>"
    if len(error_places) > 0:
        for t in error_places:
@ -199,7 +199,9 @@ def checkCompanyName(filename):
            t["yuanwen"] = paragraphs[0]
            yuanwen = paragraphs[0].replace(keyword, f"**{keyword}**").replace("\n", "")
            returnInfo += "原文：" + yuanwen + "<br>异常公司或组织名称：**" + keyword + "**！请注意" + "<br>"
-        logging.info(returnInfo)
+        userLog.info(returnInfo)  # log the finished report once, after the loop, instead of once per item
        yield returnInfo
    else:
        yield "**未发现异常公司或组织名称**<br>"
+        userLog.info("**未发现异常公司或组织名称**<br>")
+    outLog.mark_done(user_id, "checkCompanyName")
--- a/checkDocumentError.py
+++ b/checkDocumentError.py
@ -1,19 +1,15 @@
 # -*- coding:utf-8 -*-
-# from pycorrector import MacBertCorrector
-# m = MacBertCorrector("shibing624/macbert4csc-base-chinese")
 from qwen_agent.agents import Assistant
 from docx import Document
-from pprint import pprint
 import re
-from paddlenlp import Taskflow
 import json
-import time
 import json_repair
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-
-import asyncio
+import requests
+from myLogger import outLog
+import time
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
@ -32,41 +28,9 @@ def load_from_xml_v2(baseURI, rels_item_xml):

 _SerializedRelationships.load_from_xml = load_from_xml_v2
 import logging
-import logging.config
-
-log_config = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'standard': {
-            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        },
-    },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-        'file': {
-            'class': 'logging.FileHandler',
-            'filename': 'Logger.log',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-    },
-    'loggers': {
-        '': {
-            'handlers': ['console', 'file'],
-            'level': logging.INFO,
-            'propagate': True,
-        },
-    }
-}

-logging.config.dictConfig(log_config)
-
-logger = logging.getLogger("checkDocumentError")
+outLog.logger = logging.getLogger("checkDocumentError")
+userLog=None
 llm_cfg = {
    # 'model': 'qwen1.5-72b-chat',
    'model': "qwen2-72b",
@ -83,20 +47,28 @@ bot = Assistant(llm=llm_cfg,
 # 回答格式[{“placeName”：“原文”,"改正后":"改正的内容","回答":"答案"},{“placeName”：“原文”,"改正后":"改正的内容","回答":"答案"}]，不做过多的解释,严格按回答格式作答;
 # '''
 prompt = '''
-请回答以上问题，[是，否]选项中选择答案,原文内容，标点符号保持不变，如果有错请给出解析，没有错则不用给解析
+请回答以上问题，[是，否]选项中选择答案,原文内容，标点符号保持不变，如果有错请给出详细的解析，没有错则不用给解析
 回答格式请按照以下json格式[{"placeName":"序号","回答":"答案","解析","解析内容"},{"placeName":"序号","回答":"答案","解析","解析内容"}]，不做过多的解释,严格按回答格式作答;
 '''


 def getDocxToTextAll(name):
+    userLog.info("checkDocumentError----打开文档")
    docxPath = name
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
-    levelList = []
    words = []
-    addStart = False
-    levelText = ""
-    i = 0
    for paragraph in document.paragraphs:
        # 判断该段落的标题级别
        # 这里用isTitle()临时代表，具体见下文介绍的方法
@ -112,17 +84,23 @@ def getDocxToTextAll(name):
        txt_file.write(text)


-def getDocumentError(filename):
+def checkDocumentError(filename,user_id):
+    global userLog
+    userLog=outLog.get_queue(user_id,"checkDocumentError")
    yield f"文档纠错---开始处理文档..."
+    userLog.info("checkDocumentError----开始处理文档...")
    try:
        getDocxToTextAll(filename)
    except Exception as e:
-        logger.warning(e)
-        yield "文档无法打开，请检查文档内容"
+        userLog.warning(e)
+        userLog.warning("文档纠错----文档无法打开，请检查文档内容")
+        yield "文档纠错----文档无法打开，请检查文档内容"
+        outLog.mark_done(user_id, "checkDocumentError")
        return
    with open("checkDocumentError.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档纠错---开始解析文档..."  # 每次生成一个数字就发送
+    userLog.info("checkDocumentError----开始解析文档...")
    final_list = []
    for item in documentErrorTask(gettext):
        if isinstance(item, str):
@ -135,10 +113,13 @@ def getDocumentError(filename):
            yuanwen = i["placeName"].replace("\n", "")
            jianyi = i["jianyi"].replace("\n", "")
            resInfo += "原文：" + yuanwen + "<br>建议：**" + jianyi + "**<br>"
+        userLog.info(resInfo)  # log the finished report once, after the loop, instead of once per item
        yield resInfo
-        logger.info(resInfo)
+
    else:
        yield "**未发现错别字**"
+        userLog.info("未发现错别字")
+    outLog.mark_done(user_id,"checkDocumentError")


 def documentErrorTask(text):
@ -149,7 +130,7 @@ def documentErrorTask(text):
    :return: 生成器，每次返回一批文本
    """
    yield "文档纠错---启动中...."
-    corrector = Taskflow("text_correction", device_id=1)
+    userLog.info("checkDocumentError----启动中....")
    batchNum = 20
    sentences = re.split(r'[。\n]', text)
    # 去掉空字符
@ -162,18 +143,27 @@ def documentErrorTask(text):

    # 按batchNum字为一份进行处理
    chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
-    placeList = []
    # 打印每一份的内容
    err = []
    for i, chunk in enumerate(chunks):
        yield f"文档纠错---文档解析进度:{i + 1}/{num_chunks}"
+        userLog.info(f"checkDocumentError----文档解析进度:{i + 1}/{num_chunks}")
        try:
-            res = corrector(chunk)
+            url = "http://0.0.0.0:8190/taskflow/checkDocumentError"
+            headers = {"Content-Type": "application/json"}
+            data = {
+                "data": {
+                    "text": chunk,
+                }
+            }
+            r = requests.post(url=url, headers=headers, data=json.dumps(data))
+            res = json.loads(r.text)
+            # print(res)
        except Exception as e:
-            logger.warning(chunk)
-            logger.warning("文档纠错--错别字识别出错\n", e)
+            userLog.warning(chunk)
+            # Embed the exception in the message: an extra positional argument without a
+            # format placeholder breaks %-style log formatting (assumes userLog behaves
+            # like a stdlib logger -- TODO confirm against myLogger).
+            userLog.warning(f"文档纠错--错别字识别出错\n{e}")
            continue
-        lines_with_greeting = [place for place in res if len(place['errors']) > 0]
+        lines_with_greeting = [place for place in res["result"] if len(place['errors']) > 0]
        if len(lines_with_greeting) > 0:
            num = 0
            wenti = []  # 记录问题的数组
@ -186,18 +176,20 @@ def documentErrorTask(text):
                    for key, value in item['correction'].items():
                        temp_errorWords.append(key)
                wenti.append(
-                    "{}、原文：{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
+                    "序号：{}，原文：{}。问题：【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
                num += 1
            words = "\n".join(wenti)
            messages = [{'role': 'user', 'content': [{'text': words + prompt}]}]
            runList = []
            yield f"文档纠错---内容解析中..."  # 每次生成一个数字就发送
+            userLog.info(f"checkDocumentError----内容解析中...")
            cishu = 0
            for rsp in bot.run(messages):
                runList.append(rsp)
                if cishu > 3:
                    cishu = 0
                yield "文档纠错---内容解析中" + '.' * cishu
+                userLog.info(f"checkDocumentError----内容解析中内容解析中" + '.' * cishu)
                cishu += 1
            data = runList[len(runList) - 1][0]["content"]
            parsed_data = json_repair.loads(data.replace("\\", "").replace('`', ''))
@ -209,12 +201,13 @@ def documentErrorTask(text):
                        place["jianyi"] = place["解析"]
                        resListerr.append(place)
                except Exception as e:
-                    logger.warning(parsed_data)
-                    logger.warning(place)
-                    logger.warning("文档纠错--错别字提取出错\n", e)
+                    userLog.warning(parsed_data)
+                    userLog.warning(place)
+                    # Embed the exception in the message: an extra positional argument without
+                    # a format placeholder breaks %-style log formatting (assumes userLog
+                    # behaves like a stdlib logger -- TODO confirm against myLogger).
+                    userLog.warning(f"文档纠错--错别字提取出错\n{e}")
                    continue
            if (len(resListerr) > 0):
                err.extend(resListerr)
    # 打印总份数
    yield "文档地名检查---文档解析完成"
+    userLog.info(err)
    yield err
--- a/checkPlaceName.py
+++ b/checkPlaceName.py
@ -1,15 +1,15 @@
 from docx import Document
-from paddlenlp import Taskflow
-from pprint import pprint
 from qwen_agent.agents import Assistant
 import re
 import json_repair
-import time
+import json
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-
-
+import requests
+import logging
+from myLogger import outLog
+import time
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
@ -29,45 +29,10 @@ def load_from_xml_v2(baseURI, rels_item_xml):
 _SerializedRelationships.load_from_xml = load_from_xml_v2


-import logging
-import logging.config
-
-log_config = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'standard': {
-            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        },
-    },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-        'file': {
-            'class': 'logging.FileHandler',
-            'filename': 'Logger.log',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-    },
-    'loggers': {
-        '': {
-            'handlers': ['console', 'file'],
-            'level': logging.INFO,
-            'propagate': True,
-        },
-    }
-}
-
-logging.config.dictConfig(log_config)
-
-logger = logging.getLogger("checkPlaceName")
-
+outLog.logger = logging.getLogger("checkPlaceName")
+userLog=None
 prompt='''
-.上述文本判断地名是否正确，你可以使用工具利用互联网查询，你只能在[正确,错误,简称,未知]三种选项中选择答案,回答格式[{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"}]，不做过多的解释,严格按回答格式作答;
+.上述文本判断地名是否正确，你可以使用工具利用互联网查询，你只能在[正确,错误,简称,未知]三种选项中选择答案,回答格式[{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"}]，不做过多的解释,严格按回答格式作答;
 不做过多的解释,严格按回答格式作答;
 '''
 # prompt='''
@ -87,7 +52,18 @@ bot = Assistant(llm=llm_cfg,
                )
 #获取全文内容
 def getDocxToTextAll(docxPath):
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
    levelList=[]
    words=[]
@ -111,7 +87,7 @@ def getDocxToTextAll(docxPath):
 #得到全文和地名有关的内容
 def placeNameTask(text):
    yield "文档地名检查---启动中...."
-    tagTask = Taskflow("ner",device_id=2)
+    userLog.info("checkPlaceName----启动中....")
    batchNum=20
    sentences = re.split(r'[。\n]', text)
    # 去掉空字符
@ -128,16 +104,25 @@ def placeNameTask(text):
    # 打印每一份的内容
    for i, chunk in enumerate(chunks):
        yield f"文档地名检查---文档解析进度:{i + 1}/{num_chunks}"
-
+        userLog.info(f"checkPlaceName----文档解析进度:{i + 1}/{num_chunks}")
        wenBen=".".join(chunk)
        try:
-            res = tagTask(wenBen)
+            url = "http://0.0.0.0:8191/taskflow/checkPlaceName"
+            headers = {"Content-Type": "application/json"}
+            data = {
+                "data": {
+                    "text": wenBen,
+                }
+            }
+            r = requests.post(url=url, headers=headers, data=json.dumps(data))
+            res = json.loads(r.text)
        except Exception as e:
-            logger.warning(chunk)
-            logger.warning("文档地名检查---解析地名出错",e)
+            userLog.warning(chunk)
+            userLog.warning("文档地名检查---解析地名出错")
+            userLog.warning(e)
            continue
        isplace = False
-        for zuhe in res:
+        for zuhe in res["result"]:
            # 上一个的地名,这一个还是地名，就和上一个相加代替这个
            if isplace:
                name = placeList[len(placeList) - 1]
@ -154,16 +139,22 @@ def placeNameTask(text):
                isplace = False
    # 打印总份数
    yield "文档地名检查---文档解析完成"
+    userLog.info("checkPlaceName---文档解析完成")
    placeList=list(dict.fromkeys(placeList))
    yield placeList
+
 #主方法
-def checkPlaceName(filename):
+def checkPlaceName(filename,user_id):
+    global userLog
+    userLog=outLog.get_queue(user_id,"checkPlaceName")
    yield f"文档地名检查---开始处理文档..."  # 每次生成一个数字就发送
    try:
        getDocxToTextAll(filename)
    except Exception as e:
-        logger.warning(e)
+        userLog.warning(e)
        yield "文档地名检查---文档无法打开，请检查文档内容"
+        userLog.warning("文档地名检查---文档无法打开，请检查文档内容")
+        outLog.mark_done(user_id,"checkPlaceName")
        return
    with open("checkPlaceName.txt", "r",encoding='utf-8') as f:
        gettext = f.read()
@ -184,6 +175,7 @@ def checkPlaceName(filename):
        if cishu>3:
            cishu=0
        yield "文档地名检查---结果生成中"+'.'*cishu
+        userLog.info("checkPlaceName---结果生成中"+'.'*cishu)
        cishu+=1
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
@ -194,10 +186,12 @@ def checkPlaceName(filename):
            if place['回答'] == '错误':
                error_places.append(place)
        except Exception as e:
-            logger.warning(place)
-            logger.warning("文档地名检查---组织提出出错",e)
+            userLog.warning(parsed_data)
+            userLog.warning(place)
+            userLog.warning("文档地名检查---组织提出出错")
+            userLog.warning(e)
            continue
-    logger.info(error_places)
+    userLog.info(error_places)
    returnInfo = "发现异常地名<br>"
    if len(error_places)>0:
        for t in error_places:
@ -206,7 +200,9 @@ def checkPlaceName(filename):
            paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
            yuanwen= paragraphs[0].replace(keyword,f"**{keyword}**").replace("\n","")
            returnInfo+="原文：" + yuanwen + "<br>出现异常地名：**" + keyword + "**！请注意" + "<br>"
+        userLog.info(returnInfo)  # log the finished report once, after the loop, instead of once per item
        yield returnInfo
-        logger.info(returnInfo)
    else:
        yield "**未发现发现异常地名**"
+        userLog.info("未发现发现异常地名")
+    outLog.mark_done(user_id, "checkPlaceName")  # function level: must also run when anomalies were reported, not only in the else branch
--- a/checkRepeatText.py
+++ b/checkRepeatText.py
@ -5,7 +5,7 @@ from langchain_community.document_loaders import TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from qwen_agent.agents import Assistant
 import json_repair
-from paddlenlp import Taskflow
+import json
 embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
 device_id=0
 import re
@ -16,41 +16,11 @@ from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
 import logging
 import logging.config
+import requests
+from myLogger import outLog

-log_config = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'standard': {
-            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        },
-    },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-        'file': {
-            'class': 'logging.FileHandler',
-            'filename': 'Logger.log',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-    },
-    'loggers': {
-        '': {
-            'handlers': ['console', 'file'],
-            'level': logging.INFO,
-            'propagate': True,
-        },
-    }
-}
-
-logging.config.dictConfig(log_config)
-
-logger = logging.getLogger("checkRepeatText")
-
+outLog.logger = logging.getLogger("checkRepeatText")
+userLog=None
 def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
@ -110,7 +80,18 @@ def isTitle(paragraph):
 #寻找标题名称
 def findTitleName(docxPath):
    yield '文档相似性检查----检查是否存在详细设计方案'
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
    titleWords=[]
    firstTitle = 0
@ -161,14 +142,24 @@ def findTitleName(docxPath):
        runList.append(rsp)
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
-    logger.info(parsed_data)
    if(parsed_data["answer"]=="存在"):
        yield parsed_data["name"]
    else:
        yield "文档相似性检查----未找到与详细设计方案相关内容，无法进行相似性比较"
 #获取文档中 详细设计方案 章节的所有内容
 def getDocxToText(docxPath,titleName,vector_store_path):
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
    levelList=[]
    words=[]
@ -228,7 +219,9 @@ def getDocxToText(docxPath,titleName,vector_store_path):


 # @app.route('/checkRepeatText/<filename>', methods=['GET'])
-def checkRepeatText(filename):
+def checkRepeatText(filename,user_id):
+    global userLog
+    userLog=outLog.get_queue(user_id,"checkRepeatText")
    yield "文档相似性检查---启动中...."
    vector_store_path="vector_store"+str(uuid.uuid4())
    for titleName in findTitleName(filename):
@ -239,13 +232,11 @@ def checkRepeatText(filename):
            words,uuids,vectorstore=getDocxToText(filename,titleName,vector_store_path)
        except Exception as e:
            yield f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败"
+            userLog.warning(e)
+            userLog.warning(f"文档相似性检查----文档内容获取失败，未找到**{titleName}**相关内容或文档打开失败")
+            outLog.mark_done(user_id, "checkRepeatText")
            return
    # 记录程序开始的时间戳‘
-        global device_id
-        similarity = Taskflow("text_similarity",device_id=3)
-        # device_id+=1
-        # if(device_id>1):
-        #     device_id=0
        reslist = []
        count = 0
        for i in words:
@ -259,12 +250,23 @@ def checkRepeatText(filename):
                if (textTag.find(tag) >= 0):
                    continue
                try:
-                    res = similarity([[i[i.find('：') + 1:], text[text.find('：') + 1:]]])
+                    url = "http://0.0.0.0:8192/taskflow/checkRepeatText"
+                    headers = {"Content-Type": "application/json"}
+                    data = {
+                        "data": {
+                            "text": [[i[i.find('：') + 1:], text[text.find('：') + 1:]]],
+                        }
+                    }
+                    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+                    res = json.loads(r.text)
+                    # res = similarity([[i[i.find('：') + 1:], text[text.find('：') + 1:]]])
                except Exception as e:
-                    logger.warning("文档相似性检查--发生异常:",e)
-                    logger.warning(i)
-                    logger.warning(text)
-                if (res[0]["similarity"] > 0.90):
+                    userLog.warning("文档相似性检查--发生异常:")
+                    userLog.warning(e)
+                    userLog.warning(i)
+                    userLog.warning(text)
+                    continue
+                if (res["result"][0]["similarity"] > 0.90):
                    # 判断重复内容是否被放入
                    if (len(reslist) > 0):
                        isExist = False
@ -274,19 +276,20 @@ def checkRepeatText(filename):
                                break
                        if not isExist:
                            # reslist.append({"yuanwen1":i[i.find('：') + 1:],"yuanwen2":text[text.find('：') + 1:],"similarity":res[0]["similarity"]})
-                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
+                            userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
+                            reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
                    else:
-                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res[0]["similarity"]})
+                        reslist.append({"yuanwen1":i.replace("\n",""),"yuanwen2":text.replace("\n",""),"similarity":res["result"][0]["similarity"]})
                        # print(i.split("：")[1] + "\n" + text.split("：")[1])
+                        userLog.info("【在"+i[:i.find('：')].replace("\n","")+"下包含："+i[i.find('：') + 1:].replace("\n","")+"<br>在"+text[:text.find('：')].replace("\n","")+"**下包含："+text[text.find('：') + 1:].replace("\n","")+"<br>以上两段内容相似度："+'{:.2f}'.format(res["result"][0]["similarity"])+"】")
        # vectorstore.delete(ids=uuids)
        shutil.rmtree(vector_store_path)
-        logger.info("已删除")
-        logger.info(reslist)
        resInfo=f"对{titleName}章节，发现相似内容：<br>"
        if(len(reslist)>0):
            for res in reslist:
                resInfo+="【在**"+res["yuanwen1"][:res["yuanwen1"].find('：')]+"**下包含："+res["yuanwen1"][res["yuanwen1"].find('：') + 1:]+"<br>在**"+res["yuanwen2"][:res["yuanwen2"].find('：')]+"**下包含："+res["yuanwen2"][res["yuanwen2"].find('：') + 1:]+"<br>以上两段内容***相似度***："+'{:.2f}'.format(res['similarity'])+"】<br>"
            yield resInfo
-            logger.info(resInfo)
        else:
-            yield "未发现相似内容"
+            yield "**未发现相似内容**"
+            userLog.info("文档相似性检查----未发现相似内容**")
+        outLog.mark_done(user_id, "checkRepeatText")
--- a/checkTitleName.py
+++ b/checkTitleName.py
@ -1,3 +1,5 @@
+import time
+
 from docx import Document
 from pprint import pprint
 from qwen_agent.agents import Assistant
@ -6,7 +8,7 @@ import json_repair
 import math
 from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
 from docx.opc.oxml import parse_xml
-
+from myLogger import outLog

 def load_from_xml_v2(baseURI, rels_item_xml):
    """
@ -26,41 +28,9 @@ def load_from_xml_v2(baseURI, rels_item_xml):

 _SerializedRelationships.load_from_xml = load_from_xml_v2
 import logging
-import logging.config
-
-log_config = {
-    'version': 1,
-    'disable_existing_loggers': False,
-    'formatters': {
-        'standard': {
-            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        },
-    },
-    'handlers': {
-        'console': {
-            'class': 'logging.StreamHandler',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-        'file': {
-            'class': 'logging.FileHandler',
-            'filename': 'Logger.log',
-            'formatter': 'standard',
-            'level': logging.INFO,
-        },
-    },
-    'loggers': {
-        '': {
-            'handlers': ['console', 'file'],
-            'level': logging.INFO,
-            'propagate': True,
-        },
-    }
-}

-logging.config.dictConfig(log_config)
-
-logger = logging.getLogger("checkCompanyName")
+outLog.logger = logging.getLogger("checkTitleName")
+userLog=None
 llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b-instruct",
@ -113,7 +83,18 @@ def isTitle(paragraph):

 #获取文档中 详细设计方案 章节的所有内容
 def getDocxToTitleName(docxPath):
+    # Try to open the document up to 14 times, pausing one second after each failure.
+    lastError = None
+    for _ in range(14):
+        try:
             document = Document(docxPath)
+            break
+        except Exception as e:
+            # Remember the root cause so the final error stays diagnosable.
+            lastError = e
+            time.sleep(1)
+    else:
+        raise Exception("文档读取超时，或文档存在问题无法读取") from lastError
    # 逐段读取docx文档的内容
    levelList=[]
    words=[]
@ -130,9 +111,11 @@ def getDocxToTitleName(docxPath):
                words.append(text)
    return words

-def checkTitleName(filename):
-
+def checkTitleName(filename,user_id):
+    global userLog
+    userLog=outLog.get_queue(user_id,"checkTitleName")
    yield '文档结构检查----启动中'
+    userLog.info("checkTitleName----启动中")
    with open("ce模板.txt", "r",encoding='utf-8') as f:
        gettext = f.readlines()
    count=0
@ -140,8 +123,10 @@ def checkTitleName(filename):
    try:
        word = getDocxToTitleName(filename)
    except Exception as e:
-        print(e)
-        yield "文档无法打开，请检查文档内容"
+        userLog.warning(e)
+        yield "文档结构检查----文档无法打开，请检查文档内容"
+        outLog.mark_done(user_id, "checkTitleName")
+        userLog.warning("checkTitleName----文档无法打开，请检查文档内容")
        return
    for text in gettext:
        count+=1
@ -150,24 +135,25 @@ def checkTitleName(filename):
        '''
        xushang="回答格式{‘name’:‘名称’,'answer'：‘回答’，“标题”：“标题”}请严格按照格式回答问题，不要做过多我解释"
        yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
+        userLog.info(f"checkTitleName----结构分析中{count}/{len(gettext)}")
        strword = "\n".join(word)+prompt+xushang
-        # print(strword)
        messages = [{'role': 'user', 'content': [{'text':strword}]}]
        runList = []
-        cishu = 0
        for rsp in bot.run(messages):
            runList.append(rsp)
            # print(rsp)
        data = runList[len(runList) - 1][0]["content"]
        parsed_data = json_repair.loads(data.replace('`', ''))
-        print(parsed_data)
        if(parsed_data["answer"]=="不存在"):
            reserr.append(text)
+
    resInfo="文档结构存在异常：<br>"
    if(len(reserr)>0):
        for i in reserr:
            resInfo+="**"+i.replace('\n','')+"**<br>"
-        logger.info(resInfo)
+            userLog.info(resInfo)
        yield resInfo
    else:
        yield "文档结构未发现异常"
+        userLog.info("文档结构未发现异常")
+        outLog.mark_done(user_id, "checkTitleName")
--- a/main.py
+++ b/main.py
@ -3,16 +3,19 @@ import os
 from checkPlaceName import checkPlaceName
 from checkRepeatText import checkRepeatText
 from checkCompanyName import checkCompanyName
-from checkDocumentError import getDocumentError
+from checkDocumentError import checkDocumentError
 from checkTitleName import checkTitleName
 from flask_cors import CORS
 import qwen_agenttext
+from myLogger import outLog
+import time
 app = Flask(__name__)
 cros = CORS(app)
 UPLOAD_FOLDER = 'uploads'
-usableTag=[0,0,0,0,0,0,0,0]
 if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
+
+
@app.route('/upload', methods=['POST'])
 def upload_file():
    if 'file' not in request.files:
@ -24,6 +27,8 @@ def upload_file():
        filename = file.filename
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        return jsonify({"message": "File uploaded successfully"}), 200
+
+
@app.route('/stream', methods=["GET", "POST"])
 def stream_numbers():
    context = request.args.get('context')
@ -51,21 +56,25 @@ def stream_numbers():
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(qwen_agenttext.getxinx(context), headers=headers)
+
+
@app.route('/sse/checkRepeatText', methods=['GET'])
 def checkRepeatTextWeb():
    filename = request.args.get('filename')
+    userId = request.args.get("userId")

-    def generate_checkRepeatText(filename):
+    def generate_checkRepeatText(filename,userId):
        id = 0
-        try:
-            for i in checkRepeatText(filename):
+        for i in checkRepeatText(filename,userId):
            yield f"id: {id + 1}\n"
            yield f"event: checkRepeatText\n"
            yield f"data: {i}\n\n"  # 发送完成信号
-        except Exception as e:
-            yield f"id: {id+1}\n"
-            yield f"event: checkRepeatText\n"
-            yield f"data: **程序出现异常**\n\n"  # 发送完成信号
+        # except Exception as e:
+
+        #     yield f"id: {id+1}\n"
+        #     yield f"event: checkRepeatText\n"
+        #     yield f"data: **程序出现异常**\n\n"  # 发送完成信号
+
    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
@ -74,19 +83,20 @@ def checkRepeatTextWeb():
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
-    return Response(generate_checkRepeatText(filename), headers=headers)
+    return Response(generate_checkRepeatText(filename,userId), headers=headers)


@app.route('/sse/checkPlaceName', methods=['GET'])
 def checkPlaceNameWebSse():
    filename = request.args.get('filename')
-
-    def generate_checkPlaceName(filename):
+    userId = request.args.get("userId")
+    def generate_checkPlaceName(filename,userId):
        id = 0
-        for i in checkPlaceName(filename):
+        for i in checkPlaceName(filename,userId):
            yield f"id: {id + 1}\n"
            yield f"event: checkPlaceName\n"
            yield f"data: {i}\n\n"  # 发送完成信号
+
    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
@ -95,14 +105,16 @@ def checkPlaceNameWebSse():
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
-    return Response(generate_checkPlaceName(filename), headers=headers)
+    return Response(generate_checkPlaceName(filename,userId), headers=headers)
+
+
@app.route('/sse/checkCompanyName', methods=['GET'])
 def checkCompanyNameWebSse():
    filename = request.args.get('filename')
-
-    def generate_checkCompanyName(filename):
+    userId = request.args.get("userId")
+    def generate_checkCompanyName(filename,userId):
        id = 0
-        for i in checkCompanyName(filename):
+        for i in checkCompanyName(filename,userId):
            yield f"id: {id + 1}\n"
            yield f"event: checkCompanyName\n"
            yield f"data: {i}\n\n"  # 发送完成信号
@ -115,17 +127,18 @@ def checkCompanyNameWebSse():
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
-    return Response(generate_checkCompanyName(filename), headers=headers)
+    return Response(generate_checkCompanyName(filename,userId), headers=headers)
+

@app.route('/sse/checkDocumentErrorWeb', methods=['GET'])
 def checkDocumentErrorWebSse():
    filename = request.args.get('filename')
-
-    def generate_checkDocumentError(filename):
+    userId = request.args.get("userId")
+    def generate_checkDocumentError(filename,userId):
        id = 0
-        for i in getDocumentError(filename):
+        for i in checkDocumentError(filename,userId):
            yield f"id: {id + 1}\n"
-            yield f"event: getDocumentError\n"
+            yield f"event: checkDocumentError\n"
            yield f"data: {i}\n\n"  # 发送完成信号

    headers = {
@ -136,14 +149,16 @@ def checkDocumentErrorWebSse():
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
-    return Response(generate_checkDocumentError(filename), headers=headers)
+    return Response(generate_checkDocumentError(filename,userId), headers=headers)
+
+
@app.route('/sse/checkTitleName', methods=['GET'])
 def checkTitleNameWebSse():
    filename = request.args.get('filename')
-
-    def generate_checkTitleName(filename):
+    userId = request.args.get("userId")
+    def generate_checkTitleName(filename,userId):
        id = 0
-        for i in checkTitleName(filename):
+        for i in checkTitleName(filename,userId):
            yield f"id: {id + 1}\n"
            yield f"event: checkTitleName\n"
            yield f"data: {i}\n\n"  # 发送完成信号
@ -156,6 +171,36 @@ def checkTitleNameWebSse():
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
-    return Response(generate_checkTitleName(filename), headers=headers)
+    return Response(generate_checkTitleName(filename,userId), headers=headers)
+
+@app.route('/sse/getLog', methods=['GET'])
+def getlog():
+    """Stream the buffered log lines of one user as server-sent events.
+
+    Reads the ``userId`` query parameter and forwards every entry queued for
+    that user as a ``getlog`` event.  Once ``outLog.is_done(userId)`` reports
+    true and the queue has been drained, a closing event is sent and the
+    user's queue is released through ``outLog.del_queue``.
+
+    Returns:
+        A streaming ``Response`` with ``text/event-stream`` headers.
+    """
+    userId = request.args.get("userId")
+
+    def generate_getLog(userId):
+        # Initial delay kept from the original; presumably it gives the
+        # producers time to register before the first poll -- TODO confirm.
+        time.sleep(1)
+        event_id = 0
+        while True:
+            # Sample the completion flag BEFORE draining, so entries appended
+            # just before the flag flipped are still delivered.
+            finished = outLog.is_done(userId)
+            pending = outLog.get_queueData(userId)
+            while pending:
+                event_id += 1
+                text = pending.pop(0)
+                yield f"id: {event_id}\n"
+                yield f"event: getlog\n"
+                yield f"data: {text}\n\n"
+            if finished:
+                break
+            # Nothing left to send: back off briefly instead of spinning.
+            time.sleep(0.1)
+        yield f"id: {event_id}\n"
+        yield f"event: getlog\n"
+        yield f"data: 任务结束！！！！！\n\n"  # end-of-stream marker
+        outLog.del_queue(userId)
+    headers = {
+        "Content-Type": "text/event-stream",
+        "Cache-Control": "no-cache",
+        "X-Accel-Buffering": "no",
+        "Access-Control-Allow-Origin": "*",
+        "Access-Control-Allow-Methods": "GET,POST",
+        "Access-Control-Allow-Headers": "x-requested-with,content-type",
+    }
+    return Response(generate_getLog(userId), headers=headers)
 if __name__ == '__main__':
    app.run(host="0.0.0.0", port=80)
--- a/myLogger.py
+++ b/myLogger.py
@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+"""
+@author:  bingyl123@163.com
+@version: 1.0.0
+@file:    myLogger.py
+@time:    2023/2/23 20:25
+"""
+# import logging
+# import logging.config
+# import re
+# import datetime
+# import queue
+#
+#
+# class OutLog:
+#     _instance = None
+#     logger = None
+#
+#     def __new__(cls):
+#         if cls._instance is None:
+#             cls._instance = super(OutLog, cls).__new__(cls)
+#             cls.logger = logging.getLogger("app")  # 默认logger名称为"app"
+#             cls._instance.queue_dict = {}
+#             cls._instance.done_dict = {}
+#         return cls._instance
+#
+#     def get_queue(self, user_id):
+#         if user_id not in self.queue_dict:
+#             self.queue_dict[user_id] = []
+#             self.done_dict[user_id] = {}  # 初始化为未完成的字典
+#         return self.queue_dict[user_id]
+#
+#     def mark_done(self, user_id, producer_name):
+#         self.done_dict[user_id][producer_name] = True
+#
+#     def is_done(self, user_id):
+#         return all(self.done_dict.get(user_id, {}).values())  # 检查所有生产者是否完成
+#     @staticmethod
+#     def put(item: str, level="INFO"):
+#         dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+#         mq.put(f"{dtf}[{level}]: {item}")
+#
+#     @staticmethod
+#     def debug(item, log=True):
+#         OutLog.put(item, level="DEBUG")
+#         if log:
+#             OutLog._instance.logger.debug(item)
+#
+#     @staticmethod
+#     def info(item, log=True):
+#         OutLog.put(item, level="INFO")
+#         if log:
+#             OutLog._instance.logger.info(item)
+#
+#     @staticmethod
+#     def warning(item, log=True):
+#         OutLog.put(item, level="WARNING")
+#         if log:
+#             OutLog._instance.logger.warning(item)
+#
+#     @staticmethod
+#     def error(item, log=True):
+#         OutLog.put(item, level="ERROR")
+#         if log:
+#             OutLog._instance.logger.error(item)
+#
+#     @staticmethod
+#     def critical(item, log=True):
+#         OutLog.put(item, level="CRITICAL")
+#         if log:
+#             OutLog._instance.logger.critical(item)
+#
+#
+#
+# # 日志配置
+# log_config = {
+#     'version': 1,
+#     'disable_existing_loggers': False,
+#     'formatters': {
+#         'standard': {
+#             'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+#         },
+#     },
+#     'handlers': {
+#         'console': {
+#             'class': 'logging.StreamHandler',
+#             'formatter': 'standard',
+#             'level': logging.INFO,
+#         },
+#         'file': {
+#             'class': 'logging.FileHandler',
+#             'filename': 'Logger.log',
+#             'formatter': 'standard',
+#             'level': logging.WARNING,
+#         },
+#     },
+#     'loggers': {
+#         '': {
+#             'handlers': ['console', 'file'],
+#             'level': logging.WARNING,
+#             'propagate': True,
+#         },
+#     }
+# }
+#
+# logging.config.dictConfig(log_config)
+#
+# outLog = OutLog()  # 获取单例实例
+
+
+
+import logging
+import logging.config
+import datetime
+
+class OutLog:
+    """Process-wide singleton that buffers log lines per user.
+
+    Producers obtain a ``UserLogger`` through ``get_queue``; every line they
+    log is appended to that user's in-memory queue and also forwarded to the
+    shared ``logging`` logger.  A consumer reads the queue with
+    ``get_queueData`` and polls ``is_done`` to learn when every registered
+    producer has called ``mark_done``.
+    """
+    _instance = None
+    logger = None
+
+    def __new__(cls):
+        # Build the single instance lazily; later calls return the same object.
+        if cls._instance is None:
+            cls._instance = super(OutLog, cls).__new__(cls)
+            cls.logger = logging.getLogger("app")  # default logger name is "app"
+            cls._instance.queue_dict = {}  # user_id -> list of formatted log lines
+            cls._instance.done_dict = {}  # user_id -> {producer_name: finished flag}
+        return cls._instance
+
+    def get_queue(self, user_id, producer_name):
+        """Register ``producer_name`` as running for ``user_id``.
+
+        Creates the user's queue on first use and returns a ``UserLogger``
+        bound to that user.
+        """
+        self.queue_dict.setdefault(user_id, [])
+        # The producer must be recorded as unfinished here; otherwise
+        # ``is_done`` sees no pending producers and reports completion at once.
+        self.done_dict.setdefault(user_id, {})[producer_name] = False
+        return self.UserLogger(user_id)
+
+    def get_queueData(self, user_id):
+        """Return the live list of queued lines for ``user_id``, or ``None``."""
+        return self.queue_dict.get(user_id)
+
+    def del_queue(self, user_id):
+        """Drop the queue and completion flags of ``user_id`` once it is done."""
+        if self.is_done(user_id):
+            # ``pop`` with a default keeps this safe for unknown users and for
+            # repeated calls.
+            self.queue_dict.pop(user_id, None)
+            self.done_dict.pop(user_id, None)
+
+    class UserLogger:
+        """Logger facade that tags every entry with one user's queue."""
+
+        def __init__(self, user_id):
+            self.user_id = user_id
+            self.logger = OutLog._instance.logger
+
+        def log(self, item: str, level: str):
+            """Queue a timestamped copy of ``item`` and forward it to ``logging``."""
+            dtf = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            log_entry = f"{dtf}[{level}]: {item}"
+            user_queue = OutLog._instance.queue_dict.get(self.user_id)
+            # The queue may already have been released by ``del_queue``; in
+            # that case the line is only written to the standard logger.
+            if user_queue is not None:
+                user_queue.append(log_entry)
+            self._log_to_logger(item, level)
+
+        def _log_to_logger(self, item: str, level: str):
+            # Unknown level names are ignored, as in the original if/elif chain.
+            if level in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
+                getattr(self.logger, level.lower())(item)
+
+        def info(self, item: str):
+            self.log(item, "INFO")
+
+        def warning(self, item: str):
+            self.log(item, "WARNING")
+
+        def debug(self, item: str):
+            self.log(item, "DEBUG")
+
+        def error(self, item: str):
+            self.log(item, "ERROR")
+
+        def critical(self, item: str):
+            self.log(item, "CRITICAL")
+
+    def mark_done(self, user_id, producer_name):
+        """Flag ``producer_name`` as finished for ``user_id``."""
+        producers = self.done_dict.get(user_id)
+        # Skip users whose bookkeeping was already released, rather than
+        # raising KeyError or recreating an entry nobody will delete.
+        if producers is not None:
+            producers[producer_name] = True
+
+    def is_done(self, user_id):
+        """Return True when every registered producer of ``user_id`` finished.
+
+        A user with no registered producers also counts as done.
+        """
+        return all(self.done_dict.get(user_id, {}).values())
+
+
+# Logging configuration, applied at import time by logging.config.dictConfig.
+# NOTE(review): the root logger level is WARNING, so INFO/DEBUG records from
+# loggers that inherit it are dropped before they reach the INFO-level console
+# handler -- confirm this is intended.
+log_config = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'standard': {
+            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        },
+    },
+    'handlers': {
+        'console': {
+            'class': 'logging.StreamHandler',
+            'formatter': 'standard',
+            'level': logging.INFO,
+        },
+        'file': {
+            'class': 'logging.FileHandler',
+            'filename': 'Logger.log',
+            'formatter': 'standard',
+            'level': logging.WARNING,
+        },
+    },
+    'loggers': {
+        '': {
+            'handlers': ['console', 'file'],
+            'level': logging.WARNING,
+            'propagate': True,
+        },
+    }
+}
+
+logging.config.dictConfig(log_config)
+
+outLog = OutLog()  # obtain the singleton instance
--- a/test.py
+++ b/test.py
@ -1,109 +1,79 @@
-import time
-import json
-import math
-from flask import Flask,Response,request
-from flask_sse import  sse
-from flask_cors import CORS
-import re
-import qwen_agenttext
-app = Flask(__name__)
-cros = CORS(app)
-# SSE 推送函数
-import paddle;
-paddle.device.get_available_device()
+# -*- coding:utf-8 -*-
+# from spire.doc import *
+# from spire.doc.common import *
+#
+# # 创建一个 Document 对象
+# document = Document()
+# # 加载一个 Word DOCX 文档
+# # document.LoadFromFile("C:\\Users\\gy051\\Desktop\\1223.doc")
+# document.LoadFromFile("D:\\数据集\\数据集\\3.doc")
+# print(document.Sections.Count)
+# for i in range(document.Sections.Count):
+#     section=document.Sections[i]
+#     for x  in range(section.Paragraphs.Count):
+#         paragraph=section.Paragraphs[x]
+#         print(paragraph.Text)
+#     print("---------------------------------")
+#     # 或加载一个 Word DOC 文档
+# # document.LoadFromFile("1223.xml")
+#
+# # # # 设置是否在 HTML 中嵌入图片
+# # document.HtmlExportOptions.ImageEmbedded = True
+# # # document.XHTMLValidateOption.ImageEmbedded = True
+# # #
+# # # # 设置是否将表单字段导出为纯文本在 HTML 中显示
+# # document.HtmlExportOptions.IsTextInputFormFieldAsText = True
+# # # document.XHTMLValidateOption.IsTextInputFormFieldAsText = True
+# # #
+# # # # 设置是否在 HTML 中导出页眉和页脚
+# # document.HtmlExportOptions.HasHeadersFooters = False
+# # # document.XHTMLValidateOption.HasHeadersFooters = True
+# #
+# # # 将 Word 文档保存为 HTML 文件
+# # document.SaveToFile("1223.html", FileFormat.Html)
+# # #
+# document.Close()
+from bs4 import BeautifulSoup
+# Read the HTML file
+with open('D:\\models\\1223.html', 'r',encoding="utf-8") as file:
+    html_content = file.read()

+# Parse the HTML document
+soup = BeautifulSoup(html_content, 'html.parser')

-# SSE 推送路由
+# Dictionary that stores the results, keyed by heading text
+headings = {}
+current_heading = None

+# Iterate over every h1-h6 heading
+for element in soup.find_all(['h1', 'h2', 'h3',"h4","h5","h6"]):
+    level = int(element.name[1])  # heading level taken from the tag name
+    title = element.get_text(strip=True)  # heading text

-# @app.route('/register', methods=["GET"])
-# def register():
-    # 获取客户端标识符
-    # client_id = str(uuid.uuid4())
-    #
-    # # 返回 SSE 响应
-    # return jsonify({"client_id": client_id})
+    # Build the record for the current heading
+    current_heading = {
+        'title': title,
+        'level': level,
+        'content': []
+    }

+    # Store the current heading in the dictionary
+    # NOTE: headings that share the same text overwrite each other here
+    headings[title] = current_heading

-# SSE 推送路由
+    # Collect the content that follows the current heading
+    next_element = element.find_next_sibling()
+    while next_element and next_element.name not in ['h1', 'h2', 'h3',"h4","h5","h6"]:
+        # Only keep text from paragraph and div siblings
+        if next_element.name in ['p', 'div']:
+            current_heading['content'].append(next_element.get_text(strip=False))
+        next_element = next_element.find_next_sibling()
+
+# Print the results
+for heading in headings.values():
+    print(f"标题: {heading['title']} (级别: {heading['level']})")
+    print("内容:")
+    for content in heading['content']:
+        print(f" - {content}")
+    print()


-# @app.route('/sse', methods=['POST'])
-# def stream():
-#     # 获取客户端标识符
-#     client_id = 1
-#     print("client_id", client_id)
-#
-#     def aa():
-#         # 循环发送 SSE 数据
-#         for i in range(10):
-#             data = 'Hello, %s!' % client_id + str(i)
-#             print(data)
-#             sse.publish(data, channel=client_id, type='message')
-#             time.sleep(1)
-#         sse.publish("end", channel=client_id, type='message')
-#
-#     # 返回 SSE 响应
-#     response = Response(aa(), mimetype='text/event-stream')
-#     response.headers.add('Cache-Control', 'no-cache')
-#     response.headers.add('Connection', 'keep-alive')
-#     response.headers.add('X-Accel-Buffering', 'no')
-#     return response
-#
-#
-#
-# @app.route('/stream' ,methods=["GET", "POST"])
-# def stream_numbers():
-#     context= request.args.get('context')
-#
-#
-#     headers = {
-#         "Content-Type": "text/event-stream",
-#         "Cache-Control": "no-cache",
-#         "X-Accel-Buffering": "no",
-#         "Access-Control-Allow-Origin": "*",
-#         "Access-Control-Allow-Methods": "GET,POST",
-#         "Access-Control-Allow-Headers": "x-requested-with,content-type",
-#     }
-#     return Response(generate_numbers(),headers=headers)
-# def generate_numbers():
-#     event_id=0
-#     # for number in range(1, 10):
-#     #     json_data = json.dumps({"number": number})
-#     #     print(json_data)
-#     #     event_id += 1
-#     #     yield f"id: {event_id}\n"
-#     #     yield f"event: time-update\n"
-#     #     yield f"data: {json_data}\n\n"  # 每次生成一个数字就发送
-#     json_data = json.dumps({"number": "done"})
-#     yield f"id: {1}\n"
-#     yield f"event: time-update\n"
-#     yield f"data: 34568\n\n"  # 发送完成信号
-# if __name__ == '__main__':
-#
-#
-#     # 读取文件内容
-#     with open("checkPlaceName.txt", "r", encoding='utf-8') as f:
-#         gettext = f.read()
-#     batchNum=20
-#     sentences = re.split(r'[。\n]', gettext)
-#     # 去掉空字符
-#     sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
-#     # 计算总字符数
-#     total_chars = len(sentences)
-#
-#     # 计算有多少份
-#     num_chunks = math.ceil(total_chars / batchNum)
-#
-#     # 按batchNum字为一份进行处理
-#     chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
-#
-#     # 打印每一份的内容
-#     for i, chunk in enumerate(chunks):
-#         print(f"Chunk {i + 1}:")
-#         print(chunk)
-#         print("-" * 40)
-#
-#     # 打印总份数
-#     print(f"Total chunks: {num_chunks}")
-#     app.run(debug=True,port=80)