@ -0,0 +1,8 @@ |
|||
# 默认忽略的文件 |
|||
/shelf/ |
|||
/workspace.xml |
|||
# 基于编辑器的 HTTP 客户端请求 |
|||
/httpRequests/ |
|||
# Datasource local storage ignored files |
|||
/dataSources/ |
|||
/dataSources.local.xml |
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="Encoding"> |
|||
<file url="file://$PROJECT_DIR$/ce.txt" charset="GBK" /> |
|||
</component> |
|||
</project> |
@ -0,0 +1,6 @@ |
|||
<component name="InspectionProjectProfileManager"> |
|||
<settings> |
|||
<option name="USE_PROJECT_PROFILE" value="false" /> |
|||
<version value="1.0" /> |
|||
</settings> |
|||
</component> |
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="Black"> |
|||
<option name="sdkName" value="Python 3.9 (venv) (2)" /> |
|||
</component> |
|||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (venv) (2)" project-jdk-type="Python SDK" /> |
|||
</project> |
@ -0,0 +1,8 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ProjectModuleManager"> |
|||
<modules> |
|||
<module fileurl="file://$PROJECT_DIR$/.idea/python项目39.iml" filepath="$PROJECT_DIR$/.idea/python项目39.iml" /> |
|||
</modules> |
|||
</component> |
|||
</project> |
@ -0,0 +1,10 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<module type="PYTHON_MODULE" version="4"> |
|||
<component name="NewModuleRootManager"> |
|||
<content url="file://$MODULE_DIR$"> |
|||
<excludeFolder url="file://$MODULE_DIR$/venv" /> |
|||
</content> |
|||
<orderEntry type="inheritedJdk" /> |
|||
<orderEntry type="sourceFolder" forTests="false" /> |
|||
</component> |
|||
</module> |
@ -0,0 +1,258 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
# Created by Charles on 2018/10/10 |
|||
# Function: |
|||
|
|||
import sys |
|||
import requests |
|||
from bs4 import BeautifulSoup |
|||
|
|||
|
|||
ABSTRACT_MAX_LENGTH = 300 # abstract max length |
|||
|
|||
user_agents = [ |
|||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', |
|||
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', |
|||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)' |
|||
' Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36', |
|||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 ' |
|||
'(KHTML, like Gecko) QtWeb Internet Browser/3.7 http://www.QtWeb.net', |
|||
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) ' |
|||
'Chrome/41.0.2228.0 Safari/537.36', |
|||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, ' |
|||
'like Gecko) ChromePlus/4.0.222.3 Chrome/4.0.222.3 Safari/532.2', |
|||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4pre) ' |
|||
'Gecko/20070404 K-Ninja/2.1.3', |
|||
'Mozilla/5.0 (Future Star Technologies Corp.; Star-Blade OS; x86_64; U; ' |
|||
'en-US) iNet Browser 4.7', |
|||
'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201', |
|||
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) ' |
|||
'Gecko/20080414 Firefox/2.0.0.13 Pogo/2.0.0.13.6866' |
|||
] |
|||
|
|||
# 请求头信息 |
|||
HEADERS = { |
|||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", |
|||
"Content-Type": "application/x-www-form-urlencoded", |
|||
"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', |
|||
"Referer": "https://www.baidu.com/", |
|||
"Accept-Encoding": "gzip, deflate", |
|||
"Accept-Language": "zh-CN,zh;q=0.9" |
|||
} |
|||
|
|||
baidu_host_url = "https://www.baidu.com" |
|||
baidu_search_url = "https://www.baidu.com/s?ie=utf-8&tn=baidu&wd=" |
|||
|
|||
session = requests.Session() |
|||
session.headers = HEADERS |
|||
|
|||
|
|||
def search(keyword, num_results=10, debug=0):
    """
    Search Baidu for *keyword* and collect organic results.

    :param keyword: query string; a falsy value aborts the search.
    :param num_results: maximum number of results to return.
    :param debug: 1 to print progress/diagnostics, 0 to stay silent.
    :return: list of dicts with keys ``title``/``abstract``/``url``/``rank``,
             or ``None`` when *keyword* is empty.
    """
    if not keyword:
        return None

    list_result = []
    page = 1

    # URL of the first results page; later pages come from parse_html().
    next_url = baidu_search_url + keyword

    # Walk the result pages until enough entries were collected
    # or there is no next page.
    while len(list_result) < num_results:
        # Bug fix: propagate the debug flag so parse_html() can report
        # its own parse errors (it was silently dropped before).
        data, next_url = parse_html(next_url, rank_start=len(list_result), debug=debug)
        if data:
            list_result += data
            if debug:
                print("---searching[{}], finish parsing page {}, results number={}: ".format(keyword, page, len(data)))
                for d in data:
                    print(str(d))

        if not next_url:
            if debug:
                print(u"already search the last page。")
            break
        page += 1

    if debug:
        print("\n---search [{}] finished. total results number={}!".format(keyword, len(list_result)))
    # Slicing is safe even when fewer than num_results were collected.
    return list_result[:num_results]
|||
|
|||
|
|||
def parse_html(url, rank_start=0, debug=0):
    """
    Fetch one Baidu results page and scrape its organic entries.

    :param url: results-page URL to fetch.
    :param rank_start: rank already assigned to earlier results; entries
        scraped here continue numbering from this value.
    :param debug: 1 to print parse errors, 0 to stay silent.
    :return: tuple ``(results, next_url)`` — list of result dicts and the URL
        of the next page (``None`` when on the last page); ``(None, None)``
        on any fetch/parse failure.
    """
    try:
        res = session.get(url=url)
        res.encoding = "utf-8"
        root = BeautifulSoup(res.text, "lxml")

        list_data = []
        div_contents = root.find("div", id="content_left")
        for div in div_contents.contents:
            # Keep only Tag children; skip NavigableString whitespace nodes.
            if type(div) != type(div_contents):
                continue

            class_list = div.get("class", [])
            if not class_list:
                continue

            # Organic result blocks all carry the "c-container" class.
            if "c-container" not in class_list:
                continue

            title = ''
            url = ''
            abstract = ''
            try:
                # Each result template stores title/url/abstract differently;
                # branch on the template class and fall back field by field.
                if "xpath-log" in class_list:
                    if div.h3:
                        title = div.h3.text.strip()
                        url = div.h3.a['href'].strip()
                    else:
                        # No <h3>: first text line serves as the title.
                        title = div.text.strip().split("\n", 1)[0]
                        if div.a:
                            url = div.a['href'].strip()

                    if div.find("div", class_="c-abstract"):
                        abstract = div.find("div", class_="c-abstract").text.strip()
                    elif div.div:
                        abstract = div.div.text.strip()
                    else:
                        # Fallback: everything after the first text line.
                        abstract = div.text.strip().split("\n", 1)[1].strip()
                elif "result-op" in class_list:
                    if div.h3:
                        title = div.h3.text.strip()
                        url = div.h3.a['href'].strip()
                    else:
                        title = div.text.strip().split("\n", 1)[0]
                        url = div.a['href'].strip()
                    if div.find("div", class_="c-abstract"):
                        abstract = div.find("div", class_="c-abstract").text.strip()
                    elif div.div:
                        abstract = div.div.text.strip()
                    else:
                        # abstract = div.text.strip()
                        abstract = div.text.strip().split("\n", 1)[1].strip()
                else:
                    # Remaining templates are distinguished by the "tpl" attr.
                    if div.get("tpl", "") != "se_com_default":
                        if div.get("tpl", "") == "se_st_com_abstract":
                            if len(div.contents) >= 1:
                                title = div.h3.text.strip()
                                if div.find("div", class_="c-abstract"):
                                    abstract = div.find("div", class_="c-abstract").text.strip()
                                elif div.div:
                                    abstract = div.div.text.strip()
                                else:
                                    abstract = div.text.strip()
                        else:
                            if len(div.contents) >= 2:
                                if div.h3:
                                    title = div.h3.text.strip()
                                    url = div.h3.a['href'].strip()
                                else:
                                    # NOTE(review): div.h3 is falsy in this
                                    # branch yet div.h3.a is dereferenced —
                                    # presumably relies on the outer except;
                                    # confirm.
                                    title = div.contents[0].text.strip()
                                    url = div.h3.a['href'].strip()
                                # abstract = div.contents[-1].text
                                if div.find("div", class_="c-abstract"):
                                    abstract = div.find("div", class_="c-abstract").text.strip()
                                elif div.div:
                                    abstract = div.div.text.strip()
                                else:
                                    abstract = div.text.strip()
                    else:
                        # Default organic template.
                        if div.h3:
                            title = div.h3.text.strip()
                            url = div.h3.a['href'].strip()
                        else:
                            title = div.contents[0].text.strip()
                            url = div.h3.a['href'].strip()
                        if div.find("div", class_="c-abstract"):
                            abstract = div.find("div", class_="c-abstract").text.strip()
                        elif div.div:
                            abstract = div.div.text.strip()
                        else:
                            abstract = div.text.strip()
            except Exception as e:
                # One malformed result block must not abort the whole page.
                if debug:
                    print("catch exception duration parsing page html, e={}".format(e))
                continue

            # Clamp overly long abstracts (ABSTRACT_MAX_LENGTH = 300).
            if ABSTRACT_MAX_LENGTH and len(abstract) > ABSTRACT_MAX_LENGTH:
                abstract = abstract[:ABSTRACT_MAX_LENGTH]

            rank_start+=1
            list_data.append({"title": title, "abstract": abstract, "url": url, "rank": rank_start})


        # Locate the pager links ("previous/next page" buttons).
        next_btn = root.find_all("a", class_="n")

        # Last page: the only pager link is "previous page" — return the data
        # without a next-page URL.
        if len(next_btn) <= 0 or u"上一页" in next_btn[-1].text:
            return list_data, None

        next_url = baidu_host_url + next_btn[-1]["href"]
        return list_data, next_url
    except Exception as e:
        if debug:
            print(u"catch exception duration parsing page html, e:{}".format(e))
        return None, None
|||
|
|||
|
|||
def run():
    """
    CLI entry point.

    Reads the keyword (and optionally the number of results and the debug
    flag) from command-line arguments, falling back to interactive input,
    then performs the search and prints each result.
    """
    default_keyword = u"长风破浪小武哥"
    num_results = 10
    debug = 0

    prompt = """
    baidusearch: not enough arguments
    [0]keyword: keyword what you want to search
    [1]num_results: number of results
    [2]debug: debug switch, 0-close, 1-open, default-0
    eg: baidusearch NBA
        baidusearch NBA 6
        baidusearch NBA 8 1
    """
    if len(sys.argv) > 3:
        keyword = sys.argv[1]
        try:
            num_results = int(sys.argv[2])
            debug = int(sys.argv[3])
        except ValueError:
            # Bug fix: the former bare ``except`` swallowed everything,
            # including KeyboardInterrupt/SystemExit. Non-numeric arguments
            # still fall back to the defaults above.
            pass
    elif len(sys.argv) > 1:
        keyword = sys.argv[1]
    else:
        # No arguments: show usage and ask interactively.
        print(prompt)
        keyword = input("please input keyword: ")
        # sys.exit(1)

    if not keyword:
        keyword = default_keyword

    print("---start search: [{}], expected number of results:[{}].".format(keyword, num_results))
    results = search(keyword, num_results=num_results, debug=debug)

    if isinstance(results, list):
        print("search results:(total[{}]items.)".format(len(results)))
        for res in results:
            print("{}. {}\n {}\n {}".format(res['rank'], res["title"], res["abstract"], res["url"]))
    else:
        print("start search: [{}] failed.".format(keyword))
@ -0,0 +1,64 @@ |
|||
# Ad-hoc experiment script: query a locally served Qwen model through
# qwen_agent, then load a text file with LangChain for later processing.
from qwen_agent.agents import Assistant
# from qwen_agent.agents.doc_qa import ParallelDocQA

# Local OpenAI-compatible endpoint serving the Qwen model.
llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b",
    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
    # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
}
bot = Assistant(llm=llm_cfg,
                name='Assistant',
                description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。'
                )
prompt='''
请找是描述项目建设的章节名称
'''
# NOTE(review): the 'file' entry is an empty string — presumably a document
# path should be supplied for the RAG lookup to work; confirm before running.
messages = [{'role': 'user', 'content': [{'text': prompt}, {'file': ''}]}]
# bot.run() streams incremental responses; print each snapshot.
for rsp in bot.run(messages):
    print(rsp)
# messages = [{'role': 'user', 'content': [{'text':prompt}]}]
# runList=[]
# for rsp in bot.run(messages):
#     print(rsp)
import re
# Disabled experiment kept for reference: read a .docx paragraph by paragraph
# and grep the joined text for a keyword.
# from docx import Document
#
# document = Document('747991ddb29a49da903210959076bb9f.docx')
# # read the docx content paragraph by paragraph
# levelList = []
# words = []
# addStart = False
# levelText = ""
# i = 0
# for paragraph in document.paragraphs:
#     # determine the heading level of this paragraph
#     # isTitle() is a placeholder; see the method described elsewhere
#     text = paragraph.text
#     if text.strip():  # skip empty paragraphs
#         # print("non-empty")
#         words.append(text)
#         # level = isTitle(paragraph)
#         # if(addStart and level=="0"):
#         #     addStart=False
#         # if(level=="0" and text.find("详细设计方案")>=0):
#         #     addStart=True
#         # if level:
#         #     levelList.append("{}:".format(level)+paragraph.text)
#         #     levelText=text
#         # else:
#         #     if addStart:
#         #         if(text.startswith("图") or text.startswith("注:")):
#         #             continue
#         #         i=i+1
#         #         words.append("第{}个段落:".format(i)+text)
#
# # join all paragraph texts into one newline-separated string
# print(len(words))
# text = '\n'.join(words)
# paragraphs = re.findall(r'.*?' + re.escape('宁波市') + r'.*?\n', text)
# print(paragraphs)
from langchain_community.document_loaders import TextLoader

loader = TextLoader('checkRepeatText.txt')
docs = loader.load()
@ -0,0 +1,205 @@ |
|||
# -*- coding:utf-8 -*- |
|||
import time |
|||
from docx import Document |
|||
from paddlenlp import Taskflow |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
import math |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
|
|||
|
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return a |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*, or an empty collection
    when *rels_item_xml* is |None|.

    Relationships whose target is the literal 'NULL' placeholder are
    skipped (presumably emitted by malformed documents — unverified).
    """
    srels = _SerializedRelationships()
    if rels_item_xml is None:
        return srels
    for rel_elm in parse_xml(rels_item_xml).Relationship_lst:
        if rel_elm.target_ref not in ('../NULL', 'NULL'):
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels
|||
|
|||
|
|||
_SerializedRelationships.load_from_xml = load_from_xml_v2 |
|||
|
|||
|
|||
import logging |
|||
import logging.config |
|||
|
|||
log_config = { |
|||
'version': 1, |
|||
'disable_existing_loggers': False, |
|||
'formatters': { |
|||
'standard': { |
|||
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
|||
}, |
|||
}, |
|||
'handlers': { |
|||
'console': { |
|||
'class': 'logging.StreamHandler', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
'file': { |
|||
'class': 'logging.FileHandler', |
|||
'filename': 'Logger.log', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
}, |
|||
'loggers': { |
|||
'': { |
|||
'handlers': ['console', 'file'], |
|||
'level': logging.INFO, |
|||
'propagate': True, |
|||
}, |
|||
} |
|||
} |
|||
|
|||
logging.config.dictConfig(log_config) |
|||
|
|||
logger = logging.getLogger("checkCompanyName") |
|||
prompt = ''' |
|||
.根据上述文本判断,是否为具体的公司或组织名称,你可以使用工具利用互联网查询, |
|||
你只能在[具体的公司或组织名称,公益组织,简称,统称,泛化组织,政府单位,机关单位,学校,行业类型,其他]选项中选择答案, |
|||
回答格式[{“companyName”:“名称”,"回答":"答案"},{“companyName”:“名称”,"回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
llm_cfg = { |
|||
#'model': 'qwen1.5-72b-chat', |
|||
'model':"qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# system_message="你是一个地理专家,可以准确的判断地理位置,如果你不确定,可以使用工具" |
|||
) |
|||
|
|||
def getDocxToTextAll(name):
    """
    Extract every non-empty paragraph from the .docx file *name* and write
    the text, newline-separated, to ``checkCompanyName.txt`` (UTF-8).

    :param name: path of the .docx document to read.
    :raises: whatever python-docx raises for unreadable documents; callers
        catch this to report an unreadable file.
    """
    document = Document(name)
    # Removed the unused levelList/addStart/levelText/i locals left over from
    # a copy-pasted experiment; only the paragraph texts are needed.
    words = [paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()]
    text = '\n'.join(words)

    # Persist the extracted text for the downstream analysis step.
    with open("checkCompanyName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
def companyNameTask(text):
    """
    Mine company/organization names from *text* with PaddleNLP knowledge
    mining.

    Generator: yields progress strings while parsing; the final yielded item
    is the de-duplicated list of organization names.

    :param text: full document text to scan.
    """
    yield "文档公司或组织名称检查---启动中...."
    wordtag = Taskflow("knowledge_mining", device_id=0)
    batchNum = 20
    # Split into sentences on Chinese full stop or newline, dropping blanks.
    sentences = re.split(r'[。\n]', text)
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    # NOTE: despite the name, this counts sentences, not characters.
    total_chars = len(sentences)

    # Number of batches of batchNum sentences each.
    num_chunks = math.ceil(total_chars / batchNum)

    chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
    placeList = []
    for i, chunk in enumerate(chunks):
        yield f"文档公司或组织名称检查---文档解析进度:{i + 1}/{num_chunks}"

        wenBen = ".".join(chunk)
        try:
            res = wordtag(wenBen)
        except Exception as e:
            # Bug fix: the exception was passed as a stray positional arg to
            # root logging.warning() on a message with no % placeholder, which
            # breaks lazy formatting; use the module logger with %s instead.
            logger.warning(chunk)
            logger.warning("文档公司或组织名称检查---词类分析出错: %s", e)
            continue
        isplace = False
        for zuhe in res[0]['items']:
            # Merge consecutive organization tokens into a single name: when
            # the previous token was an organization and this one is too,
            # append this token's text to the last collected entry.
            zhi = zuhe.get("wordtag_label")
            if isplace:
                name = placeList[len(placeList) - 1]
                if zhi.find("组织机构类") >= 0:  # or zuhe[1] == "ns"
                    isplace = True
                    new_text = zuhe['item'].replace("\n", "")
                    placeList[len(placeList) - 1] = name + new_text
                    continue
            if zhi.find("组织机构类") >= 0:
                isplace = True
                new_text = zuhe['item'].replace("\n", "")
                placeList.append(new_text)
            else:
                isplace = False
    yield "文档公司或组织名称检查---文档解析完成"
    # De-duplicate while preserving first-seen order.
    placeList = list(dict.fromkeys(placeList))
    yield placeList
|||
def checkCompanyName(filename):
    """
    Generator pipeline: extract text from the .docx *filename*, mine
    company/organization names, ask the LLM to classify them, and yield
    progress strings followed by a final HTML-ish report string.
    """
    yield f"文档公司或组织名称检查---开始处理文档..."  # progress: start
    try:
        getDocxToTextAll(filename)
    except Exception as e:
        logging.warning(e)
        yield "文档公司或组织名称检查---文档无法打开,请检查文档内容"
        return
    # getDocxToTextAll() just wrote the extracted text to this file.
    with open("checkCompanyName.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档公司或组织名称检查---开始解析文档..."  # progress: parsing
    for item in companyNameTask(gettext):
        if isinstance(item, str):
            yield item  # forward progress strings
        else:
            final_list = item  # the generator's last item is the name list
    propnStr = ",".join(final_list)
    # Ask the LLM to classify all mined names in a single request.
    messages = [{'role': 'user', 'content': [{'text': propnStr+prompt}]}]
    runList = []
    yield f"文档公司或组织名称检查---结果生成中..."  # progress: generating
    cishu = 0
    for rsp in bot.run(messages):
        runList.append(rsp)
        if cishu > 3:
            cishu = 0
        yield "文档公司或组织名称检查---结果生成中" + '.' * cishu  # animated dots
        cishu += 1
    # bot.run() streams growing snapshots; the last one holds the full answer.
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    error_places=[]
    for place in parsed_data:
        try:
            # NOTE(review): the prompt's answer options do not include
            # '非泛化的公司或组织名称' — confirm this filter matches what the
            # model actually returns.
            if place['回答'] == '非泛化的公司或组织名称':
                error_places.append(place)
        except Exception as e:
            logging.warning(place)
            logging.warning("文档公司或组织名称检查---组织提出出错",e)
            continue
    logging.info(error_places)
    returnInfo = "发现异常公司或组织名称<br>"
    if len(error_places)>0:
        for t in error_places:
            keyword= t['companyName'].replace("\n","")
            # Find the first source paragraph containing the flagged name.
            # NOTE(review): paragraphs[0] raises IndexError when no paragraph
            # matches (e.g. name was merged across lines) — confirm.
            paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
            t["yuanwen"]=paragraphs[0]
            yuanwen = paragraphs[0].replace(keyword, f"**{keyword}**").replace("\n","")
            returnInfo += "原文:" + yuanwen + "<br>异常公司或组织名称:**" + keyword + "**!请注意" + "<br>"
        logging.info(returnInfo)
        yield returnInfo
    else:
        yield "**未发现异常公司或组织名称**<br>"
@ -0,0 +1,220 @@ |
|||
# -*- coding:utf-8 -*- |
|||
# from pycorrector import MacBertCorrector |
|||
# m = MacBertCorrector("shibing624/macbert4csc-base-chinese") |
|||
from qwen_agent.agents import Assistant |
|||
from docx import Document |
|||
from pprint import pprint |
|||
import re |
|||
from paddlenlp import Taskflow |
|||
import json |
|||
import time |
|||
import json_repair |
|||
import math |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
|
|||
import asyncio |
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*. Returns an empty
    collection if *rels_item_xml* is |None|.

    Patched replacement for python-docx's loader: relationships whose target
    is the '../NULL'/'NULL' placeholder are skipped instead of failing the
    document load (cause of such targets unverified from this file).
    """
    srels = _SerializedRelationships()
    if rels_item_xml is not None:
        rels_elm = parse_xml(rels_item_xml)
        for rel_elm in rels_elm.Relationship_lst:
            # Skip broken placeholder targets.
            if rel_elm.target_ref in ('../NULL', 'NULL'):
                continue
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels
|||
|
|||
|
|||
_SerializedRelationships.load_from_xml = load_from_xml_v2 |
|||
import logging |
|||
import logging.config |
|||
|
|||
log_config = { |
|||
'version': 1, |
|||
'disable_existing_loggers': False, |
|||
'formatters': { |
|||
'standard': { |
|||
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
|||
}, |
|||
}, |
|||
'handlers': { |
|||
'console': { |
|||
'class': 'logging.StreamHandler', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
'file': { |
|||
'class': 'logging.FileHandler', |
|||
'filename': 'Logger.log', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
}, |
|||
'loggers': { |
|||
'': { |
|||
'handlers': ['console', 'file'], |
|||
'level': logging.INFO, |
|||
'propagate': True, |
|||
}, |
|||
} |
|||
} |
|||
|
|||
logging.config.dictConfig(log_config) |
|||
|
|||
logger = logging.getLogger("checkDocumentError") |
|||
llm_cfg = { |
|||
# 'model': 'qwen1.5-72b-chat', |
|||
'model': "qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。' |
|||
|
|||
) |
|||
# prompt=''' |
|||
# 是否存在错别字,若存在请指出,不做其他方面的校验,你只能在[存在,不存在,未知]选项中选择答案, |
|||
# 回答格式[{“placeName”:“原文”,"改正后":"改正的内容","回答":"答案"},{“placeName”:“原文”,"改正后":"改正的内容","回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
# ''' |
|||
prompt = ''' |
|||
请回答以上问题,[是,否]选项中选择答案,原文内容,标点符号保持不变,如果有错请给出解析,没有错则不用给解析 |
|||
回答格式请按照以下json格式[{"placeName":"序号","回答":"答案","解析","解析内容"},{"placeName":"序号","回答":"答案","解析","解析内容"}],不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
|
|||
|
|||
def getDocxToTextAll(name):
    """
    Extract every non-empty paragraph from the .docx file *name* and write
    the text, newline-separated, to ``checkDocumentError.txt`` (UTF-8).

    :param name: path of the .docx document to read.
    :raises: whatever python-docx raises for unreadable documents; callers
        catch this to report an unreadable file.
    """
    document = Document(name)
    # Removed the unused levelList/addStart/levelText/i locals left over from
    # a copy-pasted experiment; only the paragraph texts are needed.
    words = [paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()]
    text = '\n'.join(words)

    # Persist the extracted text for the downstream typo-check step.
    with open("checkDocumentError.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
|
|||
|
|||
def getDocumentError(filename):
    """
    Generator pipeline: extract text from the .docx *filename*, run the
    typo-check task, and yield progress strings followed by a final
    HTML-ish report string.
    """
    yield f"文档纠错---开始处理文档..."  # progress: start
    try:
        getDocxToTextAll(filename)
    except Exception as e:
        logger.warning(e)
        yield "文档无法打开,请检查文档内容"
        return
    # getDocxToTextAll() just wrote the extracted text to this file.
    with open("checkDocumentError.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档纠错---开始解析文档..."  # progress streamed to the caller
    final_list = []
    for item in documentErrorTask(gettext):
        if isinstance(item, str):
            yield item  # forward progress strings
        else:
            final_list = item  # the generator's last item is the typo list
    resInfo = "发现错别字<br>"
    if (len(final_list) > 0):
        for i in final_list:
            yuanwen = i["placeName"].replace("\n", "")
            jianyi = i["jianyi"].replace("\n", "")
            resInfo += "原文:" + yuanwen + "<br>建议:**" + jianyi + "**<br>"
        yield resInfo
        logger.info(resInfo)
    else:
        yield "**未发现错别字**"
|||
|
|||
|
|||
def documentErrorTask(text):
    """
    Run PaddleNLP text correction over *text* in batches, then ask the LLM to
    confirm each suspected typo.

    Generator: yields progress strings while working; the final yielded item
    is the list of confirmed typo dicts (keys: ``placeName``, ``jianyi``,
    ``回答``, ``解析``).

    :param text: full document text to check.
    """
    yield "文档纠错---启动中...."
    corrector = Taskflow("text_correction", device_id=1)
    batchNum = 20
    # Split into sentences on Chinese full stop or newline.
    sentences = re.split(r'[。\n]', text)
    # Drop empty fragments.
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    # NOTE(review): despite the name this is the sentence count, not characters.
    total_chars = len(sentences)

    # Number of batches.
    num_chunks = math.ceil(total_chars / batchNum)

    # Process batchNum sentences per batch.
    chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
    placeList = []
    # Accumulates confirmed typos across all batches.
    err = []
    for i, chunk in enumerate(chunks):
        yield f"文档纠错---文档解析进度:{i + 1}/{num_chunks}"
        try:
            res = corrector(chunk)
        except Exception as e:
            logger.warning(chunk)
            # NOTE(review): the exception is passed as a stray positional arg
            # on a message without a % placeholder — should be "...%s", e.
            logger.warning("文档纠错--错别字识别出错\n", e)
            continue
        # Keep only sentences for which the corrector reported errors.
        lines_with_greeting = [place for place in res if len(place['errors']) > 0]
        if len(lines_with_greeting) > 0:
            num = 0
            wenti = []  # numbered question lines sent to the LLM
            keyword_list = []  # original sentences, indexed by question number
            for t in lines_with_greeting:
                temp_errorWords = []
                keyword = t['source']
                keyword_list.append(keyword)
                # Collect the characters the corrector flagged in this sentence.
                for item in t["errors"]:
                    for key, value in item['correction'].items():
                        temp_errorWords.append(key)
                wenti.append(
                    "{}、原文:{}。问题:【{}】这些字是否为当前原文的错别字".format(num, keyword, ",".join(temp_errorWords)))
                num += 1
            words = "\n".join(wenti)
            messages = [{'role': 'user', 'content': [{'text': words + prompt}]}]
            runList = []
            yield f"文档纠错---内容解析中..."  # progress streamed to the caller
            cishu = 0
            for rsp in bot.run(messages):
                runList.append(rsp)
                if cishu > 3:
                    cishu = 0
                yield "文档纠错---内容解析中" + '.' * cishu  # animated dots
                cishu += 1
            # bot.run() streams growing snapshots; last one is the full answer.
            data = runList[len(runList) - 1][0]["content"]
            parsed_data = json_repair.loads(data.replace("\\", "").replace('`', ''))
            resListerr = []
            for place in parsed_data:
                try:
                    if place['回答'] == '是':
                        # Map the question number back to the source sentence.
                        place["placeName"] = keyword_list[int(place["placeName"])]
                        place["jianyi"] = place["解析"]
                        resListerr.append(place)
                except Exception as e:
                    logger.warning(parsed_data)
                    logger.warning(place)
                    logger.warning("文档纠错--错别字提取出错\n", e)
                    continue
            if (len(resListerr) > 0):
                err.extend(resListerr)
    # NOTE(review): this message says 地名检查 (place-name check) — presumably
    # copy-pasted from checkPlaceName; confirm the intended wording.
    yield "文档地名检查---文档解析完成"
    yield err
@ -0,0 +1,212 @@ |
|||
from docx import Document |
|||
from paddlenlp import Taskflow |
|||
from pprint import pprint |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
import time |
|||
import math |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
|
|||
|
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*. Returns an empty
    collection if *rels_item_xml* is |None|.

    Patched replacement for python-docx's loader: relationships whose target
    is the '../NULL'/'NULL' placeholder are skipped instead of failing the
    document load (cause of such targets unverified from this file).
    """
    srels = _SerializedRelationships()
    if rels_item_xml is not None:
        rels_elm = parse_xml(rels_item_xml)
        for rel_elm in rels_elm.Relationship_lst:
            # Skip broken placeholder targets.
            if rel_elm.target_ref in ('../NULL', 'NULL'):
                continue
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels
|||
|
|||
|
|||
_SerializedRelationships.load_from_xml = load_from_xml_v2 |
|||
|
|||
|
|||
import logging |
|||
import logging.config |
|||
|
|||
log_config = { |
|||
'version': 1, |
|||
'disable_existing_loggers': False, |
|||
'formatters': { |
|||
'standard': { |
|||
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
|||
}, |
|||
}, |
|||
'handlers': { |
|||
'console': { |
|||
'class': 'logging.StreamHandler', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
'file': { |
|||
'class': 'logging.FileHandler', |
|||
'filename': 'Logger.log', |
|||
'formatter': 'standard', |
|||
'level': logging.INFO, |
|||
}, |
|||
}, |
|||
'loggers': { |
|||
'': { |
|||
'handlers': ['console', 'file'], |
|||
'level': logging.INFO, |
|||
'propagate': True, |
|||
}, |
|||
} |
|||
} |
|||
|
|||
logging.config.dictConfig(log_config) |
|||
|
|||
logger = logging.getLogger("checkPlaceName") |
|||
|
|||
prompt=''' |
|||
.上述文本判断地名是否正确,你可以使用工具利用互联网查询,你只能在[正确,错误,简称,未知]三种选项中选择答案,回答格式[{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
# prompt=''' |
|||
# .请回答以上问题, |
|||
# ,回答格式[{“placeName”:"原文","回答":"答案"},{“placeName”:"原文","回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
# 不做过多的解释,严格按回答格式作答; |
|||
# ''' |
|||
llm_cfg = { |
|||
#'model': 'qwen1.5-72b-chat', |
|||
'model':"qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。' |
|||
) |
|||
#获取全文内容 |
|||
def getDocxToTextAll(docxPath):
    """
    Extract every non-empty paragraph from the .docx file *docxPath* and
    write the text, newline-separated, to ``checkPlaceName.txt`` (UTF-8).

    :param docxPath: path of the .docx document to read.
    :raises: whatever python-docx raises for unreadable documents; callers
        catch this to report an unreadable file.
    """
    document = Document(docxPath)
    # Removed the unused levelList/addStart/levelText/i locals left over from
    # a copy-pasted experiment; only the paragraph texts are needed.
    words = [paragraph.text for paragraph in document.paragraphs if paragraph.text.strip()]
    text = '\n'.join(words)

    # Persist the extracted text for the downstream place-name step.
    with open("checkPlaceName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
|
|||
#得到全文和地名有关的内容 |
|||
def placeNameTask(text):
    """
    Mine place/organization names from *text* with the PaddleNLP NER task.

    Generator: yields progress strings while parsing; the final yielded item
    is the de-duplicated list of mined names.

    :param text: full document text to scan.
    """
    yield "文档地名检查---启动中...."
    tagTask = Taskflow("ner",device_id=2)
    batchNum=20
    # Split into sentences on Chinese full stop or newline.
    sentences = re.split(r'[。\n]', text)
    # Drop empty fragments.
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    # NOTE(review): despite the name this is the sentence count, not characters.
    total_chars = len(sentences)

    # Number of batches.
    num_chunks = math.ceil(total_chars / batchNum)

    # Process batchNum sentences per batch.
    chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)]
    placeList = []
    for i, chunk in enumerate(chunks):
        yield f"文档地名检查---文档解析进度:{i + 1}/{num_chunks}"

        wenBen=".".join(chunk)
        try:
            res = tagTask(wenBen)
        except Exception as e:
            logger.warning(chunk)
            # NOTE(review): the exception is passed as a stray positional arg
            # on a message without a % placeholder — should be "...%s", e.
            logger.warning("文档地名检查---解析地名出错",e)
            continue
        isplace = False
        for zuhe in res:
            # Merge consecutive place tokens: if the previous token was a
            # place name and this one is too, append this token's text to
            # the last collected entry instead of starting a new one.
            if isplace:
                name = placeList[len(placeList) - 1]
                if zuhe[1].find("组织机构类") >= 0 or zuhe[1].find("世界地区类") >= 0:  # or zuhe[1] == "ns"
                    isplace = True
                    new_text = zuhe[0].replace("\n", "")
                    placeList[len(placeList) - 1] = name + new_text
                    continue
            if zuhe[1].find("组织机构类") >= 0 or zuhe[1].find("世界地区类") >= 0:
                isplace = True
                new_text = zuhe[0].replace("\n", "")
                placeList.append(new_text)
            else:
                isplace = False
    # All batches parsed.
    yield "文档地名检查---文档解析完成"
    # De-duplicate while preserving first-seen order.
    placeList=list(dict.fromkeys(placeList))
    yield placeList
|||
#主方法 |
|||
def checkPlaceName(filename):
    """
    Generator pipeline: extract text from the .docx *filename*, mine place
    names, ask the LLM to validate them, and yield progress strings followed
    by a final HTML-ish report string.
    """
    yield f"文档地名检查---开始处理文档..."  # progress: start
    try:
        getDocxToTextAll(filename)
    except Exception as e:
        logger.warning(e)
        yield "文档地名检查---文档无法打开,请检查文档内容"
        return
    # getDocxToTextAll() just wrote the extracted text to this file.
    with open("checkPlaceName.txt", "r",encoding='utf-8') as f:
        gettext = f.read()
    yield f"文档地名检查---开始解析文档..."  # progress: parsing
    # propnList=placeNameTask(gettext)
    for item in placeNameTask(gettext):
        if isinstance(item, str):
            yield item  # forward progress strings
        else:
            final_list = item  # the generator's last item is the name list
    propnStr = ",".join(final_list)
    # Ask the LLM to validate all mined names in a single request.
    messages = [{'role': 'user', 'content': [{'text': propnStr + prompt}]}]
    runList = []
    yield f"文档地名检查---结果生成中..."  # progress: generating
    cishu=0
    for rsp in bot.run(messages):
        runList.append(rsp)
        if cishu>3:
            cishu=0
        yield "文档地名检查---结果生成中"+'.'*cishu  # animated dots
        cishu+=1
    # bot.run() streams growing snapshots; the last one holds the full answer.
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    error_places=[]
    # Keep only the names the model judged to be wrong.
    for place in parsed_data:
        try:
            if place['回答'] == '错误':
                error_places.append(place)
        except Exception as e:
            logger.warning(place)
            logger.warning("文档地名检查---组织提出出错",e)
            continue
    logger.info(error_places)
    returnInfo = "发现异常地名<br>"
    if len(error_places)>0:
        for t in error_places:
            keyword= t['placeName'].replace("\n","")
            # Find the first source paragraph containing the flagged name.
            # NOTE(review): paragraphs[0] raises IndexError when nothing
            # matches — confirm.
            paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
            yuanwen= paragraphs[0].replace(keyword,f"**{keyword}**").replace("\n","")
            returnInfo+="原文:" + yuanwen + "<br>出现异常地名:**" + keyword + "**!请注意" + "<br>"
        yield returnInfo
        logger.info(returnInfo)
    else:
        yield "**未发现发现异常地名**"
@ -0,0 +1,292 @@ |
|||
import uuid |
|||
from langchain_chroma import Chroma |
|||
from langchain_community.embeddings import DashScopeEmbeddings |
|||
from langchain_community.document_loaders import TextLoader |
|||
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|||
from qwen_agent.agents import Assistant |
|||
import json_repair |
|||
from paddlenlp import Taskflow |
|||
embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13") |
|||
device_id=0 |
|||
import re |
|||
import time |
|||
from docx import Document |
|||
import shutil |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
import logging |
|||
import logging.config |
|||
|
|||
# Logging configuration: the same timestamped format goes to the console and
# to the file 'Logger.log', both at INFO level.
log_config = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'standard': {
            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'standard',
            'level': logging.INFO,
        },
        'file': {
            'class': 'logging.FileHandler',
            'filename': 'Logger.log',
            'formatter': 'standard',
            'level': logging.INFO,
        },
    },
    'loggers': {
        # Root logger: everything at INFO and above goes to both handlers.
        '': {
            'handlers': ['console', 'file'],
            'level': logging.INFO,
            'propagate': True,
        },
    }
}

logging.config.dictConfig(log_config)

# Module-level logger used throughout this file.
logger = logging.getLogger("checkRepeatText")
|||
|
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*. Returns an empty
    collection if *rels_item_xml* is |None|.
    """
    srels = _SerializedRelationships()
    if rels_item_xml is not None:
        rels_elm = parse_xml(rels_item_xml)
        for rel_elm in rels_elm.Relationship_lst:
            # Skip relationships whose target is a literal 'NULL' placeholder;
            # python-docx would otherwise fail to open such documents.
            if rel_elm.target_ref in ('../NULL', 'NULL'):
                continue
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels


# Monkey-patch python-docx so documents containing 'NULL' relationship
# targets can still be opened.
_SerializedRelationships.load_from_xml = load_from_xml_v2
|||
# Extract the outline level from a paragraph/style XML fragment.
def getOutlineLevel(inputXml):
    """
    Extract the number from the '<w:outlineLvl w:val="number"/>' element
    contained in *inputXml*.

    Parameters:
        inputXml: XML string of a paragraph or style element.
    Returns:
        The outline level as a string of digits, e.g. "0".
    Raises:
        AttributeError: if no digit follows '<w:outlineLvl'.
    """
    start_index = inputXml.find('<w:outlineLvl')
    end_index = inputXml.find('>', start_index)
    number = inputXml[start_index:end_index + 1]
    # FIX: raw string — "\d+" is an invalid escape sequence and warns on Python 3.12+.
    number = re.search(r"\d+", number).group()
    return number
|||
|
|||
|
|||
def isTitle(paragraph):
    """
    Determine whether the paragraph carries an outline (heading) level.

    Parameters:
        paragraph: a python-docx Paragraph.
    Returns:
        None for plain body text (no outline level); otherwise the level as a
        string: "0" = first-level heading, "1" = second-level, "2" = third-level.
    """
    # Empty lines are never headings.
    if paragraph.text.strip() == '':
        return None

    # If the outline level is set directly on the paragraph, read it from the XML.
    paragraphXml = paragraph._p.xml
    if paragraphXml.find('<w:outlineLvl') >= 0:
        return getOutlineLevel(paragraphXml)
    # Otherwise the level may come from the style: walk the style inheritance
    # chain (style -> base_style -> ...) looking for an outline level.
    targetStyle = paragraph.style
    while targetStyle is not None:
        # Found an outline level at this style; return it.
        if targetStyle.element.xml.find('<w:outlineLvl') >= 0:
            return getOutlineLevel(targetStyle.element.xml)
        else:
            targetStyle = targetStyle.base_style
    # Neither the paragraph nor any style defines an outline level.
    return None
|||
|
|||
# Find the heading name of the detailed-design-plan chapter.
def findTitleName(docxPath):
    """
    Generator: collect the document's level-0 headings and ask the LLM which
    chapter holds the detailed design plan ('详细设计方案' or similar).

    Yields a progress string first, then either the chosen heading name or a
    fixed "not found" message (callers compare against that exact string).
    """
    yield '文档相似性检查----检查是否存在详细设计方案'
    document = Document(docxPath)
    # Walk the paragraphs, collecting first-level heading texts.
    titleWords=[]
    firstTitle = 0
    secondTitle = 0
    sanjiTitle = 0
    for paragraph in document.paragraphs:
        # Determine the heading level of this paragraph (see isTitle()).
        text = paragraph.text
        if text.strip():  # skip empty paragraphs
            level = isTitle(paragraph)
            if level=="0":
                firstTitle+=1
                secondTitle = 0
                # Headings containing '附件' (appendix) are excluded.
                if(text.find("附件")>=0):
                    continue
                titleWords.append("一级标题:".format(firstTitle)+text)
            elif level=="1":
                secondTitle+=1
                sanjiTitle=0
                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                # titleWords.append("第{}章的二级标题:".format(firstTitle,firstTitle,secondTitle)+text)
            elif level=="2":
                sanjiTitle += 1
                # words.append("\t"+"{}.{}".format(firstTitle,secondTitle)+text)
                # titleWords.append("第{}章的三级标题".format(firstTitle, secondTitle,firstTitle, secondTitle,sanjiTitle) + text)
    # Local LLM endpoint used only for this heading-selection query.
    findTitleName_llm_cfg = {
        #'model': 'qwen1.5-72b-chat',
        'model':"qwen2-72b",
        'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
        # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
                                  name='Assistant',
                                  # system_message='1:这样的是一级标题。1.1:这样的是二级标题。1.1.1:这样的是三级标题'
                                  )
    prompt='''\n是文档的大纲,一级标题组成,哪一章存在与方案相关的内容
    类似详细设计方案,详细服务方案,详细建设方案为最相关的,优先选择
    类似设计方案,服务方案,建设方案为次相关,次级选择
    类似方案是最后选择
    按照这样的顺序选择最合适的
    你只能从这两个答案中选择一个:{"name":"一级标题名称","answer":"存在"}或{"name":"","answer":"不存在"},不做过多的解释,严格按回答格式作答
    '''
    # print("\n".join(titleWords)+prompt)
    messages = [({'role': 'user', 'content': "\n".join(titleWords)+prompt})]
    runList=[]
    for rsp in findTitleName_bot.run(messages):
        runList.append(rsp)
    # The last streamed response holds the full answer.
    data = runList[len(runList) - 1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    logger.info(parsed_data)
    if(parsed_data["answer"]=="存在"):
        yield parsed_data["name"]
    else:
        yield "文档相似性检查----未找到与详细设计方案相关内容,无法进行相似性比较"
|||
# Extract the whole body of the detailed-design-plan chapter and index it.
def getDocxToText(docxPath, titleName, vector_store_path):
    """
    Extract the body text of the chapter named *titleName* from the .docx
    file, write it to 'checkRepeatText.txt', and index the chunks into a
    Chroma vector store under *vector_store_path*.

    Parameters:
        docxPath: path to the .docx file.
        titleName: text of the level-0 heading whose chapter to extract.
        vector_store_path: persistence directory for the Chroma store.
    Returns:
        (words, uuids, vectorstore):
            words: list of "<level heading>:<text>" strings (paragraphs > 30 chars),
            uuids: ids of the chunks added to the store,
            vectorstore: the populated Chroma instance.
    Raises:
        Exception: when no qualifying paragraph was found.
    """
    document = Document(docxPath)
    words = []
    addStart = False   # True while we are inside the target chapter
    levelText = ""     # most recent heading, used to tag body paragraphs
    for paragraph in document.paragraphs:
        text = paragraph.text
        if text.strip():  # skip empty paragraphs
            if titleName:
                level = isTitle(paragraph)
                # A new top-level heading ends the chapter we were collecting.
                if (addStart and level == "0"):
                    addStart = False
                # Start collecting at the top-level heading matching titleName.
                if (level == "0" and (titleName.find(text) >= 0 or text.find(titleName) >= 0)):
                    addStart = True
                if level:
                    levelText = f"{int(level)+1}级标题-" + text
                else:
                    if addStart:
                        # Skip figure captions ('图…') and notes ('注:…').
                        if (text.startswith("图") or text.startswith("注:")):
                            continue
                        if (len(text) > 30):
                            words.append("{}:".format(levelText) + text)
    if len(words) == 0:
        raise Exception("checkRepeatText,获取长度为0")
    text = '\n'.join(words)

    # BUG FIX: write and read the temp file with an explicit encoding so the
    # round-trip does not depend on the platform default (e.g. GBK on Windows);
    # consistent with the other utf-8 file accesses in this module.
    with open("checkRepeatText.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
    time.sleep(3)
    loader = TextLoader(file_path='checkRepeatText.txt', encoding='utf-8')
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10, add_start_index=True,
                                                   separators=["\n\n", "\n"])
    splits = text_splitter.split_documents(docs)
    uuids = [str(uuid.uuid4()) for _ in range(len(splits))]
    logging.info(f"checkRepeatTextuuidLen{len(uuids)}")

    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
    vectorstore.add_documents(documents=splits, ids=uuids)
    # Poll until the newly added documents become searchable.
    while True:
        time.sleep(0.3)
        ress = vectorstore.similarity_search(words[0])
        if (len(ress) > 0):
            break
    return words, uuids, vectorstore
|||
|
|||
|
|||
# @app.route('/checkRepeatText/<filename>', methods=['GET'])
def checkRepeatText(filename):
    """
    Generator: check the detailed-design-plan chapter of a .docx file for
    internally repeated (highly similar) paragraphs.

    Yields progress strings, then a markdown report listing paragraph pairs
    whose text_similarity score exceeds 0.90, or "未发现相似内容".
    """
    yield "文档相似性检查---启动中...."
    # Unique store path per run so concurrent checks cannot collide.
    vector_store_path = "vector_store" + str(uuid.uuid4())
    for titleName in findTitleName(filename):
        yield titleName
        if (titleName != "文档相似性检查----未找到与详细设计方案相关内容,无法进行相似性比较"):
            try:
                yield "文档相似性检查----文档内容解析中"
                words, uuids, vectorstore = getDocxToText(filename, titleName, vector_store_path)
            except Exception as e:
                yield f"文档相似性检查----文档内容获取失败,未找到**{titleName}**相关内容或文档打开失败"
                return
            global device_id
            # NOTE(review): device_id is hard-coded to 3 and the global is not
            # actually used here — confirm which device this should run on.
            similarity = Taskflow("text_similarity", device_id=3)
            reslist = []
            count = 0
            for i in words:
                count += 1
                yield f"文档相似性检查--对{titleName}章节,进行文档内容检查中{count}/{len(words)}"
                result = vectorstore.similarity_search(i)
                textTag = i.split(":")[0]
                for content in result:
                    text = content.page_content
                    tag = text.split(":")[0].replace('\n', '')
                    # Skip hits that come from the same heading as the query.
                    if (textTag.find(tag) >= 0):
                        continue
                    try:
                        res = similarity([[i[i.find(':') + 1:], text[text.find(':') + 1:]]])
                    except Exception as e:
                        # BUG FIX: use a %s placeholder (the old call dropped `e`)
                        # and skip this pair — `res` would otherwise be unbound below.
                        logger.warning("文档相似性检查--发生异常:%s", e)
                        logger.warning(i)
                        logger.warning(text)
                        continue
                    if (res[0]["similarity"] > 0.90):
                        # 判断重复内容是否被放入: only record the pair when this
                        # paragraph is not already part of a recorded pair.
                        isExist = False
                        for neirong in reslist:
                            if i in neirong.values():
                                isExist = True
                                break
                        if not isExist:
                            reslist.append({"yuanwen1": i.replace("\n", ""), "yuanwen2": text.replace("\n", ""), "similarity": res[0]["similarity"]})
            # vectorstore.delete(ids=uuids)
            shutil.rmtree(vector_store_path)
            logger.info("已删除")
            logger.info(reslist)
            resInfo = f"对{titleName}章节,发现相似内容:<br>"
            if (len(reslist) > 0):
                for res in reslist:
                    resInfo += "【在**" + res["yuanwen1"][:res["yuanwen1"].find(':')] + "**下包含:" + res["yuanwen1"][res["yuanwen1"].find(':') + 1:] + "<br>在**" + res["yuanwen2"][:res["yuanwen2"].find(':')] + "**下包含:" + res["yuanwen2"][res["yuanwen2"].find(':') + 1:] + "<br>以上两段内容***相似度***:" + '{:.2f}'.format(res['similarity']) + "】<br>"
                yield resInfo
                logger.info(resInfo)
            else:
                yield "未发现相似内容"
@ -0,0 +1,173 @@ |
|||
from docx import Document |
|||
from pprint import pprint |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
import math |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
|
|||
|
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*. Returns an empty
    collection if *rels_item_xml* is |None|.
    """
    srels = _SerializedRelationships()
    if rels_item_xml is not None:
        rels_elm = parse_xml(rels_item_xml)
        for rel_elm in rels_elm.Relationship_lst:
            # Skip relationships whose target is a literal 'NULL' placeholder;
            # python-docx would otherwise fail to open such documents.
            if rel_elm.target_ref in ('../NULL', 'NULL'):
                continue
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels


# Monkey-patch python-docx so documents containing 'NULL' relationship
# targets can still be opened.
_SerializedRelationships.load_from_xml = load_from_xml_v2
|||
import logging |
|||
import logging.config |
|||
|
|||
# Logging configuration: the same timestamped format goes to the console and
# to the file 'Logger.log', both at INFO level.
log_config = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'standard': {
            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'standard',
            'level': logging.INFO,
        },
        'file': {
            'class': 'logging.FileHandler',
            'filename': 'Logger.log',
            'formatter': 'standard',
            'level': logging.INFO,
        },
    },
    'loggers': {
        # Root logger: everything at INFO and above goes to both handlers.
        '': {
            'handlers': ['console', 'file'],
            'level': logging.INFO,
            'propagate': True,
        },
    }
}

logging.config.dictConfig(log_config)

# Module-level logger used throughout this file.
logger = logging.getLogger("checkCompanyName")
|||
# LLM configuration for the document-structure assistant.
llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b-instruct",
    'model_server': 'DashScope', # base_url, also known as api_base
    # SECURITY NOTE(review): hard-coded API key committed to source — move it
    # to an environment variable / secret store and rotate this key.
    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
}
bot = Assistant(llm=llm_cfg,
                name='Assistant',
                )
|||
|
|||
|
|||
# Extract the outline level from a paragraph/style XML fragment.
def getOutlineLevel(inputXml):
    """
    Extract the number from the '<w:outlineLvl w:val="number"/>' element
    contained in *inputXml*.

    Parameters:
        inputXml: XML string of a paragraph or style element.
    Returns:
        The outline level as a string of digits, e.g. "0".
    Raises:
        AttributeError: if no digit follows '<w:outlineLvl'.
    """
    start_index = inputXml.find('<w:outlineLvl')
    end_index = inputXml.find('>', start_index)
    number = inputXml[start_index:end_index + 1]
    # FIX: raw string — "\d+" is an invalid escape sequence and warns on Python 3.12+.
    number = re.search(r"\d+", number).group()
    return number
|||
|
|||
|
|||
def isTitle(paragraph):
    """
    Determine whether the paragraph carries an outline (heading) level.

    Parameters:
        paragraph: a python-docx Paragraph.
    Returns:
        None for plain body text (no outline level); otherwise the level as a
        string: "0" = first-level heading, "1" = second-level, "2" = third-level.
    """
    # Empty lines are never headings.
    if paragraph.text.strip() == '':
        return None

    # If the outline level is set directly on the paragraph, read it from the XML.
    paragraphXml = paragraph._p.xml
    if paragraphXml.find('<w:outlineLvl') >= 0:
        return getOutlineLevel(paragraphXml)
    # Otherwise the level may come from the style: walk the style inheritance
    # chain (style -> base_style -> ...) looking for an outline level.
    targetStyle = paragraph.style
    while targetStyle is not None:
        # Found an outline level at this style; return it.
        if targetStyle.element.xml.find('<w:outlineLvl') >= 0:
            return getOutlineLevel(targetStyle.element.xml)
        else:
            targetStyle = targetStyle.base_style
    # Neither the paragraph nor any style defines an outline level.
    return None
|||
|
|||
# Collect the text of every top-level (level-0) heading in the document.
def getDocxToTitleName(docxPath):
    """
    Return the texts of all first-level headings in *docxPath*.

    Parameters:
        docxPath: path to a .docx file readable by python-docx.
    Returns:
        list[str] of level-0 heading texts, in document order.
    """
    document = Document(docxPath)
    words = []
    # FIX: dropped the unused locals (levelList, addStart, levelText, i) that
    # were copied from a sibling function and never read here.
    for paragraph in document.paragraphs:
        text = paragraph.text
        # Skip empty paragraphs; keep only paragraphs whose outline level is 0.
        if text.strip() and isTitle(paragraph) == "0":
            words.append(text)
    return words
|||
|
|||
def checkTitleName(filename):
    """
    Generator: compare the document's top-level headings against the template
    headings listed in 'ce模板.txt' and report template headings that the LLM
    cannot match to any document heading.

    Yields progress strings and finally either a markdown report of missing
    headings or "文档结构未发现异常".
    """
    yield '文档结构检查----启动中'
    with open("ce模板.txt", "r", encoding='utf-8') as f:
        gettext = f.readlines()
    count = 0
    reserr = []
    try:
        word = getDocxToTitleName(filename)
    except Exception as e:
        # FIX(consistency): report through the module logger instead of print(),
        # matching the logging style used elsewhere in this module.
        logger.warning(e)
        yield "文档无法打开,请检查文档内容"
        return
    # Ask the LLM, one template heading at a time, whether it matches a document heading.
    for text in gettext:
        count += 1
        prompt = f'''
        \n 这些是文章的标题,请问【{text}】在标题中是否可以配对的,若有请指出是哪个标题,若没有请回到不存在
        '''
        xushang = "回答格式{‘name’:‘名称’,'answer':‘回答’,“标题”:“标题”}请严格按照格式回答问题,不要做过多我解释"
        yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
        strword = "\n".join(word) + prompt + xushang
        # print(strword)
        messages = [{'role': 'user', 'content': [{'text': strword}]}]
        runList = []
        cishu = 0
        for rsp in bot.run(messages):
            runList.append(rsp)
        # The last streamed response holds the full answer.
        data = runList[len(runList) - 1][0]["content"]
        parsed_data = json_repair.loads(data.replace('`', ''))
        logger.info(parsed_data)
        if (parsed_data["answer"] == "不存在"):
            reserr.append(text)
    resInfo = "文档结构存在异常:<br>"
    if (len(reserr) > 0):
        for i in reserr:
            resInfo += "**" + i.replace('\n', '') + "**<br>"
        logger.info(resInfo)
        yield resInfo
    else:
        yield "文档结构未发现异常"
@ -0,0 +1,176 @@ |
|||
from docx import Document |
|||
from pprint import pprint |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
import math |
|||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship |
|||
from docx.opc.oxml import parse_xml |
|||
def load_from_xml_v2(baseURI, rels_item_xml):
    """
    Return |_SerializedRelationships| instance loaded with the
    relationships contained in *rels_item_xml*. Returns an empty
    collection if *rels_item_xml* is |None|.
    """
    srels = _SerializedRelationships()
    if rels_item_xml is not None:
        rels_elm = parse_xml(rels_item_xml)
        for rel_elm in rels_elm.Relationship_lst:
            # Skip relationships whose target is a literal 'NULL' placeholder;
            # python-docx would otherwise fail to open such documents.
            if rel_elm.target_ref in ('../NULL', 'NULL'):
                continue
            srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
    return srels


# Monkey-patch python-docx so documents containing 'NULL' relationship
# targets can still be opened.
_SerializedRelationships.load_from_xml = load_from_xml_v2
|||
# LLM configuration for the document-structure assistant.
llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b-instruct",
    'model_server': 'DashScope', # base_url, also known as api_base
    # SECURITY NOTE(review): hard-coded API key committed to source — move it
    # to an environment variable / secret store and rotate this key.
    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
}
bot = Assistant(llm=llm_cfg,
                name='Assistant',
                )
|||
|
|||
|
|||
# Extract the outline level from a paragraph/style XML fragment.
def getOutlineLevel(inputXml):
    """
    Extract the number from the '<w:outlineLvl w:val="number"/>' element
    contained in *inputXml*.

    Parameters:
        inputXml: XML string of a paragraph or style element.
    Returns:
        The outline level as a string of digits, e.g. "0".
    Raises:
        AttributeError: if no digit follows '<w:outlineLvl'.
    """
    start_index = inputXml.find('<w:outlineLvl')
    end_index = inputXml.find('>', start_index)
    number = inputXml[start_index:end_index + 1]
    # FIX: raw string — "\d+" is an invalid escape sequence and warns on Python 3.12+.
    number = re.search(r"\d+", number).group()
    return number
|||
|
|||
|
|||
def isTitle(paragraph):
    """
    Determine whether the paragraph carries an outline (heading) level.

    Parameters:
        paragraph: a python-docx Paragraph.
    Returns:
        None for plain body text (no outline level); otherwise the level as a
        string: "0" = first-level heading, "1" = second-level, "2" = third-level.
    """
    # Empty lines are never headings.
    if paragraph.text.strip() == '':
        return None

    # If the outline level is set directly on the paragraph, read it from the XML.
    paragraphXml = paragraph._p.xml
    if paragraphXml.find('<w:outlineLvl') >= 0:
        return getOutlineLevel(paragraphXml)
    # Otherwise the level may come from the style: walk the style inheritance
    # chain (style -> base_style -> ...) looking for an outline level.
    targetStyle = paragraph.style
    while targetStyle is not None:
        # Found an outline level at this style; return it.
        if targetStyle.element.xml.find('<w:outlineLvl') >= 0:
            return getOutlineLevel(targetStyle.element.xml)
        else:
            targetStyle = targetStyle.base_style
    # Neither the paragraph nor any style defines an outline level.
    return None
|||
|
|||
# Collect the text of every top-level (level-0) heading in the document.
def getDocxToTitleName(docxPath):
    """
    Return the texts of all first-level headings in *docxPath*.

    Parameters:
        docxPath: path to a .docx file readable by python-docx.
    Returns:
        list[str] of level-0 heading texts, in document order.
    """
    document = Document(docxPath)
    words = []
    # FIX: dropped the unused locals (levelList, addStart, levelText, i) that
    # were copied from a sibling function and never read here.
    for paragraph in document.paragraphs:
        text = paragraph.text
        # Skip empty paragraphs; keep only paragraphs whose outline level is 0.
        if text.strip() and isTitle(paragraph) == "0":
            words.append(text)
    return words
|||
|
|||
def checkTitleName(filename):
    """
    Generator: compare the document's top-level headings against the template
    headings listed in 'ce模板.txt' and report template headings with no match.

    BUG FIX: the previous body referenced names that were never defined
    (text, count, gettext, word) and raised NameError on first iteration;
    the working implementation — left commented out below it — is restored.
    """
    yield '文档结构检查----启动中'
    with open("ce模板.txt", "r", encoding='utf-8') as f:
        gettext = f.readlines()
    count = 0
    reserr = []
    try:
        word = getDocxToTitleName(filename)
    except Exception as e:
        print(e)
        yield "文档无法打开,请检查文档内容"
        return
    # Ask the LLM, one template heading at a time, whether it matches a document heading.
    for text in gettext:
        count += 1
        prompt = f'''
        \n 这些是文章的标题,请问【{text}】在标题中是否可以配对的,若有请指出是哪个标题,若没有请回到不存在
        '''
        xushang = "回答格式{‘name’:‘名称’,'answer':‘回答’,“标题”:“标题”}请严格按照格式回答问题,不要做过多我解释"
        yield f"文档结构检查----结构分析中{count}/{len(gettext)}"
        strword = "\n".join(word) + prompt + xushang
        messages = [{'role': 'user', 'content': [{'text': strword}]}]
        runList = []
        for rsp in bot.run(messages):
            runList.append(rsp)
        # The last streamed response holds the full answer.
        data = runList[len(runList) - 1][0]["content"]
        parsed_data = json_repair.loads(data.replace('`', ''))
        print(parsed_data)
        if (parsed_data["answer"] == "不存在"):
            reserr.append(text)
    resInfo = "文档结构存在异常:<br>"
    if (len(reserr) > 0):
        for i in reserr:
            resInfo += f"**{i}**<br>"
        yield resInfo
    else:
        yield "文档结构未发现异常"
|||
|
|||
|
|||
import logging

# Script-level logger for this demo block.
logger = logging.getLogger('my_logger')
logger.setLevel(logging.DEBUG)

# Console handler at DEBUG level.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)

# Timestamped formatter attached to the console handler.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)

# Register the handler on the logger.
logger.addHandler(ch)
try:
    # Emit one message per level to exercise the configuration.
    logger.debug('这是一个调试消息')
    logger.info('这是一个信息消息')
    logger.warning('这是一个警告消息')
    logger.error('这是一个错误消息')
    logger.critical('这是一个致命错误消息')
except Exception as e:
    logger.warning(e)
@ -0,0 +1,712 @@ |
|||
""" |
|||
This module will parse the JSON file following the BNF definition: |
|||
|
|||
<json> ::= <container> |
|||
|
|||
<primitive> ::= <number> | <string> | <boolean> |
|||
; Where: |
|||
; <number> is a valid real number expressed in one of a number of given formats |
|||
; <string> is a string of valid characters enclosed in quotes |
|||
; <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted) |
|||
|
|||
<container> ::= <object> | <array> |
|||
<array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas |
|||
<object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members' |
|||
<member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value |
|||
|
|||
If something is wrong (a missing parenthesis or quote, for example) it will use a few simple heuristics to fix the JSON string:
|||
- Add the missing parentheses if the parser believes that the array or object should be closed |
|||
- Quote strings or add missing single quotes |
|||
- Adjust whitespaces and remove line breaks |
|||
|
|||
All supported use cases are in the unit tests |
|||
""" |
|||
|
|||
import os |
|||
import json |
|||
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal |
|||
|
|||
|
|||
class StringFileWrapper:
    """Read-only, string-like adapter over a text file descriptor.

    Exposes indexing, slicing and len() so the parser can treat an open file
    exactly like an in-memory string.
    """

    def __init__(self, fd: TextIO) -> None:
        self.fd = fd
        # Cached file length; 0 means "not computed yet" (see __len__).
        self.length: int = 0

    def __getitem__(self, index: Union[int, slice]) -> str:
        """Return one character (int index) or a substring (slice)."""
        if isinstance(index, slice):
            start = index.start
            self.fd.seek(start)
            text = self.fd.read(index.stop - start)
            # Restore the position to the slice start, as callers expect.
            self.fd.seek(start)
            return text
        self.fd.seek(index)
        return self.fd.read(1)

    def __len__(self) -> int:
        """Total number of characters in the file (computed once, then cached)."""
        if self.length < 1:
            saved_pos = self.fd.tell()
            self.fd.seek(0, os.SEEK_END)
            self.length = self.fd.tell()
            self.fd.seek(saved_pos)
        return self.length
|||
|
|||
|
|||
class LoggerConfig:
    """Plain holder for the parser's logging state."""

    def __init__(self, log_level: Optional[str]):
        self.log: List[Dict[str, str]] = []  # accumulated log entries
        self.window: int = 10                # context window size used when logging
        # Fall back to "none" (logging disabled) when no level was given.
        self.log_level: str = log_level or "none"
|||
|
|||
|
|||
JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None] |
|||
|
|||
|
|||
class JSONParser: |
|||
    def __init__(
        self,
        json_str: Union[str, StringFileWrapper],
        json_fd: Optional[TextIO],
        logging: Optional[bool],
    ) -> None:
        """Initialize parser state over either a string or an open file descriptor."""
        # The string to parse
        self.json_str = json_str
        # Alternatively, the file description with a json file in it
        if json_fd:
            # This is a trick we do to treat the file wrapper as an array
            self.json_str = StringFileWrapper(json_fd)
        # Index is our iterator that will keep track of which character we are looking at right now
        self.index: int = 0
        # This is used in the object member parsing to manage the special cases of missing quotes in key or value
        self.context: list[str] = []
        # Use this to log the activity, but only if logging is active
        self.logger = LoggerConfig(log_level="info" if logging else None)
|||
|
|||
    def parse(
        self,
    ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
        """
        Parse the whole input and return the repaired JSON value.

        If extra top-level elements follow the first one, they are collected
        into a list. When logging is enabled (log_level != "none"), returns
        (value, log) instead of just the value.
        """
        json = self.parse_json()
        if self.index < len(self.json_str):
            self.log(
                "The parser returned early, checking if there's more json elements",
                "info",
            )
            json = [json]
            last_index = self.index
            while self.index < len(self.json_str):
                j = self.parse_json()
                if j != "":
                    json.append(j)
                if self.index == last_index:
                    # No progress was made; skip one character to avoid looping forever.
                    self.index += 1
                last_index = self.index
            # If nothing extra was found, don't return an array
            if len(json) == 1:
                self.log(
                    "There were no more elements, returning the element without the array",
                    "info",
                )
                json = json[0]
        if self.logger.log_level == "none":
            return json
        else:
            return json, self.logger.log
|||
|
|||
    def parse_json(
        self,
    ) -> JSONReturnType:
        """Dispatch on the next character and parse a single JSON value."""
        while True:
            char = self.get_char_at()
            # This parser will ignore any basic element (string or number) that is not inside an array or object
            is_in_context = len(self.context) > 0
            # False means that we are at the end of the string provided
            if char is False:
                return ""
            # <object> starts with '{'
            elif char == "{":
                self.index += 1
                return self.parse_object()
            # <array> starts with '['
            elif char == "[":
                self.index += 1
                return self.parse_array()
            # there can be an edge case in which a key is empty and at the end of an object
            # like "key": }. We return an empty string here to close the object properly
            elif char == "}":
                self.log(
                    "At the end of an object we found a key with missing value, skipping",
                    "info",
                )
                return ""
            # <string> starts with a quote
            elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
                return self.parse_string()
            # <number> starts with [0-9] or minus
            elif is_in_context and (char.isdigit() or char == "-" or char == "."):
                return self.parse_number()
            # If everything else fails, we just ignore and move on
            else:
                self.index += 1
|||
|
|||
    def parse_object(self) -> Dict[str, Any]:
        """Parse one JSON object; the opening '{' has already been consumed."""
        # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
        obj = {}
        # Stop when you either find the closing parentheses or you have iterated over the entire string
        while (self.get_char_at() or "}") != "}":
            # This is what we expect to find:
            # <member> ::= <string> ': ' <json>

            # Skip filler whitespaces
            self.skip_whitespaces_at()

            # Sometimes LLMs do weird things, if we find a ":" so early, we'll change it to "," and move on
            if (self.get_char_at() or "") == ":":
                self.log(
                    "While parsing an object we found a : before a key, ignoring",
                    "info",
                )
                self.index += 1

            # We are now searching for they string key
            # Context is used in the string parser to manage the lack of quotes
            self.set_context("object_key")

            self.skip_whitespaces_at()

            # <member> starts with a <string>
            key = ""
            while self.get_char_at():
                key = str(self.parse_string())

                if key != "" or (key == "" and self.get_char_at() == ":"):
                    # If the string is empty but there is a object divider, we are done here
                    break

            self.skip_whitespaces_at()

            # We reached the end here
            if (self.get_char_at() or "}") == "}":
                continue

            self.skip_whitespaces_at()

            # An extreme case of missing ":" after a key
            if (self.get_char_at() or "") != ":":
                self.log(
                    "While parsing an object we missed a : after a key",
                    "info",
                )

            self.index += 1
            self.reset_context()
            self.set_context("object_value")
            # The value can be any valid json
            value = self.parse_json()

            # Reset context since our job is done
            self.reset_context()
            obj[key] = value

            if (self.get_char_at() or "") in [",", "'", '"']:
                self.index += 1

            # Remove trailing spaces
            self.skip_whitespaces_at()

        # Consume the closing '}'.
        self.index += 1
        return obj
|||
|
|||
    def parse_array(self) -> List[Any]:
        """Parse one JSON array; the opening '[' has already been consumed."""
        # <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
        arr = []
        self.set_context("array")
        # Stop when you either find the closing parentheses or you have iterated over the entire string
        while (self.get_char_at() or "]") != "]":
            self.skip_whitespaces_at()
            value = self.parse_json()

            # It is possible that parse_json() returns nothing valid, so we stop
            if value == "":
                break

            if value == "..." and self.get_char_at(-1) == ".":
                self.log(
                    "While parsing an array, found a stray '...'; ignoring it", "info"
                )
            else:
                arr.append(value)

            # skip over whitespace after a value but before closing ]
            char = self.get_char_at()
            while char and (char.isspace() or char == ","):
                self.index += 1
                char = self.get_char_at()

        # Especially at the end of an LLM generated json you might miss the last "]"
        char = self.get_char_at()
        if char and char != "]":
            self.log(
                "While parsing an array we missed the closing ], adding it back", "info"
            )
            self.index -= 1

        self.index += 1
        self.reset_context()
        return arr
|||
|
|||
def parse_string(self) -> Union[str, bool, None]: |
|||
# <string> is a string of valid characters enclosed in quotes |
|||
# i.e. { name: "John" } |
|||
# Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here |
|||
|
|||
# Flag to manage corner cases related to missing starting quote |
|||
missing_quotes = False |
|||
doubled_quotes = False |
|||
lstring_delimiter = rstring_delimiter = '"' |
|||
|
|||
char = self.get_char_at() |
|||
# A valid string can only start with a valid quote or, in our case, with a literal |
|||
while char and char not in ['"', "'", "“"] and not char.isalnum(): |
|||
self.index += 1 |
|||
char = self.get_char_at() |
|||
|
|||
if not char: |
|||
# This is an empty string |
|||
return "" |
|||
|
|||
# Ensuring we use the right delimiter |
|||
if char == "'": |
|||
lstring_delimiter = rstring_delimiter = "'" |
|||
elif char == "“": |
|||
lstring_delimiter = "“" |
|||
rstring_delimiter = "”" |
|||
elif char.isalnum(): |
|||
# This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid |
|||
# But remember, object keys are only of type string |
|||
if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key": |
|||
value = self.parse_boolean_or_null() |
|||
if value != "": |
|||
return value |
|||
self.log( |
|||
"While parsing a string, we found a literal instead of a quote", |
|||
"info", |
|||
) |
|||
self.log( |
|||
"While parsing a string, we found no starting quote. Will add the quote back", |
|||
"info", |
|||
) |
|||
missing_quotes = True |
|||
|
|||
if not missing_quotes: |
|||
self.index += 1 |
|||
|
|||
# There is sometimes a weird case of doubled quotes, we manage this also later in the while loop |
|||
if self.get_char_at() == lstring_delimiter: |
|||
# If it's an empty key, this was easy |
|||
if self.get_context() == "object_key" and self.get_char_at(1) == ":": |
|||
self.index += 1 |
|||
return "" |
|||
# Find the next delimiter |
|||
i = 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c != rstring_delimiter: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# Now check that the next character is also a delimiter to ensure that we have ""....."" |
|||
# In that case we ignore this rstring delimiter |
|||
if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter: |
|||
self.log( |
|||
"While parsing a string, we found a valid starting doubled quote, ignoring it", |
|||
"info", |
|||
) |
|||
doubled_quotes = True |
|||
self.index += 1 |
|||
else: |
|||
# Ok this is not a doubled quote, check if this is an empty string or not |
|||
i = 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c.isspace(): |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c not in [",", "]", "}"]: |
|||
self.log( |
|||
"While parsing a string, we found a doubled quote but it was a mistake, removing one quote", |
|||
"info", |
|||
) |
|||
self.index += 1 |
|||
|
|||
# Initialize our return value |
|||
string_acc = "" |
|||
|
|||
# Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object |
|||
# In that case we need to use the ":|,|}" characters as terminators of the string |
|||
# So this will stop if: |
|||
# * It finds a closing quote |
|||
# * It iterated over the entire sequence |
|||
# * If we are fixing missing quotes in an object, when it finds the special terminators |
|||
char = self.get_char_at() |
|||
while char and char != rstring_delimiter: |
|||
if missing_quotes: |
|||
if self.get_context() == "object_key" and ( |
|||
char == ":" or char.isspace() |
|||
): |
|||
self.log( |
|||
"While parsing a string missing the left delimiter in object key context, we found a :, stopping here", |
|||
"info", |
|||
) |
|||
break |
|||
elif self.get_context() == "object_value" and char in [",", "}"]: |
|||
rstring_delimiter_missing = True |
|||
# check if this is a case in which the closing comma is NOT missing instead |
|||
i = 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c != rstring_delimiter: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# found a delimiter, now we need to check that is followed strictly by a comma or brace |
|||
while next_c and next_c.isspace(): |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c and next_c in [",", "}"]: |
|||
rstring_delimiter_missing = False |
|||
if rstring_delimiter_missing: |
|||
self.log( |
|||
"While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here", |
|||
"info", |
|||
) |
|||
break |
|||
string_acc += char |
|||
self.index += 1 |
|||
char = self.get_char_at() |
|||
if char and len(string_acc) > 0 and string_acc[-1] == "\\": |
|||
# This is a special case, if people use real strings this might happen |
|||
self.log("Found a stray escape sequence, normalizing it", "info") |
|||
string_acc = string_acc[:-1] |
|||
if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]: |
|||
escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"} |
|||
string_acc += escape_seqs.get(char, char) or char |
|||
self.index += 1 |
|||
char = self.get_char_at() |
|||
# ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here |
|||
if char == rstring_delimiter: |
|||
# Special case here, in case of double quotes one after another |
|||
if doubled_quotes and self.get_char_at(1) == rstring_delimiter: |
|||
self.log( |
|||
"While parsing a string, we found a doubled quote, ignoring it", |
|||
"info", |
|||
) |
|||
self.index += 1 |
|||
elif missing_quotes and self.get_context() == "object_value": |
|||
# In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key |
|||
i = 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c not in [ |
|||
rstring_delimiter, |
|||
lstring_delimiter, |
|||
]: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c: |
|||
# We found a quote, now let's make sure there's a ":" following |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# found a delimiter, now we need to check that is followed strictly by a comma or brace |
|||
while next_c and next_c.isspace(): |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c and next_c == ":": |
|||
# Reset the cursor |
|||
self.index -= 1 |
|||
char = self.get_char_at() |
|||
self.log( |
|||
"In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.", |
|||
"info", |
|||
) |
|||
break |
|||
else: |
|||
# Check if eventually there is a rstring delimiter, otherwise we bail |
|||
i = 1 |
|||
next_c = self.get_char_at(i) |
|||
check_comma_in_object_value = True |
|||
while next_c and next_c not in [ |
|||
rstring_delimiter, |
|||
lstring_delimiter, |
|||
]: |
|||
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas |
|||
# This is because the routine after will make sure to correct any bad guess and this solves a corner case |
|||
if check_comma_in_object_value and next_c.isalpha(): |
|||
check_comma_in_object_value = False |
|||
# If we are in an object context, let's check for the right delimiters |
|||
if ( |
|||
("object_key" in self.context and next_c in [":", "}"]) |
|||
or ("object_value" in self.context and next_c == "}") |
|||
or ("array" in self.context and next_c in ["]", ","]) |
|||
or ( |
|||
check_comma_in_object_value |
|||
and self.get_context() == "object_value" |
|||
and next_c == "," |
|||
) |
|||
): |
|||
break |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# If we stopped for a comma in object_value context, let's check if find a "} at the end of the string |
|||
if next_c == "," and self.get_context() == "object_value": |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c != rstring_delimiter: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# Ok now I found a delimiter, let's skip whitespaces and see if next we find a } |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c.isspace(): |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
if next_c == "}": |
|||
# OK this is valid then |
|||
self.log( |
|||
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it", |
|||
"info", |
|||
) |
|||
string_acc += str(char) |
|||
self.index += 1 |
|||
char = self.get_char_at() |
|||
elif next_c == rstring_delimiter: |
|||
if self.get_context() == "object_value": |
|||
# But this might not be it! This could be just a missing comma |
|||
# We found a delimiter and we need to check if this is a key |
|||
# so find a rstring_delimiter and a colon after |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c != rstring_delimiter: |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
while next_c and next_c != ":": |
|||
if next_c in [ |
|||
lstring_delimiter, |
|||
rstring_delimiter, |
|||
",", |
|||
]: |
|||
break |
|||
i += 1 |
|||
next_c = self.get_char_at(i) |
|||
# Only if we fail to find a ':' then we know this is misplaced quote |
|||
if next_c != ":": |
|||
self.log( |
|||
"While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it", |
|||
"info", |
|||
) |
|||
string_acc += str(char) |
|||
self.index += 1 |
|||
char = self.get_char_at() |
|||
|
|||
if ( |
|||
char |
|||
and missing_quotes |
|||
and self.get_context() == "object_key" |
|||
and char.isspace() |
|||
): |
|||
self.log( |
|||
"While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value", |
|||
"info", |
|||
) |
|||
self.skip_whitespaces_at() |
|||
if self.get_char_at() not in [":", ","]: |
|||
return "" |
|||
|
|||
# A fallout of the previous special case in the while loop, |
|||
# we need to update the index only if we had a closing quote |
|||
if char != rstring_delimiter: |
|||
self.log( |
|||
"While parsing a string, we missed the closing quote, ignoring", |
|||
"info", |
|||
) |
|||
else: |
|||
self.index += 1 |
|||
|
|||
return string_acc.rstrip() |
|||
|
|||
def parse_number(self) -> Union[float, int, str, JSONReturnType]:
    """Consume a run of number-like characters and coerce it.

    Returns an int or float when possible, the raw string for
    currency-style values containing commas, and falls back to
    re-parsing when only a stray "-" was found.
    """
    # Characters that may legally appear in the numbers (and near-numbers) we accept.
    allowed = set("0123456789-.eE/,")
    in_array = self.get_context() == "array"
    pieces = []
    ch = self.get_char_at()
    while ch and ch in allowed:
        # Inside an array a comma terminates the element, it is not part of the number.
        if ch == "," and in_array:
            break
        pieces.append(ch)
        self.index += 1
        ch = self.get_char_at()
    raw = "".join(pieces)
    if len(raw) > 1 and raw[-1] in "-eE/,":
        # Trailing character cannot end a number/currency value; roll back one position.
        raw = raw[:-1]
        self.index -= 1
    try:
        if "," in raw:
            # Comma-grouped values (e.g. "1,000") are kept as strings.
            return str(raw)
        if "." in raw or "e" in raw or "E" in raw:
            return float(raw)
        if raw == "-":
            # A stray "-" is not a number; discard it and continue parsing.
            return self.parse_json()
        return int(raw)
    except ValueError:
        return raw
|||
|
|||
def parse_boolean_or_null(self) -> Union[bool, str, None]:
    """Parse one of the unquoted literals 'true', 'false' or 'null'.

    Returns the corresponding Python value, or "" (with the index
    restored) when no literal matches at the current position.
    """
    starting_index = self.index
    char = (self.get_char_at() or "").lower()
    # Fix: initialise to None — previously `value` was only annotated, so an
    # unexpected first character raised UnboundLocalError at `if value:` below.
    value: Optional[Tuple[str, Optional[bool]]] = None
    if char == "t":
        value = ("true", True)
    elif char == "f":
        value = ("false", False)
    elif char == "n":
        value = ("null", None)

    if value:
        i = 0
        # Advance while the input keeps matching the expected literal.
        while char and i < len(value[0]) and char == value[0][i]:
            i += 1
            self.index += 1
            char = (self.get_char_at() or "").lower()
        if i == len(value[0]):
            return value[1]

    # Nothing matched (or only a prefix did): reset the index before returning.
    self.index = starting_index
    return ""
|||
|
|||
def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
    """Return the character `count` positions ahead of the cursor, or False past the end.

    EAFP on purpose: try/except is cheaper than a bounds check on the
    common in-range path.
    """
    position = self.index + count
    try:
        return self.json_str[position]
    except IndexError:
        return False
|||
|
|||
def skip_whitespaces_at(self) -> None:
    """Advance self.index past any run of whitespace, stopping at end of input."""
    text = self.json_str
    limit = len(text)
    while self.index < limit and text[self.index].isspace():
        self.index += 1
|||
|
|||
def set_context(self, value: str) -> None:
    """Push a non-empty context label onto the context stack."""
    if not value:
        return
    self.context.append(value)
|||
|
|||
def reset_context(self) -> None:
    """Discard the most recent context label."""
    del self.context[-1]
|||
|
|||
def get_context(self) -> str:
    """Return the current (innermost) context label."""
    stack = self.context
    return stack[-1]
|||
|
|||
def log(self, text: str, level: str) -> None:
    """Record a repair message plus a window of the surrounding JSON text.

    Messages are only recorded when `level` matches the logger's
    configured log level.
    """
    if level != self.logger.log_level:
        return
    window = self.logger.window
    lo = max(self.index - window, 0)
    hi = min(self.index + window, len(self.json_str))
    self.logger.log.append(
        {
            "text": text,
            "context": self.json_str[lo:hi],
        }
    )
|||
|
|||
|
|||
def repair_json(
    json_str: str = "",
    return_objects: bool = False,
    skip_json_loads: bool = False,
    logging: bool = False,
    json_fd: Optional[TextIO] = None,
    ensure_ascii: bool = True,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """Decode a JSON string, repairing it when standard decoding fails.

    By default the (re-serialized) JSON string is returned.
    With `return_objects=True` the decoded data structure is returned instead.
    With `skip_json_loads=True` the built-in json.loads() is bypassed entirely.
    With `logging=True` a tuple of (repaired json, list of repair actions) is returned.
    """
    parser = JSONParser(json_str, json_fd, logging)
    if skip_json_loads:
        parsed_json = parser.parse()
    else:
        # Fast path: try the standard decoder first, repair only on failure.
        try:
            parsed_json = json.load(json_fd) if json_fd else json.loads(json_str)
        except json.JSONDecodeError:
            parsed_json = parser.parse()
    # Returning the decoded object lets this library act as a drop-in
    # replacement for the standard json module.
    if return_objects or logging:
        return parsed_json
    return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|||
|
|||
|
|||
def loads(
    json_str: str,
    skip_json_loads: bool = False,
    logging: bool = False,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """Drop-in `json.loads()` replacement that repairs broken JSON.

    Thin wrapper over `repair_json()` with `return_objects=True`.
    """
    return repair_json(
        json_str=json_str,
        return_objects=True,
        skip_json_loads=skip_json_loads,
        logging=logging,
    )
|||
|
|||
|
|||
def load(
    fd: TextIO, skip_json_loads: bool = False, logging: bool = False
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """Drop-in `json.load()` replacement that repairs broken JSON.

    Thin wrapper over `repair_json()` with `json_fd=fd` and `return_objects=True`.
    """
    return repair_json(
        json_fd=fd,
        return_objects=True,
        skip_json_loads=skip_json_loads,
        logging=logging,
    )
|||
|
|||
|
|||
def from_file(
    filename: str,
    skip_json_loads: bool = False,
    logging: bool = False,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """
    Wrapper around `load()` that accepts a filename instead of a file object.

    Fix: use a context manager so the file is closed even when parsing raises;
    the previous open()/close() pair leaked the descriptor on exception.
    """
    with open(filename) as fd:
        return load(fd, skip_json_loads, logging)
@ -0,0 +1,161 @@ |
|||
from flask import Flask, request, jsonify,Response |
|||
import os |
|||
from checkPlaceName import checkPlaceName |
|||
from checkRepeatText import checkRepeatText |
|||
from checkCompanyName import checkCompanyName |
|||
from checkDocumentError import getDocumentError |
|||
from checkTitleName import checkTitleName |
|||
from flask_cors import CORS |
|||
import qwen_agenttext |
|||
# Flask application setup: CORS is enabled for all routes, and the upload
# directory is created at import time so handlers can assume it exists.
app = Flask(__name__)
cros = CORS(app)
UPLOAD_FOLDER = 'uploads'
# NOTE(review): usableTag is never referenced in this file — presumably a
# feature-availability flag array consumed elsewhere; confirm before removing.
usableTag=[0,0,0,0,0,0,0,0]
if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
|||
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a multipart file upload and store it in UPLOAD_FOLDER.

    Returns 400 when the request carries no file part or an empty filename,
    200 on success.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file:
        # Security fix: strip any directory components from the client-supplied
        # name so a crafted filename (e.g. "../../etc/passwd") cannot escape
        # UPLOAD_FOLDER via path traversal.
        filename = os.path.basename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        return jsonify({"message": "File uploaded successfully"}), 200
|||
@app.route('/stream', methods=["GET", "POST"])
def stream_numbers():
    """SSE endpoint: stream the agent's progress messages for `context`."""
    context = request.args.get('context')
    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(qwen_agenttext.getxinx(context), headers=headers)
|||
@app.route('/sse/checkRepeatText', methods=['GET'])
def checkRepeatTextWeb():
    """SSE endpoint streaming duplicate-text check progress for a file."""
    filename = request.args.get('filename')

    def generate_checkRepeatText(filename):
        # Fix: the event counter was initialised but never incremented, so
        # every SSE message carried id 1; increment once per event instead.
        event_id = 0
        try:
            for i in checkRepeatText(filename):
                event_id += 1
                yield f"id: {event_id}\n"
                yield f"event: checkRepeatText\n"
                yield f"data: {i}\n\n"
        except Exception:
            # Best-effort: report the failure to the client instead of
            # silently dropping the stream.
            event_id += 1
            yield f"id: {event_id}\n"
            yield f"event: checkRepeatText\n"
            yield f"data: **程序出现异常**\n\n"
    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(generate_checkRepeatText(filename), headers=headers)
|||
|
|||
|
|||
@app.route('/sse/checkPlaceName', methods=['GET'])
def checkPlaceNameWebSse():
    """SSE endpoint streaming place-name check progress for a file."""
    filename = request.args.get('filename')

    def generate_checkPlaceName(filename):
        # Fix: the event counter was initialised but never incremented, so
        # every SSE message carried id 1; increment once per event instead.
        event_id = 0
        for i in checkPlaceName(filename):
            event_id += 1
            yield f"id: {event_id}\n"
            yield f"event: checkPlaceName\n"
            yield f"data: {i}\n\n"
    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(generate_checkPlaceName(filename), headers=headers)
|||
@app.route('/sse/checkCompanyName', methods=['GET'])
def checkCompanyNameWebSse():
    """SSE endpoint streaming company-name check progress for a file."""
    filename = request.args.get('filename')

    def generate_checkCompanyName(filename):
        # Fix: the event counter was initialised but never incremented, so
        # every SSE message carried id 1; increment once per event instead.
        event_id = 0
        for i in checkCompanyName(filename):
            event_id += 1
            yield f"id: {event_id}\n"
            yield f"event: checkCompanyName\n"
            yield f"data: {i}\n\n"

    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(generate_checkCompanyName(filename), headers=headers)
|||
|
|||
@app.route('/sse/checkDocumentErrorWeb', methods=['GET'])
def checkDocumentErrorWebSse():
    """SSE endpoint streaming typo/error check progress for a file."""
    filename = request.args.get('filename')

    def generate_checkDocumentError(filename):
        # Fix: the event counter was initialised but never incremented, so
        # every SSE message carried id 1; increment once per event instead.
        event_id = 0
        for i in getDocumentError(filename):
            event_id += 1
            yield f"id: {event_id}\n"
            yield f"event: getDocumentError\n"
            yield f"data: {i}\n\n"

    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(generate_checkDocumentError(filename), headers=headers)
|||
@app.route('/sse/checkTitleName', methods=['GET'])
def checkTitleNameWebSse():
    """SSE endpoint streaming title check progress for a file."""
    filename = request.args.get('filename')

    def generate_checkTitleName(filename):
        # Fix: the event counter was initialised but never incremented, so
        # every SSE message carried id 1; increment once per event instead.
        event_id = 0
        for i in checkTitleName(filename):
            event_id += 1
            yield f"id: {event_id}\n"
            yield f"event: checkTitleName\n"
            yield f"data: {i}\n\n"

    headers = {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET,POST",
        "Access-Control-Allow-Headers": "x-requested-with,content-type",
    }
    return Response(generate_checkTitleName(filename), headers=headers)
|||
# Dev entry point: binds all interfaces on port 80. NOTE(review): this is the
# Flask development server — use a production WSGI server for deployment.
if __name__ == '__main__':
    app.run(host="0.0.0.0",port=80)
@ -0,0 +1,132 @@ |
|||
import pprint |
|||
import urllib.parse |
|||
import json5 |
|||
from qwen_agent.agents import Assistant |
|||
from qwen_agent.tools.base import BaseTool, register_tool |
|||
import requests |
|||
import baidusearch |
|||
import tqdm |
|||
|
|||
# 使用示例 |
|||
|
|||
|
|||
|
|||
# Step 1 (Optional): Add a custom tool named `my_image_gen`. |
|||
# Step 1 (optional): a custom tool named `my_image_gen`.
@register_tool('my_image_gen')
class MyImageGen(BaseTool):
    """Agent tool: text-to-image generation via the pollinations.ai URL API."""

    # `description` tells the agent what this tool does.
    description = 'AI painting (image generation) service, input text description, and return the image URL drawn based on text information.'
    # `parameters` declares the tool's input schema for the agent.
    parameters = [{
        'name': 'prompt',
        'type': 'string',
        'description': 'Detailed description of the desired image content, in English',
        'required': True
    }]

    def call(self, params: str, **kwargs) -> str:
        # `params` is the JSON argument blob produced by the LLM agent.
        prompt = json5.loads(params)['prompt']
        # URL-encode the prompt so it is safe as a path segment.
        encoded = urllib.parse.quote(prompt)
        return json5.dumps(
            {'image_url': f'https://image.pollinations.ai/prompt/{encoded}'},
            ensure_ascii=False)
|||
|
|||
|
|||
@register_tool('chaxun')
class BaiduSearchTool(BaseTool):
    """Agent tool: web search via Baidu for questions the model cannot answer.

    Fix: this class was previously also named `MyImageGen`, silently shadowing
    the image-generation class above; renamed for clarity (the class name is
    never referenced directly — registration happens via the decorator).
    """

    # `description` tells the agent what this tool does.
    description = '如果你不会,请使用此工具进行联网查询'
    # `parameters` declares the tool's input schema for the agent.
    parameters = [{
        'name': 'prompt',
        'type': 'string',
        'description': '请你描述需要提问的信息,以此帮助你了解更多的信息',
        'required': True
    }]

    def call(self, params: str, **kwargs) -> str:
        # `params` is the JSON argument blob produced by the LLM agent.
        prompt = json5.loads(params)['prompt']
        # NOTE(review): the query is URL-encoded before being handed to
        # baidusearch.search — presumably intentional, but searching the
        # percent-encoded text looks suspect; confirm against baidusearch docs.
        prompt = urllib.parse.quote(prompt)
        search_tool = baidusearch.search(prompt, num_results=20)
        print(search_tool)
        # NOTE(review): declared return type is str but a result list is
        # returned; downstream appears to tolerate it — confirm.
        return search_tool
|||
# Step 2: Configure the LLM you are using.
# Fill in the model name plus either a model_server (self-hosted) or an api_key.
llm_cfg = {
    # Use the model service provided by DashScope:
    # model: model name
    # model_server: where the model is served
    # api_key: falls back to the DASHSCOPE_API_KEY environment variable if unset
    'model':"qwen2-72b-instruct",
    'model_server': 'DashScope',  # base_url, also known as api_base
    # SECURITY(review): hardcoded API key committed to source — rotate it and
    # load from an environment variable instead.
    'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    # 'api_key': 'YOUR_DASHSCOPE_API_KEY',

    # Use a model service compatible with the OpenAI API, such as vLLM or Ollama:
    # 'model': 'Qwen1.5-7B-Chat',
    # 'model_server': 'http://localhost:8000/v1',  # base_url, also known as api_base
    # 'api_key': 'EMPTY',

    # (Optional) LLM hyperparameters for generation:
    'generate_cfg': {
        'top_p': 0.8
    }
}

# Step 3: Create an agent. The `Assistant` agent can use tools and read files.

# System prompt steering the agent toward using the `chaxun` search tool
# when it does not know the answer (kept in Chinese: it is runtime text).
system_instruction = '''
你是一个乐于助人的助手。
收到用户的请求后,您应:
你应该进行思考,判断是否使用工具,
如果遇到你不会回答,请使用工具[chaxun]
'''

# Tool list available to the Assistant.
tools = ["chaxun"]  # `code_interpreter` is a built-in tool for executing code.
# Files the assistant may read:
# files = ['./examples/resource/doc.pdf']  # Give the bot a PDF file to read.

# Instantiate the Assistant agent.
bot = Assistant(llm=llm_cfg,
                system_message=system_instruction,
                function_list=tools,
                # files=files
                )

# Step 4: Run the agent as a chatbot.
messages = []  # This stores the chat history.
|||
def getxinx(context):
    """Run the agent on a single user message.

    Yields the placeholder string "请稍等.." once per intermediate response
    chunk the bot produces, so an SSE client sees liveness while the agent works.
    """
    chat = [{'role': 'user', 'content': context}]
    print(chat)
    response = []
    for rsp in bot.run(messages=chat):
        response.append(rsp)
        yield "请稍等.."
@ -0,0 +1,109 @@ |
|||
import time
import json
import math
from flask import Flask,Response,request
from flask_sse import sse
from flask_cors import CORS
import re
import qwen_agenttext
app = Flask(__name__)
cros = CORS(app)
# SSE push function
import paddle;
# NOTE(review): called for its side effect only — the returned device list is
# discarded; presumably this forces paddle device initialisation at import
# time. Confirm before removing.
paddle.device.get_available_device()
|||
|
|||
|
|||
# SSE 推送路由 |
|||
|
|||
|
|||
# @app.route('/register', methods=["GET"]) |
|||
# def register(): |
|||
# 获取客户端标识符 |
|||
# client_id = str(uuid.uuid4()) |
|||
# |
|||
# # 返回 SSE 响应 |
|||
# return jsonify({"client_id": client_id}) |
|||
|
|||
|
|||
# SSE 推送路由 |
|||
|
|||
|
|||
# @app.route('/sse', methods=['POST']) |
|||
# def stream(): |
|||
# # 获取客户端标识符 |
|||
# client_id = 1 |
|||
# print("client_id", client_id) |
|||
# |
|||
# def aa(): |
|||
# # 循环发送 SSE 数据 |
|||
# for i in range(10): |
|||
# data = 'Hello, %s!' % client_id + str(i) |
|||
# print(data) |
|||
# sse.publish(data, channel=client_id, type='message') |
|||
# time.sleep(1) |
|||
# sse.publish("end", channel=client_id, type='message') |
|||
# |
|||
# # 返回 SSE 响应 |
|||
# response = Response(aa(), mimetype='text/event-stream') |
|||
# response.headers.add('Cache-Control', 'no-cache') |
|||
# response.headers.add('Connection', 'keep-alive') |
|||
# response.headers.add('X-Accel-Buffering', 'no') |
|||
# return response |
|||
# |
|||
# |
|||
# |
|||
# @app.route('/stream' ,methods=["GET", "POST"]) |
|||
# def stream_numbers(): |
|||
# context= request.args.get('context') |
|||
# |
|||
# |
|||
# headers = { |
|||
# "Content-Type": "text/event-stream", |
|||
# "Cache-Control": "no-cache", |
|||
# "X-Accel-Buffering": "no", |
|||
# "Access-Control-Allow-Origin": "*", |
|||
# "Access-Control-Allow-Methods": "GET,POST", |
|||
# "Access-Control-Allow-Headers": "x-requested-with,content-type", |
|||
# } |
|||
# return Response(generate_numbers(),headers=headers) |
|||
# def generate_numbers(): |
|||
# event_id=0 |
|||
# # for number in range(1, 10): |
|||
# # json_data = json.dumps({"number": number}) |
|||
# # print(json_data) |
|||
# # event_id += 1 |
|||
# # yield f"id: {event_id}\n" |
|||
# # yield f"event: time-update\n" |
|||
# # yield f"data: {json_data}\n\n" # 每次生成一个数字就发送 |
|||
# json_data = json.dumps({"number": "done"}) |
|||
# yield f"id: {1}\n" |
|||
# yield f"event: time-update\n" |
|||
# yield f"data: 34568\n\n" # 发送完成信号 |
|||
# if __name__ == '__main__': |
|||
# |
|||
# |
|||
# # 读取文件内容 |
|||
# with open("checkPlaceName.txt", "r", encoding='utf-8') as f: |
|||
# gettext = f.read() |
|||
# batchNum=20 |
|||
# sentences = re.split(r'[。\n]', gettext) |
|||
# # 去掉空字符 |
|||
# sentences = [sentence.strip() for sentence in sentences if sentence.strip()] |
|||
# # 计算总字符数 |
|||
# total_chars = len(sentences) |
|||
# |
|||
# # 计算有多少份 |
|||
# num_chunks = math.ceil(total_chars / batchNum) |
|||
# |
|||
# # 按batchNum字为一份进行处理 |
|||
# chunks = [sentences[i:i + batchNum] for i in range(0, total_chars, batchNum)] |
|||
# |
|||
# # 打印每一份的内容 |
|||
# for i, chunk in enumerate(chunks): |
|||
# print(f"Chunk {i + 1}:") |
|||
# print(chunk) |
|||
# print("-" * 40) |
|||
# |
|||
# # 打印总份数 |
|||
# print(f"Total chunks: {num_chunks}") |
|||
# app.run(debug=True,port=80) |
After Width: | Height: | Size: 420 KiB |
After Width: | Height: | Size: 245 KiB |
After Width: | Height: | Size: 117 KiB |
After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 62 KiB |
After Width: | Height: | Size: 41 KiB |
After Width: | Height: | Size: 34 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 211 KiB |
After Width: | Height: | Size: 916 KiB |
After Width: | Height: | Size: 217 KiB |
After Width: | Height: | Size: 252 KiB |
After Width: | Height: | Size: 904 KiB |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 3199, |
|||
"iopub_port": 3205, |
|||
"stdin_port": 3200, |
|||
"control_port": 3201, |
|||
"hb_port": 3209, |
|||
"ip": "127.0.0.1", |
|||
"key": "41711130-ba4287db5e2a6e7b98444c31", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 36295, |
|||
"iopub_port": 36301, |
|||
"stdin_port": 36296, |
|||
"control_port": 36297, |
|||
"hb_port": 36305, |
|||
"ip": "127.0.0.1", |
|||
"key": "0faec31a-0f91a316abd70cf50f57dbad", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 5355, |
|||
"iopub_port": 5362, |
|||
"stdin_port": 5356, |
|||
"control_port": 5358, |
|||
"hb_port": 5366, |
|||
"ip": "127.0.0.1", |
|||
"key": "de89d28a-7beb5da33100363d2c20fd6b", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 3079, |
|||
"iopub_port": 3085, |
|||
"stdin_port": 3080, |
|||
"control_port": 3081, |
|||
"hb_port": 3089, |
|||
"ip": "127.0.0.1", |
|||
"key": "1825b8a3-a33137bc69e3375f26f384a3", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 36740, |
|||
"iopub_port": 36746, |
|||
"stdin_port": 36741, |
|||
"control_port": 36742, |
|||
"hb_port": 36750, |
|||
"ip": "127.0.0.1", |
|||
"key": "ac6de478-4a3be71d79c2c63da7065148", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 2563, |
|||
"iopub_port": 2569, |
|||
"stdin_port": 2564, |
|||
"control_port": 2565, |
|||
"hb_port": 2573, |
|||
"ip": "127.0.0.1", |
|||
"key": "7e020774-be96933cbe5aaad90c1c9bfc", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,12 @@ |
|||
{ |
|||
"shell_port": 5840, |
|||
"iopub_port": 5846, |
|||
"stdin_port": 5841, |
|||
"control_port": 5842, |
|||
"hb_port": 5850, |
|||
"ip": "127.0.0.1", |
|||
"key": "e4c27d68-1c3a9dfa16551f35481b05b8", |
|||
"transport": "tcp", |
|||
"signature_scheme": "hmac-sha256", |
|||
"kernel_name": "" |
|||
} |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
@ -0,0 +1,3 @@ |
|||
|
|||
from ipykernel import kernelapp as app |
|||
app.launch_new_instance() |
After Width: | Height: | Size: 2.8 MiB |
@ -0,0 +1,140 @@ |
|||
from docx import Document
from paddlenlp import Taskflow
from pprint import pprint
from qwen_agent.agents import Assistant
import re
import json_repair
import time
import math
# PaddleNLP named-entity-recognition pipeline, shared by all checks below.
tagTask = Taskflow("ner")
# Prompt asking the LLM to judge place names; kept in Chinese (runtime text).
prompt='''
.上述文本判断地名是否正确,你可以使用工具利用互联网查询,你只能在[正确,错误,简称,未知]三种选项中选择答案,回答格式[{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"}],不做过多的解释,严格按回答格式作答;
不做过多的解释,严格按回答格式作答;
'''
# Alternative prompt, kept for reference:
# prompt='''
# .请回答以上问题,
# ,回答格式[{“placeName”:"原文","回答":"答案"},{“placeName”:"原文","回答":"答案"}],不做过多的解释,严格按回答格式作答;
# 不做过多的解释,严格按回答格式作答;
# '''
llm_cfg = {
    #'model': 'qwen1.5-72b-chat',
    'model':"qwen2-72b",
    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
    # 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
}
bot = Assistant(llm=llm_cfg,
                name='Assistant',
                # description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。'
                )
|||
#获取全文内容 |
|||
def getDocxToTextAll(name):
    """Extract every non-empty paragraph of a .docx file to "checkPlaceName.txt".

    Paragraphs are kept in document order, joined with newlines, and written
    UTF-8 encoded so later pipeline steps can read the text back.

    :param name: path of the .docx document to read
    """
    document = Document(name)
    # Collect only non-blank paragraphs (removed unused levelList/addStart/
    # levelText/i scaffolding from the original).
    words = [paragraph.text for paragraph in document.paragraphs
             if paragraph.text.strip()]
    print("placeNameTask", len(words))
    text = '\n'.join(words)
    with open("checkPlaceName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
|
|||
#得到全文和地名有关的内容 |
|||
def placeNameTask(text):
    """Generator: run NER over *text* in chunks and yield progress, then names.

    The text is split on "。" and newlines, processed 20 sentences at a time,
    and consecutive org/region tokens are merged into a single name.  Every
    yield except the last is a human-readable progress string; the final
    yield is the de-duplicated list of place names.
    """
    batchNum = 20
    sentences = [s.strip() for s in re.split(r'[。\n]', text) if s.strip()]
    # Renamed from the misleading "total_chars": this counts sentences.
    total_sentences = len(sentences)
    num_chunks = math.ceil(total_sentences / batchNum)
    chunks = [sentences[i:i + batchNum]
              for i in range(0, total_sentences, batchNum)]
    placeList = []
    for i, chunk in enumerate(chunks):
        yield f"文档地名检查---文档解析进度:{i + 1}/{num_chunks}"
        wenBen = ".".join(chunk)
        print(chunk)
        res = tagTask(wenBen)
        isplace = False
        for zuhe in res:
            is_target = (zuhe[1].find("组织机构类") >= 0
                         or zuhe[1].find("世界地区类") >= 0)
            new_text = zuhe[0].replace("\n", "")
            if isplace and is_target:
                # Consecutive entity tokens belong to one name: merge into
                # the previously collected entry (removed the dead `name`
                # assignment taken on the non-entity branch).
                placeList[-1] += new_text
                continue
            if is_target:
                isplace = True
                placeList.append(new_text)
            else:
                isplace = False
        print("-" * 40)
    yield "文档地名检查---文档解析完成"
    # De-duplicate while preserving first-seen order.
    placeList = list(dict.fromkeys(placeList))
    yield placeList
|||
#主方法 |
|||
def checkPlaceName(filename):
    """Generator: check place names in a .docx file via NER plus an LLM.

    Yields UI progress strings; the final yield is a markdown-ish report of
    suspect place names with the sentence each occurred in, or a success
    message when nothing suspicious is found.

    :param filename: path of the .docx document
    """
    yield "文档地名检查---开始处理文档..."
    getDocxToTextAll(filename)
    with open("checkPlaceName.txt", "r", encoding='utf-8') as f:
        gettext = f.read()
    yield "文档地名检查---开始解析文档..."
    # Fix: initialise so a degenerate generator cannot leave this unbound.
    final_list = []
    for item in placeNameTask(gettext):
        if isinstance(item, str):
            yield item            # progress message: pass straight through
        else:
            final_list = item     # the generator's last yield is the name list
    propnStr = ",".join(final_list)
    print("placeNameTask", propnStr)
    messages = [{'role': 'user', 'content': [{'text': propnStr + prompt}]}]
    runList = []
    yield "文档地名检查---结果生成中..."
    cishu = 0
    for rsp in bot.run(messages):
        runList.append(rsp)
        if cishu > 3:
            cishu = 0
            yield "文档地名检查---结果生成中" + '.' * cishu
        cishu += 1
    data = runList[-1][0]["content"]
    print("placeNameTask", data)
    parsed_data = json_repair.loads(data.replace('`', ''))
    # Keep only the entries the model judged wrong; .get guards against
    # malformed LLM output missing the key.
    error_places = [place for place in parsed_data if place.get('回答') == '错误']
    print("placeNameTask", error_places)
    returnInfo = "发现异常地名<br />"
    if error_places:
        for t in error_places:
            keyword = t['placeName']
            # Find a containing paragraph for context.
            paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext)
            if not paragraphs:
                # Fix: the keyword may sit on the final, newline-less line;
                # the original indexed paragraphs[0] and crashed here.
                paragraphs = [keyword]
            yuanwen = paragraphs[0].replace(keyword, f"**{keyword}**").replace("\n", "")
            returnInfo += "原文:" + yuanwen + "<br />出现异常地名:**" + keyword + "**!请注意" + "<br />"
        yield returnInfo
        print(returnInfo)
    else:
        # Fix: original message had a duplicated word ("未发现发现").
        yield "**未发现异常地名**"
@ -0,0 +1,118 @@ |
|||
import re |
|||
import time |
|||
from docx import Document |
|||
from pprint import pprint |
|||
# from paddlenlp import Taskflow |
|||
# |
|||
# similarity = Taskflow("text_similarity", truncation=True, max_length=102400) |
|||
|
|||
|
|||
def getOutlineLevel(inputXml):
    """Extract the number from an ``<w:outlineLvl w:val="number"/>`` tag.

    :param inputXml: XML string of a paragraph or style element that is
        known to contain a ``<w:outlineLvl>`` tag
    :return: the outline level digits as a string (e.g. "0" for level 1)
    :raises AttributeError: if the located tag carries no digits
    """
    start_index = inputXml.find('<w:outlineLvl')
    end_index = inputXml.find('>', start_index)
    tag = inputXml[start_index:end_index + 1]
    # Fix: raw string — "\d+" is an invalid escape sequence in modern Python.
    return re.search(r"\d+", tag).group()
|||
|
|||
|
|||
def isTitle(paragraph):
    """Return the outline level of *paragraph*, or None for plain body text.

    :param paragraph: a python-docx paragraph
    :return: None for blank lines and ordinary text; otherwise a string:
        "0" = level-1 heading, "1" = level-2, "2" = level-3, ...
    """
    # Blank paragraphs are never headings.
    if not paragraph.text.strip():
        return None

    # An outline level set directly on the paragraph XML wins.
    para_xml = paragraph._p.xml
    if '<w:outlineLvl' in para_xml:
        return getOutlineLevel(para_xml)

    # Otherwise walk the style inheritance chain looking for one.
    style = paragraph.style
    while style is not None:
        style_xml = style.element.xml
        if '<w:outlineLvl' in style_xml:
            return getOutlineLevel(style_xml)
        style = style.base_style

    # Neither the paragraph nor any ancestor style declares a level.
    return None
|||
|
|||
def getDocxToText12biaoti(name):
    """Extract the numbered heading outline (levels 1-3) of a .docx file.

    Headings are numbered "1:", "1.1", "1.1.1" ... in document order;
    level-1 headings containing "附件" (appendix) are skipped.  The outline
    is also written to "ce1.txt" for inspection.

    :param name: path of the .docx document
    :return: list of numbered heading strings
    :raises Exception: if the document contains no recognised headings
    """
    document = Document(name)
    words = []
    firstTitle = 0
    secondTitle = 0
    sanjiTitle = 0
    for paragraph in document.paragraphs:
        text = paragraph.text
        if not text.strip():
            continue
        level = isTitle(paragraph)
        if level == "0":
            firstTitle += 1
            secondTitle = 0
            if text.find("附件") >= 0:
                continue
            words.append("{}:".format(firstTitle) + text)
        elif level == "1":
            secondTitle += 1
            sanjiTitle = 0
            words.append("{}.{}".format(firstTitle, secondTitle) + text)
        elif level == "2":
            sanjiTitle += 1
            words.append("{}.{}.{}".format(firstTitle, secondTitle, sanjiTitle) + text)
    print(len(words))
    if len(words) == 0:
        # Fix: meaningful message instead of the "I know python!" placeholder.
        raise Exception("未在文档中找到任何标题")
    outline = '\n'.join(words)
    with open("ce1.txt", 'w', encoding="utf-8") as txt_file:
        txt_file.write(outline)
    return words
|||
# Scratch state left over from the (commented-out) template-comparison
# experiment: template headings, current-document headings, mismatches.
mobanList = []
dangqianList = []
errorList = []

# Prompt used to ask the LLM whether the outline covers a given topic.
prompt = '''{}这是文档大纲,根据大纲分析文档中是否有{}这块内容的描述,若不存在请回答不存在
'''
dagang = "1"
biaozhun = "2"
print(prompt.format(dagang, biaozhun))
@ -0,0 +1,282 @@ |
|||
import re |
|||
import os |
|||
import docx |
|||
from docx.document import Document |
|||
from docx.text.paragraph import Paragraph |
|||
from docx.parts.image import ImagePart |
|||
from qwen_agent.agents import Assistant |
|||
|
|||
from docx.oxml.table import CT_Tbl |
|||
from docx.oxml.text.paragraph import CT_P |
|||
|
|||
import shutil |
|||
import re |
|||
import json_repair |
|||
import uuid |
|||
|
|||
# 记录程序开始的时间戳 |
|||
def getOutlineLevel(inputXml):
    """Extract the number from an ``<w:outlineLvl w:val="number"/>`` tag.

    :param inputXml: XML string of a paragraph or style element that is
        known to contain a ``<w:outlineLvl>`` tag
    :return: the outline level digits as a string (e.g. "0" for level 1)
    :raises AttributeError: if the located tag carries no digits
    """
    start = inputXml.find('<w:outlineLvl')
    end = inputXml.find('>', start)
    tag = inputXml[start:end + 1]
    # Fix: raw string — "\d+" is an invalid escape sequence in modern Python.
    return re.search(r"\d+", tag).group()
|||
|
|||
|
|||
def isTitle(paragraph):
    """Return the outline level of *paragraph*, or None for plain body text.

    :param paragraph: a python-docx paragraph
    :return: None for blank lines and ordinary text; otherwise a string:
        "0" = level-1 heading, "1" = level-2, "2" = level-3, ...
    """
    if not paragraph.text.strip():
        # Blank paragraphs are never headings.
        return None

    # Prefer an outline level set directly on the paragraph's own XML.
    own_xml = paragraph._p.xml
    if '<w:outlineLvl' in own_xml:
        return getOutlineLevel(own_xml)

    # Fall back to the style chain: the level may be inherited.
    current = paragraph.style
    while current is not None:
        xml = current.element.xml
        if '<w:outlineLvl' in xml:
            return getOutlineLevel(xml)
        current = current.base_style

    return None
|||
|
|||
|
|||
# 该行只能有一个图片 |
|||
# 该行只能有一个图片
def is_image(graph: Paragraph, doc: Document):
    """Return True if the paragraph embeds at least one real image part.

    Delegates to get_ImagePart so the two helpers share one lookup and
    cannot drift apart (the original duplicated the xpath walk).

    :param graph: paragraph to inspect
    :param doc: owning document (resolves relationship ids)
    """
    return get_ImagePart(graph, doc) is not None
|||
|
|||
|
|||
# 获取图片(该行只能有一个图片) |
|||
# 获取图片(该行只能有一个图片)
def get_ImagePart(graph: Paragraph, doc: Document):
    """Return the first ImagePart embedded in *graph*, or None.

    Resolves every <pic:pic> drawing's relationship id against the
    document's related parts and returns the first genuine image.

    :param graph: paragraph to inspect
    :param doc: owning document (resolves relationship ids)
    """
    for pic in graph._element.xpath('.//pic:pic'):
        for rel_id in pic.xpath('.//a:blip/@r:embed'):
            candidate = doc.part.related_parts[rel_id]
            if isinstance(candidate, ImagePart):
                return candidate
    return None
|||
#寻找标题名称 |
|||
#寻找标题名称
def findTitleName(docxPath):
    """Generator: locate the level-1 heading holding the detailed design chapter.

    Yields a progress string first; the final yield is either the heading
    name the LLM selected or a human-readable failure message.

    :param docxPath: path of the .docx document
    """
    yield '文档图片信息检查----检查是否存在详细设计方案'
    document = docx.Document(docxPath)
    titleWords = []
    firstTitle = 0
    for paragraph in document.paragraphs:
        text = paragraph.text
        if not text.strip():
            continue
        level = isTitle(paragraph)
        # Only level-1 headings are offered to the model; "附件" chapters
        # (appendices) are skipped.
        if level == "0":
            firstTitle += 1
            if text.find("附件") >= 0:
                continue
            # Fix: the original called "一级标题:".format(firstTitle), a no-op
            # format on a placeholder-free string.
            titleWords.append("一级标题:" + text)
    findTitleName_llm_cfg = {
        'model': "qwen2-72b-instruct",
        'model_server': 'DashScope',  # base_url, also known as api_base
        'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    findTitleName_bot = Assistant(llm=findTitleName_llm_cfg,
                                  name='Assistant',
                                  )
    prompt = '''\n是文档的大纲,一级标题组成,哪一章存在与方案相关的内容
类似详细设计方案,详细服务方案,详细建设方案为最相关的,优先选择
类似设计方案,服务方案,建设方案为次相关,次级选择
类似方案是最后选择
按照这样的顺序选择最合适的
你只能从这两个答案中选择一个:{"name":"一级标题名称","answer":"存在"}或{"name":"","answer":"不存在"},不做过多的解释,严格按回答格式作答
'''
    messages = [({'role': 'user', 'content': "\n".join(titleWords) + prompt})]
    runList = []
    for rsp in findTitleName_bot.run(messages):
        runList.append(rsp)
    data = runList[-1][0]["content"]
    parsed_data = json_repair.loads(data.replace('`', ''))
    print(parsed_data)
    # .get guards against the model omitting keys in its JSON reply.
    if parsed_data.get("answer") == "存在":
        print("存在", parsed_data.get("name"))
        yield parsed_data.get("name")
    else:
        print("不存在", parsed_data.get("name"))
        yield "文档图片信息检查----未找到与详细设计方案相关内容,无法进行图文检查"
|||
def saveImage(fileName, titleName, imagePath):
    """Save every image appearing under the level-1 heading *titleName*.

    Walks the document in order, tracking the current level-1 heading;
    while inside *titleName*, each image paragraph's bytes are written to
    *imagePath* with the nearest heading prefixed to the file name.

    :param fileName: path of the .docx document
    :param titleName: exact text of the level-1 heading to collect under
    :param imagePath: existing directory to write image files into
    """
    fristName = ""
    # Fix: initialise — the original raised UnboundLocalError if an image
    # appeared before any heading.
    levelText = ""
    doc = docx.Document(fileName)
    for paragraph in doc.paragraphs:
        text = paragraph.text
        if text.strip():
            level = isTitle(paragraph)
            if level == "0":
                fristName = text
                print(text)
            if level:
                levelText = f"{int(level) + 1}级标题-" + text
        else:
            # Blank text: the paragraph may carry an image (or a table).
            if is_image(paragraph, doc) and fristName == titleName:
                part = get_ImagePart(paragraph, doc)
                img_name = levelText + "_" + os.path.basename(part.partname)
                with open(f'{imagePath}/{img_name}', "wb") as f:
                    f.write(part.blob)
|||
#保存完成后,上传大模型进行分析 |
|||
#保存完成后,上传大模型进行分析
def checkImageText(filename):
    """Generator: verify that images under the design chapter match the text.

    Extracts the images beneath the chapter chosen by findTitleName, asks a
    vision model to describe each, then asks a text model whether each
    described item is covered by the document.  Yields progress strings;
    the final yield is a report of undescribed image content or a success
    message.

    :param filename: path of the .docx document
    """
    llm_cfg_vl = {
        'model': "qwen-vl-max",
        'model_server': 'DashScope',  # base_url, also known as api_base
        'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    botImage = Assistant(llm=llm_cfg_vl,
                         name='Assistant',
                         )
    llm_cfg = {
        'model': "qwen2-72b-instruct",
        'model_server': 'DashScope',  # base_url, also known as api_base
        'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13',
    }
    bot = Assistant(llm=llm_cfg,
                    name='Assistant',
                    )
    for titleName in findTitleName(filename):
        yield titleName
        if titleName != "文档图片信息检查----未找到与详细设计方案相关内容,无法进行图文检查":
            yield "文档图片信息检查----文档内容解析中"
            imagePath = "Image" + str(uuid.uuid4())
            os.mkdir(imagePath)
            saveImage(filename, titleName, imagePath)
            imagePathList = os.listdir(imagePath)
            count = 0
            resMap = {}
            for image in imagePathList:
                count += 1
                yield f"文档图片信息检查---当前处理进度{count}/{len(imagePathList)}"
                # Fix: join against the per-run uuid folder; the original
                # used the literal string "imagePath" and pointed nowhere.
                outpath = os.path.join(imagePath, image)
                print(outpath)
                messagesImage = [{'role': 'user', "content": [{"image": outpath}, {"text": '提取图片中的信息,每个信息进行自动分类,不要出现与图中无关的信息,不要删减,不要修改,不要总结内容,不做过多的解释,严格按要求作答'}]}]
                runListImage = []
                for rsp in botImage.run(messagesImage):
                    runListImage.append(rsp)
                data = runListImage[-1][0]["content"]
                print(str(data))
                prompt = '''
依次上述内容是否与文档有关,你只能在[无关,有关]选项中选择答案,
按照这样的格式回答[{“text”:“内容”,"answer":"答案"},{“text”:“内容”,"answer":"答案"}]不做过多的解释,严格按回答格式作答
'''
                messages = [{'role': 'user', 'content': [{'text': str(data) + prompt}, {"file": filename}]}]
                runList = []
                for rsp in bot.run(messages):
                    runList.append(rsp)
                textdata = runList[-1][0]["content"]
                print(textdata)
                parsed_data = json_repair.loads(textdata)
                print(parsed_data)
                for res in parsed_data:
                    if res.get("answer") == "无关":
                        # Fix: the reply's payload key is "text"; the original
                        # printed the nonexistent res["name"] and crashed.
                        print("无关", res.get("text"))
                        if image in resMap:
                            # Already flagged content for this image: append.
                            resMap[image] = resMap[image] + "," + res.get("text", "")
                        else:
                            resMap[image] = res.get("text", "")
            out = ''
            if len(resMap) > 0:
                # Fix: iterate items() — iterating the dict alone yields keys,
                # so the original's "for key, value in resMap" raised.
                for key, value in resMap.items():
                    out += f"在{key}图片中,{value}以上内容在文档中未出现相关描述<br>"
                yield out
            else:
                yield "文档图片信息检查----图文符合要求"
            shutil.rmtree(imagePath)
|||
# except Exception as e: |
|||
# yield f"文档图片信息检查----未找到与详细设计方案相关内容,无法进行图文检查" |
|||
# return |
|||
if __name__ == '__main__':
    # Manual smoke test: stream the check results for a sample document.
    # Guarded so importing this module no longer triggers a full LLM run.
    for message in checkImageText("1.docx"):
        print(message)
|||
# import docx |
|||
# doc = docx.Document('1.docx') |
|||
# dict_rel = doc.part._rels # rels其实是个目录 |
|||
# for rel in dict_rel: |
|||
# rel = dict_rel[rel] |
|||
# print("rel", rel.target_ref) |
|||
# if "image" in rel.target_ref: |
|||
# # create_dir(desc_path) |
|||
# img_name = re.findall("/(.*)", rel.target_ref)[0] # windos:/ |
|||
# print("img_name", img_name) |
|||
# word_name = os.path.splitext("1.docx")[0] |
|||
# print("word_name", word_name) |
|||
# #检查文件路径分隔符(os.sep),并根据不同的操作系统(Windows或Unix/Linux)处理文件名。 |
|||
# if os.sep in word_name: |
|||
# new_name = word_name.split('\\')[-1] |
|||
# else: |
|||
# new_name = word_name.split('/')[-1] |
|||
# img_name = f'{new_name}_{img_name}' |
|||
# print(img_name) |
|||
# desc_path='workspace' |
|||
# with open(f'{desc_path}/{img_name}', "wb") as f: |
|||
# f.write(rel.target_part.blob) |
|||
# # |
|||
# # # prompt=''' |
|||
# # # .根据上述文本判断,是否为非泛化的公司或组织名称,你可以使用工具利用互联网查询,你只能在[非泛化的公司或组织名称,公益组织,统称,泛化名称,政府单位,机关单位,学校,委员单位]选项中选择答案,回答格式[{“placeName”:“名称”,"回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
# # # ''' |
|||
# llm_cfg_vl = { |
|||
# #'model': 'qwen1.5-72b-chat',qwen2-72b-instruct |
|||
# 'model':"qwen-vl-max", |
|||
# 'model_server': 'DashScope', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
# } |
|||
# botvl = Assistant(llm=llm_cfg_vl, |
|||
# name='Assistant', |
|||
# # system_message="你是一个地理专家,可以准确的判断地理位置,如果你不确定,可以使用工具"1_image4 |
|||
# ) |
|||
# messages = [{'role': 'user', "content": [{"image": "workspace/1.png"},{"text": '提取图片中的信息,每个信息进行自动分类,不要出现与图中无关的信息,不要删减,不要修改,不要总结内容,不做过多的解释,严格按要求作答'}]}] |
|||
# runList = [] |
|||
# for rsp in botvl.run(messages): |
|||
# runList.append(rsp) |
|||
# print(rsp) |
|||
# data = runList[len(runList) - 1][0]["content"] |
|||
# print(str(data)) |
|||
|
@ -0,0 +1,133 @@ |
|||
# -*- coding:utf-8 -*- |
|||
import time |
|||
from docx import Document |
|||
from paddlenlp import Taskflow |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
wordtag = Taskflow("knowledge_mining") |
|||
|
|||
prompt = ''' |
|||
.根据上述文本判断,是否为具体的公司或组织名称,你可以使用工具利用互联网查询, |
|||
你只能在[具体的公司或组织名称,公益组织,简称,统称,泛化组织,政府单位,机关单位,学校,行业类型,其他]选项中选择答案, |
|||
回答格式[{“companyName”:“名称”,"回答":"答案"},{“companyName”:“名称”,"回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
llm_cfg = { |
|||
#'model': 'qwen1.5-72b-chat', |
|||
'model':"qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# system_message="你是一个地理专家,可以准确的判断地理位置,如果你不确定,可以使用工具" |
|||
) |
|||
|
|||
def getDocxToTextAll(name):
    """Extract every non-empty paragraph of a .docx file to "checkCompanyName.txt".

    Paragraphs are kept in document order, joined with newlines, and written
    UTF-8 encoded so the company-name check can read the text back in batches.

    :param name: path of the .docx document to read
    """
    document = Document(name)
    # Only non-blank paragraphs (removed unused levelList/addStart/levelText/i).
    words = [paragraph.text for paragraph in document.paragraphs
             if paragraph.text.strip()]
    print("checkCompanyName", len(words))
    text = '\n'.join(words)
    with open("checkCompanyName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
def checkCompanyName(filename):
    """Run the company-name check over *filename* and return suspect entries.

    Extracts the document text, processes it batch by batch, and collects
    every name the pipeline flags as a concrete company/organisation name.

    :param filename: path of the .docx document
    :return: list of flagged-name dicts
    """
    getDocxToTextAll(filename)
    start_time = time.time()
    error_places = []
    for batch in read_file_in_batches('checkCompanyName.txt'):
        flagged = process_batch(batch)
        if len(flagged) > 0:
            error_places.extend(flagged)

    print(error_places)
    # Report wall-clock duration of the whole check.
    elapsed_time = time.time() - start_time
    print(f"checkCompanyName程序执行时间: {elapsed_time} 秒")
    return error_places
|||
|
|||
def read_file_in_batches(file_path, batch_size=5000):
    """Lazily read a UTF-8 text file in chunks of whole lines.

    A chunk is flushed as soon as the accumulated character count reaches
    *batch_size*, so lines are never split across chunks.

    :param file_path: path of the file to read
    :param batch_size: minimum number of characters per yielded chunk
    :return: generator of text chunks
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        pending = []
        pending_len = 0
        for line in handle:
            pending.append(line)
            pending_len += len(line)
            if pending_len >= batch_size:
                yield ''.join(pending)
                pending = []
                pending_len = 0
        # Flush whatever is left after the final line.
        if pending:
            yield ''.join(pending)
|||
|
|||
def process_batch(batch):
    """Run knowledge mining over one text batch and confirm names with the LLM.

    Organisation tokens found by the tagger are merged (consecutive tokens
    form one name), de-duplicated, and sent to the LLM; entries the model
    judges to be concrete company/organisation names are returned.

    :param batch: a chunk of document text
    :return: list of flagged dicts, each annotated with the first containing
        paragraph under "yuanwen"
    """
    res = wordtag(batch)
    placeList = []
    isplace = False
    for zuhe in res[0]['items']:
        # Fix: guard against a missing label — str.find on None crashed.
        label = zuhe.get("wordtag_label") or ""
        is_org = label.find("组织机构类") >= 0
        new_text = zuhe['item'].replace("\n", "")
        if isplace and is_org:
            # Consecutive organisation tokens form one name: merge (removed
            # the dead `name` assignment on the non-org branch).
            placeList[-1] += new_text
            continue
        if is_org:
            isplace = True
            placeList.append(new_text)
        else:
            isplace = False
    # De-duplicate preserving first-seen order.
    placeList = list(dict.fromkeys(placeList))
    placeStr = ",".join(placeList)
    messages = [{'role': 'user', 'content': [{'text': placeStr + prompt}]}]
    print("checkCompanyName", placeStr + prompt)
    runList = []
    for rsp in bot.run(messages):
        runList.append(rsp)
    data = runList[-1][0]["content"]
    print("checkCompanyName", data)
    parsed_data = json_repair.loads(data.replace('`', ''))
    error_places = [place for place in parsed_data
                    if place.get('回答') == '具体的公司或组织名称']
    print("checkCompanyName", error_places)
    for t in error_places:
        keyword = t['companyName']
        # Attach the first paragraph containing the name for context.
        paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', batch)
        # Fix: guard — the name may sit on the final, newline-less line and
        # the original's paragraphs[0] raised IndexError.
        t["yuanwen"] = paragraphs[0] if paragraphs else keyword
    return error_places
@ -0,0 +1,226 @@ |
|||
#-*- coding:utf-8 -*- |
|||
# from pycorrector import MacBertCorrector |
|||
# m = MacBertCorrector("shibing624/macbert4csc-base-chinese") |
|||
from qwen_agent.agents import Assistant |
|||
from docx import Document |
|||
from pprint import pprint |
|||
import re |
|||
from paddlenlp import Taskflow |
|||
import json |
|||
import time |
|||
import json_repair |
|||
print(json_repair.loads('{"name":""aaaa"}')) |
|||
start_time = time.time() |
|||
corrector = Taskflow("text_correction") |
|||
llm_cfg = { |
|||
#'model': 'qwen1.5-72b-chat', |
|||
'model':"qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。' |
|||
|
|||
) |
|||
# prompt=''' |
|||
# 是否存在错别字,若存在请指出,不做其他方面的校验,你只能在[存在,不存在,未知]选项中选择答案, |
|||
# 回答格式[{“placeName”:“原文”,"改正后":"改正的内容","回答":"答案"},{“placeName”:“原文”,"改正后":"改正的内容","回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
# ''' |
|||
prompt=''' |
|||
请回答以上问题,[是,否]选项中选择答案,原文内容,标点符号保持不变,如果有错请给出解析,没有错则不用给解析 |
|||
回答格式请按照以下json格式[{"placeName":"序号","回答":"答案","jianyi","解析"},{"placeName":"序号","回答":"答案","jianyi","解析"}],不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
def getDocxToTextAll(name):
    """Extract every non-empty paragraph of a .docx file to "checkDocumentError.txt".

    Paragraphs are kept in document order, joined with newlines, and written
    UTF-8 encoded so the typo check can read the text back in batches.

    :param name: path of the .docx document to read
    """
    document = Document(name)
    # Only non-blank paragraphs (removed unused levelList/addStart/levelText/i).
    words = [paragraph.text for paragraph in document.paragraphs
             if paragraph.text.strip()]
    print("checkDocumentError", len(words))
    text = '\n'.join(words)
    with open("checkDocumentError.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
def getDocumentError(filename):
    """Run the typo check over *filename* and return the confirmed errors.

    :param filename: path of the .docx document
    :return: list of confirmed-typo dicts from process_batch
    """
    getDocxToTextAll(filename)
    # Fix: time this call locally. The original reused the module-level
    # start_time set at import, so the reported duration measured the time
    # since the module was imported, not the check itself (the sibling
    # checkCompanyName already times locally — now consistent).
    start = time.time()
    error_places = []
    for batch in read_file_in_batches('checkDocumentError.txt'):
        res = process_batch(batch)
        if len(res) > 0:
            error_places.extend(res)

    pprint(error_places)
    elapsed_time = time.time() - start
    print(f"checkDocumentError程序执行时间: {elapsed_time} 秒")
    return error_places
|||
# |
|||
# 过滤掉填充的None(如果有的话) |
|||
# chunk = [line for line in chunk if line is not None] |
|||
# res = m.correct_batch(sentences) |
|||
# print("DocumentError",res) |
|||
# lines_with_greeting = [place for place in res if len( place['errors'])>0] |
|||
# error_places.extend(lines_with_greeting) |
|||
# pprint(error_places) |
|||
# if len(lines_with_greeting)>0: |
|||
# for t in error_places: |
|||
# keyword= t['source'] |
|||
# |
|||
# errorWord=t["errors"] |
|||
# # 查找包含关键字的段落 |
|||
# paragraphs = re.findall(r'.*?' + re.escape(keyword) + r'.*?\n', gettext) |
|||
# t["yuanwen"]=paragraphs[0] |
|||
# return error_places |
|||
# else: |
|||
# return error_places |
|||
# return lines_with_greeting |
|||
def read_file_in_batches(file_path, batch_size=5000):
    """Lazily yield a UTF-8 text file as chunks of complete lines.

    Lines accumulate until at least *batch_size* characters are buffered,
    then the buffer is flushed; a line is never split between chunks.

    :param file_path: path of the file to read
    :param batch_size: minimum number of characters per yielded chunk
    :return: generator of text chunks
    """
    with open(file_path, 'r', encoding='utf-8') as stream:
        buffered, buffered_chars = [], 0
        for line in stream:
            buffered.append(line)
            buffered_chars += len(line)
            if buffered_chars >= batch_size:
                yield ''.join(buffered)
                buffered, buffered_chars = [], 0
        # Emit any remainder that never reached the threshold.
        if buffered:
            yield ''.join(buffered)
|||
|
|||
def process_batch(batch):
    """Run the corrector over one text batch, then confirm typos with the LLM.

    Sentences flagged by the corrector are numbered and sent to the LLM; only
    entries the model answers "是" (yes, a typo) are returned, mapped back to
    their source sentences.

    :param batch: a chunk of document text
    :return: list of confirmed-typo dicts with "placeName" (source sentence)
        and "jianyi" (the model's explanation)
    """
    sentences = [s.strip() for s in re.split(r'[。\n]', batch) if s.strip()]
    res = corrector(sentences)
    flagged = [entry for entry in res if len(entry['errors']) > 0]
    err = []
    if len(flagged) > 0:
        wenti = []          # numbered questions sent to the LLM
        keyword_list = []   # index -> original sentence
        for num, entry in enumerate(flagged):
            source = entry['source']
            keyword_list.append(source)
            suspect_chars = []
            for item in entry["errors"]:
                # correction maps wrong char -> suggested char; keep the keys.
                for wrong_char in item['correction']:
                    suspect_chars.append(wrong_char)
            wenti.append("{}、原文:{}。问题:【{}】这些字是否为当前原文的错别字".format(
                num, source, ",".join(suspect_chars)))
        words = "\n".join(wenti)

        messages = [{'role': 'user', 'content': [{'text': words + prompt}]}]
        runList = []
        print(words + prompt)
        for rsp in bot.run(messages):
            runList.append(rsp)
        data = runList[-1][0]["content"]
        pprint(data)
        parsed_data = json_repair.loads(data.replace("\\", "").replace('`', ''))
        for place in parsed_data:
            if place.get('回答') != '是':
                continue
            try:
                # Fix: map the model's index back to the source sentence
                # defensively — the original crashed (KeyError/ValueError/
                # IndexError) on any malformed LLM entry.
                idx = int(place["placeName"])
                err.append({**place,
                            "placeName": keyword_list[idx],
                            "jianyi": place.get("解析", "")})
            except (KeyError, ValueError, IndexError):
                continue
        pprint(err)
    return err
|||
|
|||
# from flask import Flask, request, jsonify |
|||
# import os |
|||
# # from checkPlaceName import checkPlaceName |
|||
# # from checkRepeatText import checkRepeatText |
|||
# # from checkCompanyName import checkCompanyName |
|||
# # from documentError import getDocumentError |
|||
# app = Flask(__name__) |
|||
# UPLOAD_FOLDER = 'uploads' |
|||
# if not os.path.exists(UPLOAD_FOLDER): |
|||
# os.makedirs(UPLOAD_FOLDER) |
|||
# @app.route('/upload', methods=['POST']) |
|||
# def upload_file(): |
|||
# if 'file' not in request.files: |
|||
# return jsonify({"error": "No file part"}), 400 |
|||
# file = request.files['file'] |
|||
# if file.filename == '': |
|||
# return jsonify({"error": "No selected file"}), 400 |
|||
# if file: |
|||
# filename = file.filename |
|||
# file.save(os.path.join(UPLOAD_FOLDER,filename)) |
|||
# return jsonify({"message": "File uploaded successfully"}), 200 |
|||
# # @app.route('/checkPlaceName/<filename>', methods=['GET']) |
|||
# # def checkPlaceNameWeb(filename): |
|||
# # return checkPlaceName(filename) |
|||
# # @app.route('/checkRepeatText/<filename>', methods=['GET']) |
|||
# # def checkRepeatTextWeb(filename): |
|||
# # return checkRepeatText(filename) |
|||
# # @app.route('/checkCompanyName/<filename>', methods=['GET']) |
|||
# # def checkCompanyNameWeb(filename): |
|||
# # return checkCompanyName(filename) |
|||
# # @app.route('/checkDocumentErrorWeb/<filename>', methods=['GET']) |
|||
# # def checkDocumentErrorWeb(filename): |
|||
# # return getDocumentError(filename) |
|||
# if __name__ == '__main__': |
|||
# app.run(host='0.0.0.0',port=80) |
|||
# from transformers import AutoTokenizer, AutoModel, GenerationConfig,AutoModelForCausalLM |
|||
# import os |
|||
# os.environ['NPU_VISIBLE_DEVICES']='0,1,2,3,4,5,6,7' |
|||
# os.environ['ASCEND_RT_VISIBLE_DEVICES']='0,1,2,3,4,5,6,7' |
|||
# import torch |
|||
# import torch_npu |
|||
# from torch_npu.contrib import transfer_to_npu |
|||
|
|||
# from accelerate import Accelerator |
|||
|
|||
# # device = 'cpu' |
|||
# accelerator = Accelerator() |
|||
# # torch_device = "npu" # 0~7 |
|||
# # torch.npu.set_device(torch.device(torch_device)) |
|||
# devices = [] |
|||
# for i in range(8): |
|||
# devices.append(f"npu:{i}") |
|||
# print(devices) |
|||
# torch.npu.set_device(devices) |
|||
# torch.npu.set_compile_mode(jit_compile=False) |
|||
# model_name_or_path = '/mnt/sdc/qwen/Qwen2-72B-Instruct' |
|||
# tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False) |
|||
# # model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, device_map="auto",torch_dtype=torch.float16) |
|||
# model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True, device_map=accelerator,torch_dtype=torch.float16).npu().eval() |
@ -0,0 +1,153 @@ |
|||
from docx import Document |
|||
from paddlenlp import Taskflow |
|||
from pprint import pprint |
|||
from qwen_agent.agents import Assistant |
|||
import re |
|||
import json_repair |
|||
import time |
|||
tagTask = Taskflow("ner") |
|||
prompt=''' |
|||
.上述文本判断地名是否正确,你可以使用工具利用互联网查询,你只能在[正确,错误,简称,未知]三种选项中选择答案,回答格式[{“placeName”:“地名”,"回答":"答案"},{“placeName”:“地名”,"回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
不做过多的解释,严格按回答格式作答; |
|||
''' |
|||
# prompt=''' |
|||
# .请回答以上问题, |
|||
# ,回答格式[{“placeName”:"原文","回答":"答案"},{“placeName”:"原文","回答":"答案"}],不做过多的解释,严格按回答格式作答; |
|||
# 不做过多的解释,严格按回答格式作答; |
|||
# ''' |
|||
llm_cfg = { |
|||
#'model': 'qwen1.5-72b-chat', |
|||
'model':"qwen2-72b", |
|||
'model_server': 'http://127.0.0.1:1025/v1', # base_url, also known as api_base |
|||
# 'api_key': 'sk-ea89cf04431645b185990b8af8c9bb13', |
|||
} |
|||
bot = Assistant(llm=llm_cfg, |
|||
name='Assistant', |
|||
# description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。' |
|||
) |
|||
#获取全文内容 |
|||
#获取全文内容
def getDocxToTextAll(name):
    """Extract every non-empty paragraph of a .docx file to "checkPlaceName.txt".

    Paragraphs are kept in document order, joined with newlines, and written
    UTF-8 encoded so the batched place-name check can read the text back.

    :param name: path of the .docx document to read
    """
    document = Document(name)
    # Only non-blank paragraphs (removed unused levelList/addStart/levelText/i).
    words = [paragraph.text for paragraph in document.paragraphs
             if paragraph.text.strip()]
    print("placeNameTask", len(words))
    text = '\n'.join(words)
    with open("checkPlaceName.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
|||
|
|||
#得到全文和地名有关的内容 |
|||
#得到全文和地名有关的内容
def placeNameTask(text):
    """Run NER over *text* and return de-duplicated org/region names.

    Consecutive entity tokens are merged so multi-token organisation names
    come back as one string.

    :param text: document text to tag
    :return: list of unique names, first-seen order preserved
    """
    res = tagTask(text)
    print(res)
    placeList = []
    merging = False
    for zuhe in res:
        label = zuhe[1]
        is_target = (label.find("组织机构类") >= 0
                     or label.find("世界地区类") >= 0)
        cleaned = zuhe[0].replace("\n", "")
        if merging and is_target:
            # Still inside the same entity: extend the previous name.
            placeList[-1] += cleaned
            continue
        if is_target:
            merging = True
            placeList.append(cleaned)
        else:
            merging = False
    # De-duplicate while keeping order.
    return list(dict.fromkeys(placeList))
|||
#主方法 |
|||
def checkPlaceName(filename):
    """Entry point: extract the document text, scan it batch by batch and
    return every place name the LLM judged to be wrong.

    :param filename: path of the .docx document to check
    :return: list of dicts describing the wrong place names
    """
    getDocxToTextAll(filename)
    start_time = time.time()
    error_places = []
    # Stream the dumped text in batches to keep each LLM prompt bounded.
    for batch in read_file_in_batches('checkPlaceName.txt'):
        batch_errors = process_batch(batch)
        if len(batch_errors) > 0:
            error_places.extend(batch_errors)
    pprint(error_places)
    elapsed_time = time.time() - start_time
    print(f"checkPlaceName程序执行时间: {elapsed_time} 秒")
    return error_places
|||
|
|||
def read_file_in_batches(file_path, batch_size=5000):
    """
    Stream a text file in batches.

    :param file_path: path of the file to read
    :param batch_size: flush a batch once it holds at least this many characters
    :return: generator yielding one batch of text at a time
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        buffered_lines = []
        buffered_chars = 0
        for line in file:
            buffered_lines.append(line)
            buffered_chars += len(line)
            # Whole lines only: a batch is emitted as soon as the threshold
            # is reached, so lines are never split across batches.
            if buffered_chars >= batch_size:
                yield ''.join(buffered_lines)
                buffered_lines = []
                buffered_chars = 0
        # Emit whatever is left after the last full batch.
        if buffered_lines:
            yield ''.join(buffered_lines)
|||
|
|||
def process_batch(batch):
    """
    Process one batch of text: extract candidate place names, ask the LLM to
    judge them, and return the names marked wrong together with the original
    line they came from.

    :param batch: one batch of text
    :return: list of dicts like {"placeName": ..., "回答": "错误", "yuanwen": ...}
    """
    propnList = placeNameTask(batch)
    propnStr = ",".join(propnList)
    print("placeNameTask", propnStr)
    messages = [{'role': 'user', 'content': [{'text': propnStr + prompt}]}]
    runList = []
    for rsp in bot.run(messages):
        runList.append(rsp)
    # The last streamed response holds the complete answer.
    data = runList[-1][0]["content"]
    print("placeNameTask", data)
    # The model may wrap its output in markdown fences; strip backticks and
    # let json_repair fix any remaining malformed JSON.
    parsed_data = json_repair.loads(data.replace('`', ''))

    for item in parsed_data:
        print(f"地名: {item['placeName']}, 回答: {item['回答']}")

    # Keep only the names the model judged to be wrong.
    error_places = [place for place in parsed_data if place['回答'] == '错误']
    print("placeNameTask", error_places)
    for t in error_places:
        keyword = t['placeName']
        # Find a line containing the keyword.  Unlike the previous
        # r'.*?kw.*?\n' pattern this also matches the final line of the
        # batch, which may not end with a newline ('.' stops at '\n').
        paragraphs = re.findall(r'.*' + re.escape(keyword) + r'.*', batch)
        # Guard against a missing match so we can't raise IndexError here.
        t["yuanwen"] = paragraphs[0] if paragraphs else ""
    # The original returned error_places from both branches of an if/else;
    # a single return is equivalent.
    return error_places
@ -0,0 +1,160 @@ |
|||
import uuid |
|||
from langchain_chroma import Chroma |
|||
from langchain_community.embeddings import DashScopeEmbeddings |
|||
from langchain_community.document_loaders import TextLoader |
|||
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|||
|
|||
from paddlenlp import Taskflow |
|||
# PaddleNLP pairwise text-similarity pipeline; truncation is enabled and
# max_length raised so long paragraph pairs do not overflow the model.
similarity = Taskflow("text_similarity" , truncation=True,max_length=102400)
# SECURITY NOTE(review): the DashScope API key is hard-coded here — it should
# be loaded from an environment variable / secret store, and this key rotated.
embeddings = DashScopeEmbeddings(dashscope_api_key="sk-ea89cf04431645b185990b8af8c9bb13")
# Directory where the Chroma vector store persists its index.
vector_store_path="vector_store"
vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
|||
import re |
|||
import time |
|||
from docx import Document |
|||
|
|||
# 记录程序开始的时间戳 |
|||
def getOutlineLevel(inputXml):
    """
    Extract the number from the first <w:outlineLvl w:val="number"/> tag in
    an XML fragment.

    :param inputXml: XML string of a paragraph or style element
    :return: the outline level as a string of digits
    """
    start_index = inputXml.find('<w:outlineLvl')
    end_index = inputXml.find('>', start_index)
    tag = inputXml[start_index:end_index + 1]
    # Raw string fixes the invalid-escape-sequence warning of "\d+".
    # NOTE(review): like the original, this assumes the tag is present and
    # contains digits; callers check for '<w:outlineLvl' before calling.
    return re.search(r"\d+", tag).group()
|||
|
|||
|
|||
def isTitle(paragraph):
    """
    Determine whether a paragraph carries an outline (heading) level.

    :param paragraph: a python-docx paragraph
    :return: None for plain body text; otherwise the outline level as a
             string ("0" = top-level heading, "1" = second level, ...)
    """
    # Blank paragraphs are never titles.
    if not paragraph.text.strip():
        return None

    # Case 1: the outline level is set directly in the paragraph's own XML.
    paragraphXml = paragraph._p.xml
    if '<w:outlineLvl' in paragraphXml:
        return getOutlineLevel(paragraphXml)

    # Case 2: the outline level comes from the style chain — walk the style
    # and its base styles until one of them defines an outline level.
    style = paragraph.style
    while style is not None:
        styleXml = style.element.xml
        if '<w:outlineLvl' in styleXml:
            return getOutlineLevel(styleXml)
        style = style.base_style

    # No outline level on the paragraph or any style: plain body text.
    return None
|||
|
|||
#获取文档中 详细设计方案 章节的所有内容 |
|||
def getDocxToText(docxPath, titleName):
    """
    Extract the paragraphs of one chapter (the level-0 heading containing
    *titleName*) from a .docx file, write them to checkRepeatText.txt and
    index them in the Chroma vector store.

    :param docxPath: path of the .docx document
    :param titleName: chapter title to collect; falsy means "take everything"
    :return: (words, uuids) — the numbered paragraphs and the ids of the
             chunks added to the vector store
    :raises Exception: when no matching content is found
    """
    document = Document(docxPath)
    levelList = []
    words = []
    addStart = False
    i = 0
    for paragraph in document.paragraphs:
        text = paragraph.text
        if not text.strip():  # skip empty paragraphs
            continue
        print("非空")
        if titleName:
            level = isTitle(paragraph)
            # A new top-level heading ends the chapter we were collecting.
            if addStart and level == "0":
                addStart = False
            # The top-level heading containing the wanted title starts it.
            if level == "0" and text.find(titleName) >= 0:
                addStart = True
            if level:
                levelList.append("{}:".format(level) + paragraph.text)
            else:
                if addStart:
                    # Figure captions and notes are not checked for repeats.
                    if text.startswith("图") or text.startswith("注:"):
                        continue
                    i = i + 1
                    words.append("第{}个段落:".format(i) + text)
        else:
            words.append(text)

    print("checkRepeatText", len(words))
    if len(words) == 0:
        # Was: Exception("I know python!") — replaced with an actionable message.
        raise Exception("未找到匹配的章节内容 (no matching chapter content found)")
    text = '\n'.join(words)

    # Write with an explicit encoding: the original used the platform default,
    # which breaks for Chinese text on non-UTF-8 systems and disagrees with
    # the UTF-8 readers used elsewhere in this project.
    with open("checkRepeatText.txt", 'w', encoding='utf-8') as txt_file:
        txt_file.write(text)
    time.sleep(3)
    loader = TextLoader(file_path='checkRepeatText.txt')
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=10, add_start_index=True,
                                                   separators=["\n\n", "\n"])
    splits = text_splitter.split_documents(docs)
    # One fresh id per chunk so this run's entries can be deleted afterwards.
    uuids = [str(uuid.uuid4()) for _ in range(len(splits))]
    print(len(splits))
    print(len(uuids))
    vectorstore = Chroma(persist_directory=vector_store_path, embedding_function=embeddings)
    vectorstore.add_documents(documents=splits, ids=uuids)
    # Poll until the newly added chunks become searchable.
    while True:
        time.sleep(0.3)
        ress = vectorstore.similarity_search(words[0])
        if len(ress) > 0:
            break
    return words, uuids
|||
|
|||
|
|||
# @app.route('/checkRepeatText/<filename>', methods=['GET']) |
|||
def checkRepeatText(filename,titleName):
    """Find near-duplicate paragraphs in one chapter of a .docx file.

    Indexes the chapter in the vector store, then for every paragraph runs a
    similarity search and records pairs whose text_similarity score exceeds
    0.95.  The vector-store entries created for this run are always deleted
    afterwards.

    :param filename: path of the .docx document
    :param titleName: chapter title, passed through to getDocxToText()
    :return: list of {"yuanwen1": ..., "yuanwen2": ...} duplicate pairs
    """
    words,uuids=getDocxToText(filename,titleName)
    try:
        # Collected duplicate pairs; count tracks processed paragraphs.
        reslist = []
        count = 0
        for i in words:
            count += 1
            # Nearest neighbours of this paragraph in the vector store.
            result = vectorstore.similarity_search(i)
            # Leading "第N个段落" tag identifies the paragraph itself.
            textTag = i.split(":")[0]
            print(i)
            for content in result:
                text = content.page_content
                tag = text.split(":")[0].replace('\n', '')
                # Skip the paragraph matching itself.
                if (textTag.find(tag) >= 0):
                    continue
                # Score only the text after the paragraph tag.
                res = similarity([[i[i.find(':') + 1:], text[text.find(':') + 1:]]])
                print(res[0]["similarity"])
                if (res[0]["similarity"] > 0.95):
                    # Avoid recording the same paragraph pair twice.
                    if (len(reslist) > 0):
                        isExist = False
                        for neirong in reslist:
                            if i[i.find(':') + 1:] in neirong.values():
                                isExist = True
                                break
                        if not isExist:
                            reslist.append({"yuanwen1":i[i.find(':') + 1:],"yuanwen2":text[text.find(':') + 1:]})
                            print(reslist)
                    else:
                        reslist.append({"yuanwen1":i[i.find(':') + 1:],"yuanwen2":text[text.find(':') + 1:]})
                    print(i.split(":")[1] + "\n" + text.split(":")[1])
    except Exception as e:
        print("发生异常:",e)
    finally:
        # Always clean up the chunks this run added to the vector store.
        vectorstore.delete(ids=uuids)
        print("已删除")
    print(reslist)
    return reslist
@ -0,0 +1,712 @@ |
|||
""" |
|||
This module will parse the JSON file following the BNF definition: |
|||
|
|||
<json> ::= <container> |
|||
|
|||
<primitive> ::= <number> | <string> | <boolean> |
|||
; Where: |
|||
; <number> is a valid real number expressed in one of a number of given formats |
|||
; <string> is a string of valid characters enclosed in quotes |
|||
; <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted) |
|||
|
|||
<container> ::= <object> | <array> |
|||
<array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas |
|||
<object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members' |
|||
<member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value |
|||
|
|||
If something is wrong (a missing parenthesis or quote, for example) it will use a few simple heuristics to fix the JSON string:
|||
- Add the missing parentheses if the parser believes that the array or object should be closed |
|||
- Quote strings or add missing single quotes |
|||
- Adjust whitespaces and remove line breaks |
|||
|
|||
All supported use cases are in the unit tests |
|||
""" |
|||
|
|||
import os |
|||
import json |
|||
from typing import Any, Dict, List, Optional, Union, TextIO, Tuple, Literal |
|||
|
|||
|
|||
class StringFileWrapper:
    """Adapter that lets a text file descriptor be indexed like a string.

    Only the pieces of the str protocol the parser needs are implemented:
    single-character access, slicing, and len().
    """

    def __init__(self, fd: TextIO) -> None:
        self.fd = fd
        # Cached file length; computed lazily on the first len() call.
        self.length: int = 0

    def __getitem__(self, index: Union[int, slice]) -> str:
        if not isinstance(index, slice):
            # Single character at an absolute offset.
            self.fd.seek(index)
            return self.fd.read(1)
        # Slice: read stop-start characters from start, then rewind to start
        # (same cursor behavior as the original implementation).
        self.fd.seek(index.start)
        chunk = self.fd.read(index.stop - index.start)
        self.fd.seek(index.start)
        return chunk

    def __len__(self) -> int:
        if self.length < 1:
            # Measure once by seeking to the end, then restore the cursor.
            saved = self.fd.tell()
            self.fd.seek(0, os.SEEK_END)
            self.length = self.fd.tell()
            self.fd.seek(saved)
        return self.length
|||
|
|||
|
|||
class LoggerConfig:
    """Holds the repair log, the context window size, and the active level."""

    def __init__(self, log_level: Optional[str]):
        self.log: List[Dict[str, str]] = []   # accumulated repair-log entries
        self.window: int = 10                 # characters of context per entry
        # "none" disables logging entirely.
        self.log_level: str = log_level or "none"
|||
|
|||
|
|||
JSONReturnType = Union[Dict[str, Any], List[Any], str, float, int, bool, None] |
|||
|
|||
|
|||
class JSONParser:
    """Lenient JSON parser that repairs common LLM output mistakes.

    Walks the input character by character (``self.index``) and fixes missing
    quotes/brackets, doubled quotes, stray literals and misplaced delimiters.
    ``self.context`` is a stack recording whether parsing is currently inside
    an object key, an object value, or an array; it drives the quote-repair
    heuristics in parse_string().
    """

    def __init__(
        self,
        json_str: Union[str, StringFileWrapper],
        json_fd: Optional[TextIO],
        logging: Optional[bool],
    ) -> None:
        # The string to parse
        self.json_str = json_str
        # Alternatively, the file description with a json file in it
        if json_fd:
            # This is a trick we do to treat the file wrapper as an array
            self.json_str = StringFileWrapper(json_fd)
        # Index is our iterator that will keep track of which character we are looking at right now
        self.index: int = 0
        # This is used in the object member parsing to manage the special cases of missing quotes in key or value
        self.context: list[str] = []
        # Use this to log the activity, but only if logging is active
        self.logger = LoggerConfig(log_level="info" if logging else None)

    def parse(
        self,
    ) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
        """Parse the whole input; returns (value, log) when logging is on."""
        json = self.parse_json()
        if self.index < len(self.json_str):
            self.log(
                "The parser returned early, checking if there's more json elements",
                "info",
            )
            json = [json]
            last_index = self.index
            while self.index < len(self.json_str):
                j = self.parse_json()
                if j != "":
                    json.append(j)
                if self.index == last_index:
                    self.index += 1
                last_index = self.index
            # If nothing extra was found, don't return an array
            if len(json) == 1:
                self.log(
                    "There were no more elements, returning the element without the array",
                    "info",
                )
                json = json[0]
        if self.logger.log_level == "none":
            return json
        else:
            return json, self.logger.log

    def parse_json(
        self,
    ) -> JSONReturnType:
        """Dispatch on the next character to the matching parse_* routine."""
        while True:
            char = self.get_char_at()
            # This parser will ignore any basic element (string or number) that is not inside an array or object
            is_in_context = len(self.context) > 0
            # False means that we are at the end of the string provided
            if char is False:
                return ""
            # <object> starts with '{'
            elif char == "{":
                self.index += 1
                return self.parse_object()
            # <array> starts with '['
            elif char == "[":
                self.index += 1
                return self.parse_array()
            # there can be an edge case in which a key is empty and at the end of an object
            # like "key": }. We return an empty string here to close the object properly
            elif char == "}":
                self.log(
                    "At the end of an object we found a key with missing value, skipping",
                    "info",
                )
                return ""
            # <string> starts with a quote
            elif is_in_context and (char in ['"', "'", "“"] or char.isalpha()):
                return self.parse_string()
            # <number> starts with [0-9] or minus
            elif is_in_context and (char.isdigit() or char == "-" or char == "."):
                return self.parse_number()
            # If everything else fails, we just ignore and move on
            else:
                self.index += 1

    def parse_object(self) -> Dict[str, Any]:
        """Parse one object, repairing missing ':' and stray delimiters."""
        # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
        obj = {}
        # Stop when you either find the closing parentheses or you have iterated over the entire string
        while (self.get_char_at() or "}") != "}":
            # This is what we expect to find:
            # <member> ::= <string> ': ' <json>

            # Skip filler whitespaces
            self.skip_whitespaces_at()

            # Sometimes LLMs do weird things, if we find a ":" so early, we'll change it to "," and move on
            if (self.get_char_at() or "") == ":":
                self.log(
                    "While parsing an object we found a : before a key, ignoring",
                    "info",
                )
                self.index += 1

            # We are now searching for they string key
            # Context is used in the string parser to manage the lack of quotes
            self.set_context("object_key")

            self.skip_whitespaces_at()

            # <member> starts with a <string>
            key = ""
            while self.get_char_at():
                key = str(self.parse_string())

                if key != "" or (key == "" and self.get_char_at() == ":"):
                    # If the string is empty but there is a object divider, we are done here
                    break

            self.skip_whitespaces_at()

            # We reached the end here
            if (self.get_char_at() or "}") == "}":
                continue

            self.skip_whitespaces_at()

            # An extreme case of missing ":" after a key
            if (self.get_char_at() or "") != ":":
                self.log(
                    "While parsing an object we missed a : after a key",
                    "info",
                )

            self.index += 1
            self.reset_context()
            self.set_context("object_value")
            # The value can be any valid json
            value = self.parse_json()

            # Reset context since our job is done
            self.reset_context()
            obj[key] = value

            if (self.get_char_at() or "") in [",", "'", '"']:
                self.index += 1

            # Remove trailing spaces
            self.skip_whitespaces_at()

        self.index += 1
        return obj

    def parse_array(self) -> List[Any]:
        """Parse one array, repairing a missing closing ']' and stray '...'."""
        # <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
        arr = []
        self.set_context("array")
        # Stop when you either find the closing parentheses or you have iterated over the entire string
        while (self.get_char_at() or "]") != "]":
            self.skip_whitespaces_at()
            value = self.parse_json()

            # It is possible that parse_json() returns nothing valid, so we stop
            if value == "":
                break

            if value == "..." and self.get_char_at(-1) == ".":
                self.log(
                    "While parsing an array, found a stray '...'; ignoring it", "info"
                )
            else:
                arr.append(value)

            # skip over whitespace after a value but before closing ]
            char = self.get_char_at()
            while char and (char.isspace() or char == ","):
                self.index += 1
                char = self.get_char_at()

        # Especially at the end of an LLM generated json you might miss the last "]"
        char = self.get_char_at()
        if char and char != "]":
            self.log(
                "While parsing an array we missed the closing ], adding it back", "info"
            )
            self.index -= 1

        self.index += 1
        self.reset_context()
        return arr

    def parse_string(self) -> Union[str, bool, None]:
        """Parse one string (or literal), repairing missing/doubled quotes."""
        # <string> is a string of valid characters enclosed in quotes
        # i.e. { name: "John" }
        # Somehow all weird cases in an invalid JSON happen to be resolved in this function, so be careful here

        # Flag to manage corner cases related to missing starting quote
        missing_quotes = False
        doubled_quotes = False
        lstring_delimiter = rstring_delimiter = '"'

        char = self.get_char_at()
        # A valid string can only start with a valid quote or, in our case, with a literal
        while char and char not in ['"', "'", "“"] and not char.isalnum():
            self.index += 1
            char = self.get_char_at()

        if not char:
            # This is an empty string
            return ""

        # Ensuring we use the right delimiter
        if char == "'":
            lstring_delimiter = rstring_delimiter = "'"
        elif char == "“":
            lstring_delimiter = "“"
            rstring_delimiter = "”"
        elif char.isalnum():
            # This could be a <boolean> and not a string. Because (T)rue or (F)alse or (N)ull are valid
            # But remember, object keys are only of type string
            if char.lower() in ["t", "f", "n"] and self.get_context() != "object_key":
                value = self.parse_boolean_or_null()
                if value != "":
                    return value
            self.log(
                "While parsing a string, we found a literal instead of a quote",
                "info",
            )
            self.log(
                "While parsing a string, we found no starting quote. Will add the quote back",
                "info",
            )
            missing_quotes = True

        if not missing_quotes:
            self.index += 1

        # There is sometimes a weird case of doubled quotes, we manage this also later in the while loop
        if self.get_char_at() == lstring_delimiter:
            # If it's an empty key, this was easy
            if self.get_context() == "object_key" and self.get_char_at(1) == ":":
                self.index += 1
                return ""
            # Find the next delimiter
            i = 1
            next_c = self.get_char_at(i)
            while next_c and next_c != rstring_delimiter:
                i += 1
                next_c = self.get_char_at(i)
            # Now check that the next character is also a delimiter to ensure that we have ""....""
            # In that case we ignore this rstring delimiter
            if next_c and (self.get_char_at(i + 1) or "") == rstring_delimiter:
                self.log(
                    "While parsing a string, we found a valid starting doubled quote, ignoring it",
                    "info",
                )
                doubled_quotes = True
                self.index += 1
            else:
                # Ok this is not a doubled quote, check if this is an empty string or not
                i = 1
                next_c = self.get_char_at(i)
                while next_c and next_c.isspace():
                    i += 1
                    next_c = self.get_char_at(i)
                if next_c not in [",", "]", "}"]:
                    self.log(
                        "While parsing a string, we found a doubled quote but it was a mistake, removing one quote",
                        "info",
                    )
                    self.index += 1

        # Initialize our return value
        string_acc = ""

        # Here things get a bit hairy because a string missing the final quote can also be a key or a value in an object
        # In that case we need to use the ":|,|}" characters as terminators of the string
        # So this will stop if:
        # * It finds a closing quote
        # * It iterated over the entire sequence
        # * If we are fixing missing quotes in an object, when it finds the special terminators
        char = self.get_char_at()
        while char and char != rstring_delimiter:
            if missing_quotes:
                if self.get_context() == "object_key" and (
                    char == ":" or char.isspace()
                ):
                    self.log(
                        "While parsing a string missing the left delimiter in object key context, we found a :, stopping here",
                        "info",
                    )
                    break
                elif self.get_context() == "object_value" and char in [",", "}"]:
                    rstring_delimiter_missing = True
                    # check if this is a case in which the closing comma is NOT missing instead
                    i = 1
                    next_c = self.get_char_at(i)
                    while next_c and next_c != rstring_delimiter:
                        i += 1
                        next_c = self.get_char_at(i)
                    if next_c:
                        i += 1
                        next_c = self.get_char_at(i)
                        # found a delimiter, now we need to check that is followed strictly by a comma or brace
                        while next_c and next_c.isspace():
                            i += 1
                            next_c = self.get_char_at(i)
                        if next_c and next_c in [",", "}"]:
                            rstring_delimiter_missing = False
                    if rstring_delimiter_missing:
                        self.log(
                            "While parsing a string missing the left delimiter in object value context, we found a , or } and we couldn't determine that a right delimiter was present. Stopping here",
                            "info",
                        )
                        break
            string_acc += char
            self.index += 1
            char = self.get_char_at()
            if char and len(string_acc) > 0 and string_acc[-1] == "\\":
                # This is a special case, if people use real strings this might happen
                self.log("Found a stray escape sequence, normalizing it", "info")
                string_acc = string_acc[:-1]
                if char in [rstring_delimiter, "t", "n", "r", "b", "\\"]:
                    escape_seqs = {"t": "\t", "n": "\n", "r": "\r", "b": "\b"}
                    string_acc += escape_seqs.get(char, char) or char
                    self.index += 1
                    char = self.get_char_at()
            # ChatGPT sometimes forget to quote stuff in html tags or markdown, so we do this whole thing here
            if char == rstring_delimiter:
                # Special case here, in case of double quotes one after another
                if doubled_quotes and self.get_char_at(1) == rstring_delimiter:
                    self.log(
                        "While parsing a string, we found a doubled quote, ignoring it",
                        "info",
                    )
                    self.index += 1
                elif missing_quotes and self.get_context() == "object_value":
                    # In case of missing starting quote I need to check if the delimeter is the end or the beginning of a key
                    i = 1
                    next_c = self.get_char_at(i)
                    while next_c and next_c not in [
                        rstring_delimiter,
                        lstring_delimiter,
                    ]:
                        i += 1
                        next_c = self.get_char_at(i)
                    if next_c:
                        # We found a quote, now let's make sure there's a ":" following
                        i += 1
                        next_c = self.get_char_at(i)
                        # found a delimiter, now we need to check that is followed strictly by a comma or brace
                        while next_c and next_c.isspace():
                            i += 1
                            next_c = self.get_char_at(i)
                        if next_c and next_c == ":":
                            # Reset the cursor
                            self.index -= 1
                            char = self.get_char_at()
                            self.log(
                                "In a string with missing quotes and object value context, I found a delimeter but it turns out it was the beginning on the next key. Stopping here.",
                                "info",
                            )
                            break
                else:
                    # Check if eventually there is a rstring delimiter, otherwise we bail
                    i = 1
                    next_c = self.get_char_at(i)
                    check_comma_in_object_value = True
                    while next_c and next_c not in [
                        rstring_delimiter,
                        lstring_delimiter,
                    ]:
                        # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
                        # This is because the routine after will make sure to correct any bad guess and this solves a corner case
                        if check_comma_in_object_value and next_c.isalpha():
                            check_comma_in_object_value = False
                        # If we are in an object context, let's check for the right delimiters
                        if (
                            ("object_key" in self.context and next_c in [":", "}"])
                            or ("object_value" in self.context and next_c == "}")
                            or ("array" in self.context and next_c in ["]", ","])
                            or (
                                check_comma_in_object_value
                                and self.get_context() == "object_value"
                                and next_c == ","
                            )
                        ):
                            break
                        i += 1
                        next_c = self.get_char_at(i)
                    # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
                    if next_c == "," and self.get_context() == "object_value":
                        i += 1
                        next_c = self.get_char_at(i)
                        while next_c and next_c != rstring_delimiter:
                            i += 1
                            next_c = self.get_char_at(i)
                        # Ok now I found a delimiter, let's skip whitespaces and see if next we find a }
                        i += 1
                        next_c = self.get_char_at(i)
                        while next_c and next_c.isspace():
                            i += 1
                            next_c = self.get_char_at(i)
                        if next_c == "}":
                            # OK this is valid then
                            self.log(
                                "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here since this is the last element of the object, ignoring it",
                                "info",
                            )
                            string_acc += str(char)
                            self.index += 1
                            char = self.get_char_at()
                    elif next_c == rstring_delimiter:
                        if self.get_context() == "object_value":
                            # But this might not be it! This could be just a missing comma
                            # We found a delimiter and we need to check if this is a key
                            # so find a rstring_delimiter and a colon after
                            i += 1
                            next_c = self.get_char_at(i)
                            while next_c and next_c != rstring_delimiter:
                                i += 1
                                next_c = self.get_char_at(i)
                            i += 1
                            next_c = self.get_char_at(i)
                            while next_c and next_c != ":":
                                if next_c in [
                                    lstring_delimiter,
                                    rstring_delimiter,
                                    ",",
                                ]:
                                    break
                                i += 1
                                next_c = self.get_char_at(i)
                            # Only if we fail to find a ':' then we know this is misplaced quote
                            if next_c != ":":
                                self.log(
                                    "While parsing a string, we a misplaced quote that would have closed the string but has a different meaning here, ignoring it",
                                    "info",
                                )
                                string_acc += str(char)
                                self.index += 1
                                char = self.get_char_at()

        if (
            char
            and missing_quotes
            and self.get_context() == "object_key"
            and char.isspace()
        ):
            self.log(
                "While parsing a string, handling an extreme corner case in which the LLM added a comment instead of valid string, invalidate the string and return an empty value",
                "info",
            )
            self.skip_whitespaces_at()
            if self.get_char_at() not in [":", ","]:
                return ""

        # A fallout of the previous special case in the while loop,
        # we need to update the index only if we had a closing quote
        if char != rstring_delimiter:
            self.log(
                "While parsing a string, we missed the closing quote, ignoring",
                "info",
            )
        else:
            self.index += 1

        return string_acc.rstrip()

    def parse_number(self) -> Union[float, int, str, JSONReturnType]:
        """Parse one number; comma-containing sequences are kept as strings."""
        # <number> is a valid real number expressed in one of a number of given formats
        number_str = ""
        number_chars = set("0123456789-.eE/,")
        char = self.get_char_at()
        is_array = self.get_context() == "array"
        while char and char in number_chars and (char != "," or not is_array):
            number_str += char
            self.index += 1
            char = self.get_char_at()
        if len(number_str) > 1 and number_str[-1] in "-eE/,":
            # The number ends with a non valid character for a number/currency, rolling back one
            number_str = number_str[:-1]
            self.index -= 1
        try:
            if "," in number_str:
                return str(number_str)
            if "." in number_str or "e" in number_str or "E" in number_str:
                return float(number_str)
            elif number_str == "-":
                # If there is a stray "-" this will throw an exception, throw away this character
                return self.parse_json()
            else:
                return int(number_str)
        except ValueError:
            return number_str

    def parse_boolean_or_null(self) -> Union[bool, str, None]:
        """Parse 'true'/'false'/'null'; returns "" and rewinds on failure."""
        # <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
        starting_index = self.index
        char = (self.get_char_at() or "").lower()
        # NOTE(review): `value` is only annotated (not assigned) here; callers
        # invoke this method only when char is 't'/'f'/'n', so one of the
        # branches below always assigns before `if value:` — confirm if a new
        # call site is ever added.
        value: Optional[Tuple[str, Optional[bool]]]
        if char == "t":
            value = ("true", True)
        elif char == "f":
            value = ("false", False)
        elif char == "n":
            value = ("null", None)

        if value:
            i = 0
            while char and i < len(value[0]) and char == value[0][i]:
                i += 1
                self.index += 1
                char = (self.get_char_at() or "").lower()
            if i == len(value[0]):
                return value[1]

        # If nothing works reset the index before returning
        self.index = starting_index
        return ""

    def get_char_at(self, count: int = 0) -> Union[str, Literal[False]]:
        """Character at index+count, or False past the end of the input."""
        # Why not use something simpler? Because try/except in python is a faster alternative to an "if" statement that is often True
        try:
            return self.json_str[self.index + count]
        except IndexError:
            return False

    def skip_whitespaces_at(self) -> None:
        """
        This function quickly iterates on whitespaces, syntactic sugar to make the code more concise
        """
        try:
            char = self.json_str[self.index]
        except IndexError:
            return
        while char.isspace():
            self.index += 1
            try:
                char = self.json_str[self.index]
            except IndexError:
                return

    def set_context(self, value: str) -> None:
        # If a value is provided update the context variable and save in stack
        if value:
            self.context.append(value)

    def reset_context(self) -> None:
        # Pop the most recent parsing context off the stack.
        self.context.pop()

    def get_context(self) -> str:
        # The currently active parsing context (top of the stack).
        return self.context[-1]

    def log(self, text: str, level: str) -> None:
        # Record a repair action together with a window of surrounding input.
        if level == self.logger.log_level:
            context = ""
            start = max(self.index - self.logger.window, 0)
            end = min(self.index + self.logger.window, len(self.json_str))
            context = self.json_str[start:end]
            self.logger.log.append(
                {
                    "text": text,
                    "context": context,
                }
            )
|||
|
|||
|
|||
def repair_json(
    json_str: str = "",
    return_objects: bool = False,
    skip_json_loads: bool = False,
    logging: bool = False,
    json_fd: Optional[TextIO] = None,
    ensure_ascii: bool = True,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """
    Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.
    It will return the fixed string by default.
    When `return_objects=True` is passed, it will return the decoded data structure instead.
    When `skip_json_loads=True` is passed, it will not call the built-in json.loads() function
    When `logging=True` is passed, it will return a tuple with the repaired json and a log of all repair actions
    """
    repairing_parser = JSONParser(json_str, json_fd, logging)
    if skip_json_loads:
        parsed_json = repairing_parser.parse()
    else:
        # Fast path: try the stdlib decoder first; repair only on failure.
        try:
            parsed_json = json.load(json_fd) if json_fd else json.loads(json_str)
        except json.JSONDecodeError:
            parsed_json = repairing_parser.parse()
    # Returning the decoded object lets this library stand in for `json`.
    if return_objects or logging:
        return parsed_json
    return json.dumps(parsed_json, ensure_ascii=ensure_ascii)
|||
|
|||
|
|||
def loads(
    json_str: str,
    skip_json_loads: bool = False,
    logging: bool = False,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """Drop-in analogue of `json.loads()` that repairs invalid JSON.

    Delegates to `repair_json()` with `return_objects=True`, so the decoded
    data structure (not a string) is returned.
    """
    return repair_json(json_str=json_str, return_objects=True, skip_json_loads=skip_json_loads, logging=logging)
|||
|
|||
|
|||
def load(
    fd: TextIO, skip_json_loads: bool = False, logging: bool = False
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """Drop-in analogue of `json.load()` that repairs invalid JSON.

    Delegates to `repair_json()` with `json_fd=fd` and `return_objects=True`,
    so the decoded data structure (not a string) is returned.
    """
    return repair_json(json_fd=fd, return_objects=True, skip_json_loads=skip_json_loads, logging=logging)
|||
|
|||
|
|||
def from_file(
    filename: str,
    skip_json_loads: bool = False,
    logging: bool = False,
) -> Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]:
    """
    This function is a wrapper around `load()` so you can pass the filename as string

    Uses a context manager so the file handle is always closed, even when
    `load()` raises — the previous open()/close() pair leaked the handle on
    any exception between the two calls.
    """
    with open(filename) as fd:
        return load(fd, skip_json_loads, logging)
@ -0,0 +1,45 @@ |
|||
from flask import Flask, request, jsonify |
|||
import os |
|||
from checkPlaceName import checkPlaceName |
|||
# from checkRepeatText import checkRepeatText |
|||
from checkCompanyName import checkCompanyName |
|||
from checkDocumentError import getDocumentError |
|||
app = Flask(__name__)

# Directory where uploaded documents are stored, relative to the working dir.
UPLOAD_FOLDER = 'uploads'
# exist_ok=True creates the directory atomically with respect to existence,
# avoiding the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|||
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a single multipart-form file (field name 'file') and save it
    into UPLOAD_FOLDER. Returns JSON with HTTP 400 on a missing/empty file
    and HTTP 200 on success."""
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400
    if file:
        # Strip any directory components from the client-supplied name so a
        # crafted filename (e.g. "../../etc/passwd") cannot escape the
        # upload directory (path traversal).
        filename = os.path.basename(file.filename)
        file.save(os.path.join(UPLOAD_FOLDER, filename))
        return jsonify({"message": "File uploaded successfully"}), 200
|||
|
|||
@app.route('/getDocumentError', methods=['GET'])
def getDocumentErrorWeb():
    """HTTP wrapper: run the document-error check on ?filename=<name>."""
    return getDocumentError(request.args.get('filename'))
|||
@app.route('/checkPlaceName', methods=['GET'])
def checkPlaceNameWeb():
    """HTTP wrapper: run the place-name check on ?filename=<name>."""
    return checkPlaceName(request.args.get('filename'))
|||
@app.route('/checkRepeatText', methods=['GET'])
def checkRepeatTextWeb():
    # HTTP wrapper: check ?filename=<name> for repeated text within
    # ?sectionName=<section>.
    # NOTE(review): the `from checkRepeatText import checkRepeatText` import at
    # the top of this file is commented out, so this handler raises NameError
    # at request time — confirm whether the route should be disabled or the
    # import restored.
    filename = request.args.get('filename')
    sectionName=request.args.get('sectionName')
    return checkRepeatText(filename,sectionName)
|||
@app.route('/checkCompanyName', methods=['GET'])
def checkCompanyNameWeb():
    """HTTP wrapper: run the company-name check on ?filename=<name>."""
    return checkCompanyName(request.args.get('filename'))
|||
|
|||
@app.route('/test/<filename>', methods=['GET'])
def test(filename):
    """Echo the <filename> path segment back — a routing smoke check."""
    return filename
|||
if __name__ == '__main__':
    # Dev-server entry point: binds all interfaces on port 80 (requires
    # elevated privileges on most systems). Flask's built-in server is not
    # intended for production use.
    app.run(host="0.0.0.0",port=80)