You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.2 KiB
64 lines
2.2 KiB
5 months ago
|
from qwen_agent.agents import Assistant
|
||
|
# from qwen_agent.agents.doc_qa import ParallelDocQA
|
||
|
|
||
|
# Connection settings for the LLM that backs the assistant: the model name
# and the OpenAI-compatible endpoint it is served from.
llm_cfg = {
    # 'model': 'qwen1.5-72b-chat',
    'model': 'qwen2-72b',
    'model_server': 'http://127.0.0.1:1025/v1',  # base_url, also known as api_base
    # 'api_key': '<redacted>',  # never commit real keys; load them from the environment
}
|
||
|
# Build the assistant on top of llm_cfg. The (Chinese) description string
# advertises RAG retrieval over PDF/Word/PPT/TXT/HTML files.
bot = Assistant(
    llm=llm_cfg,
    name='Assistant',
    description='使用RAG检索并回答,支持文件类型:PDF/Word/PPT/TXT/HTML。',
)
|
||
|
# User question sent to the assistant (kept verbatim, including the leading
# and trailing newline of the triple-quoted literal).
prompt = '''
请找是描述项目建设的章节名称
'''
|
||
|
# One user turn made of two content items: the text prompt and a file entry.
# NOTE(review): the 'file' value is an empty string — presumably a placeholder
# for the document path/URL; confirm and fill it in before running.
messages = [{'role': 'user', 'content': [{'text': prompt}, {'file': ''}]}]

# Print every response object yielded by bot.run().
for rsp in bot.run(messages):
    print(rsp)
|
||
|
# messages = [{'role': 'user', 'content': [{'text':prompt}]}]
# runList=[]
# for rsp in bot.run(messages):
#     print(rsp)
|
||
|
import re
|
||
|
# NOTE(review): disabled python-docx experiment; the nesting of the inner
# commented-out lines is a best-guess reconstruction.
# from docx import Document
#
# document = Document('747991ddb29a49da903210959076bb9f.docx')
# # Read the docx document paragraph by paragraph
# levelList = []
# words = []
# addStart = False
# levelText = ""
# i = 0
# for paragraph in document.paragraphs:
#     # Determine the heading level of this paragraph
#     # isTitle() is a temporary stand-in here; see the method described below
#     text = paragraph.text
#     if text.strip():  # non-empty check
#         # print("非空")
#         words.append(text)
#         # level = isTitle(paragraph)
#         # if(addStart and level=="0"):
#         #     addStart=False
#         # if(level=="0" and text.find("详细设计方案")>=0):
#         #     addStart=True
#         # if level:
#         #     levelList.append("{}:".format(level)+paragraph.text)
#         #     levelText=text
#         # else:
#         #     if addStart:
#         #         if(text.startswith("图") or text.startswith("注:")):
#         #             continue
#         #         i=i+1
#         #         words.append("第{}个段落:".format(i)+text)
#
# # Join all paragraph texts into one newline-separated string
# print(len(words))
# text = '\n'.join(words)
# paragraphs = re.findall(r'.*?' + re.escape('宁波市') + r'.*?\n', text)
# print(paragraphs)
|
||
|
from langchain_community.document_loaders import TextLoader
|
||
|
|
||
|
# Read checkRepeatText.txt through TextLoader; the result of load() is kept
# in docs.
# NOTE(review): no encoding is passed, so the loader's default applies —
# consider encoding='utf-8' if the file holds UTF-8 Chinese text; confirm.
loader = TextLoader('checkRepeatText.txt')
docs = loader.load()
|