feature@添加问答模式选择

This commit is contained in:
yanqiangmiffy 2023-04-20 02:06:02 +08:00
parent 57f12d9a21
commit 65d97b14b4
7 changed files with 176 additions and 101 deletions

View File

@ -8,6 +8,7 @@ colorTo: yellow
pinned: true
app_file: app.py
---
# Chinese-LangChain
> Chinese-LangChain中文langchain项目基于ChatGLM-6b+langchain实现本地化知识库检索与智能答案生成
@ -55,6 +56,8 @@ python main.py
## 🚀 特性
- 📝 2023/04/20 支持模型问答与检索问答模式切换
- 📝 2023/04/20 感谢HF官方提供免费算力添加HuggingFace Spaces在线体验[[🤗 DEMO](https://huggingface.co/spaces/ChallengeHub/Chinese-LangChain)]
- 📝 2023/04/19 发布45万Wikipedia的文本预处理语料以及FAISS索引向量
- 🐯 2023/04/19 引入ChuanhuChatGPT皮肤
- 📱 2023/04/19 增加web search功能需要确保网络畅通(感谢[@wanghao07456](https://github.com/wanghao07456),提供的idea)
@ -87,6 +90,7 @@ python main.py
* [x] 支持加载不同知识库
* [x] 支持检索结果与LLM生成结果对比
* [ ] 支持检索生成结果与原始LLM生成结果对比
* [ ] 支持模型问答与检索问答
* [ ] 检索结果过滤与排序
* [x] 互联网检索结果接入
* [ ] 模型初始化有问题

68
app.py
View File

@ -1,6 +1,7 @@
import os
import shutil
from app_modules.overwrites import postprocess
from app_modules.presets import *
from clc.langchain_application import LangChainApplication
@ -8,15 +9,16 @@ from clc.langchain_application import LangChainApplication
# 修改成自己的配置!!!
class LangChainCFG:
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
embedding_model_name = 'GanymedeNil/text2vec-base-chinese' # 检索模型文件 or huggingface远程仓库
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
vector_store_path = './cache'
docs_path = './docs'
kg_vector_stores = {
'中文维基百科': './cache/zh_wikipedia',
'大规模金融研报知识图谱': '.cache/financial_research_reports',
'初始化知识库': '.cache',
'大规模金融研报': './cache/financial_research_reports',
'初始化': './cache',
} # 可以替换成自己的知识库如果没有需要设置为None
# kg_vector_stores=None
patterns = ['模型问答', '知识库问答'] #
config = LangChainCFG()
@ -61,6 +63,7 @@ def predict(input,
embedding_model,
top_k,
use_web,
use_pattern,
history=None):
# print(large_language_model, embedding_model)
print(input)
@ -71,6 +74,14 @@ def predict(input,
web_content = application.source_service.search_web(query=input)
else:
web_content = ''
search_text = ''
if use_pattern == '模型问答':
result = application.get_llm_answer(query=input, web_content=web_content)
history.append((input, result))
search_text += web_content
return '', history, history, search_text
else:
resp = application.get_knowledge_based_answer(
query=input,
history_len=1,
@ -81,7 +92,6 @@ def predict(input,
chat_history=history
)
history.append((input, resp['result']))
search_text = ''
for idx, source in enumerate(resp['source_documents'][:4]):
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
search_text += f'{sep}\n{source.page_content}\n\n'
@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
step=1,
label="检索top-k文档",
interactive=True)
kg_name = gr.Radio(['中文维基百科',
'大规模金融研报知识图谱',
'初始化知识库'
],
label="知识库",
value='初始化知识库',
interactive=True)
set_kg_btn = gr.Button("重新加载知识库")
use_web = gr.Radio(["使用", "不使用"], label="web search",
info="是否使用网络搜索,使用时确保网络畅通",
value="不使用"
)
use_pattern = gr.Radio(
[
'模型问答',
'知识库问答',
],
label="模式",
value='模型问答',
interactive=True)
kg_name = gr.Radio(['中文维基百科',
'大规模金融研报知识图谱',
'初始化知识库'
],
label="知识库",
value=None,
info="使用知识库问答,请加载知识库",
interactive=True)
set_kg_btn = gr.Button("加载知识库")
file = gr.File(label="将文件上传到知识库,内容要尽量匹配",
visible=True,
file_types=['.txt', '.md', '.docx', '.pdf']
)
file.upload(upload_file,
inputs=file,
outputs=None)
with gr.Column(scale=4):
with gr.Row():
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
with gr.Column(scale=2):
search = gr.Textbox(label='搜索结果')
# ============= 触发动作=============
file.upload(upload_file,
inputs=file,
outputs=None)
set_kg_btn.click(
set_knowledge,
show_progress=True,
@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
# 发送按钮 提交
send.click(predict,
inputs=[
message, large_language_model,
embedding_model, top_k, use_web,
message,
large_language_model,
embedding_model,
top_k,
use_web,
use_pattern,
state
],
outputs=[message, chatbot, state, search])
@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
# 输入框 回车
message.submit(predict,
inputs=[
message, large_language_model,
embedding_model, top_k, use_web,
message,
large_language_model,
embedding_model,
top_k,
use_web,
use_pattern,
state
],
outputs=[message, chatbot, state, search])

View File

@ -1,5 +1,5 @@
:root {
--chatbot-color-light: #F3F3F3;
--chatbot-color-light: rgba(255, 255, 255, 0.08);
--chatbot-color-dark: #121111;
}
@ -40,7 +40,7 @@ ol:not(.options), ul:not(.options) {
color: #000000 !important;
}
[data-testid = "bot"] {
background-color: #FFFFFF !important;
background-color: rgba(255, 255, 255, 0.08) !important;
}
[data-testid = "user"] {
background-color: #95EC69 !important;
@ -49,7 +49,7 @@ ol:not(.options), ul:not(.options) {
/* Dark mode */
.dark #chuanhu_chatbot {
background-color: var(--chatbot-color-dark) !important;
color: #FFFFFF !important;
color: rgba(255, 255, 255, 0.08) !important;
}
.dark [data-testid = "bot"] {
background-color: #2C2C2C !important;

View File

@ -12,7 +12,7 @@
class LangChainCFG:
llm_model_name = 'chatglm-6b' # 本地模型文件 or huggingface远程仓库
embedding_model_name = 'text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
vector_store_path = '.'
docs_path = './docs'

View File

@ -9,10 +9,10 @@
@software: PyCharm
@description: coding..
"""
from langchain.chains import RetrievalQA
from langchain.prompts.prompt import PromptTemplate
from clc.config import LangChainCFG
from clc.gpt_service import ChatGLMService
from clc.source_service import SourceService
@ -23,15 +23,16 @@ class LangChainApplication(object):
self.llm_service = ChatGLMService()
self.llm_service.load_model(model_name_or_path=self.config.llm_model_name)
self.source_service = SourceService(config)
if self.config.kg_vector_stores is None:
print("init a source vector store")
self.source_service.init_source_vector()
else:
print("load zh_wikipedia source vector store ")
try:
self.source_service.load_vector_store(self.config.kg_vector_stores['初始化知识库'])
except Exception as e:
self.source_service.init_source_vector()
# if self.config.kg_vector_stores is None:
# print("init a source vector store")
# self.source_service.init_source_vector()
# else:
# print("load zh_wikipedia source vector store ")
# try:
# self.source_service.load_vector_store(self.config.kg_vector_stores['初始化知识库'])
# except Exception as e:
# self.source_service.init_source_vector()
def get_knowledge_based_answer(self, query,
history_len=5,
@ -75,11 +76,22 @@ class LangChainApplication(object):
result = knowledge_chain({"query": query})
return result
# if __name__ == '__main__':
# config = LangChainCFG()
# application = LangChainApplication(config)
def get_llm_answer(self, query='', web_content=''):
if web_content:
prompt = f'基于网络检索内容:{web_content},回答以下问题{query}'
else:
prompt = query
result = self.llm_service._call(prompt)
return result
if __name__ == '__main__':
config = LangChainCFG()
application = LangChainApplication(config)
# result = application.get_knowledge_based_answer('马保国是谁')
# print(result)
# application.source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/马保国.txt')
# result = application.get_knowledge_based_answer('马保国是谁')
# print(result)
result = application.get_llm_answer('马保国是谁')
print(result)

View File

@ -13,7 +13,6 @@
import os
from duckduckgo_search import ddg
from duckduckgo_search.utils import SESSION
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
@ -61,12 +60,16 @@ class SourceService(object):
# "http": f"socks5h://localhost:7890",
# "https": f"socks5h://localhost:7890"
# }
try:
results = ddg(query)
web_content = ''
if results:
for result in results:
web_content += result['body']
return web_content
except Exception as e:
print(f"网络检索异常:{query}")
return ''
# if __name__ == '__main__':
# config = LangChainCFG()
# source_service = SourceService(config)

66
main.py
View File

@ -1,6 +1,7 @@
import os
import shutil
from app_modules.overwrites import postprocess
from app_modules.presets import *
from clc.langchain_application import LangChainApplication
@ -13,10 +14,11 @@ class LangChainCFG:
docs_path = './docs'
kg_vector_stores = {
'中文维基百科': './cache/zh_wikipedia',
'大规模金融研报知识图谱': '.cache/financial_research_reports',
'初始化知识库': '.cache',
'大规模金融研报': './cache/financial_research_reports',
'初始化': './cache',
} # 可以替换成自己的知识库如果没有需要设置为None
# kg_vector_stores=None
patterns = ['模型问答', '知识库问答'] #
config = LangChainCFG()
@ -61,6 +63,7 @@ def predict(input,
embedding_model,
top_k,
use_web,
use_pattern,
history=None):
# print(large_language_model, embedding_model)
print(input)
@ -71,6 +74,14 @@ def predict(input,
web_content = application.source_service.search_web(query=input)
else:
web_content = ''
search_text = ''
if use_pattern == '模型问答':
result = application.get_llm_answer(query=input, web_content=web_content)
history.append((input, result))
search_text += web_content
return '', history, history, search_text
else:
resp = application.get_knowledge_based_answer(
query=input,
history_len=1,
@ -81,7 +92,6 @@ def predict(input,
chat_history=history
)
history.append((input, resp['result']))
search_text = ''
for idx, source in enumerate(resp['source_documents'][:4]):
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
search_text += f'{sep}\n{source.page_content}\n\n'
@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
step=1,
label="检索top-k文档",
interactive=True)
kg_name = gr.Radio(['中文维基百科',
'大规模金融研报知识图谱',
'初始化知识库'
],
label="知识库",
value='初始化知识库',
interactive=True)
set_kg_btn = gr.Button("重新加载知识库")
use_web = gr.Radio(["使用", "不使用"], label="web search",
info="是否使用网络搜索,使用时确保网络畅通",
value="不使用"
)
use_pattern = gr.Radio(
[
'模型问答',
'知识库问答',
],
label="模式",
value='模型问答',
interactive=True)
kg_name = gr.Radio(['中文维基百科',
'大规模金融研报知识图谱',
'初始化知识库'
],
label="知识库",
value=None,
info="使用知识库问答,请加载知识库",
interactive=True)
set_kg_btn = gr.Button("加载知识库")
file = gr.File(label="将文件上传到知识库,内容要尽量匹配",
visible=True,
file_types=['.txt', '.md', '.docx', '.pdf']
)
file.upload(upload_file,
inputs=file,
outputs=None)
with gr.Column(scale=4):
with gr.Row():
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
with gr.Column(scale=2):
search = gr.Textbox(label='搜索结果')
# ============= 触发动作=============
file.upload(upload_file,
inputs=file,
outputs=None)
set_kg_btn.click(
set_knowledge,
show_progress=True,
@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
# 发送按钮 提交
send.click(predict,
inputs=[
message, large_language_model,
embedding_model, top_k, use_web,
message,
large_language_model,
embedding_model,
top_k,
use_web,
use_pattern,
state
],
outputs=[message, chatbot, state, search])
@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
# 输入框 回车
message.submit(predict,
inputs=[
message, large_language_model,
embedding_model, top_k, use_web,
message,
large_language_model,
embedding_model,
top_k,
use_web,
use_pattern,
state
],
outputs=[message, chatbot, state, search])