feature@添加问答模式选择
This commit is contained in:
parent
57f12d9a21
commit
65d97b14b4
|
@ -8,6 +8,7 @@ colorTo: yellow
|
|||
pinned: true
|
||||
app_file: app.py
|
||||
---
|
||||
|
||||
# Chinese-LangChain
|
||||
|
||||
> Chinese-LangChain:中文langchain项目,基于ChatGLM-6b+langchain实现本地化知识库检索与智能答案生成
|
||||
|
@ -55,6 +56,8 @@ python main.py
|
|||
|
||||
## 🚀 特性
|
||||
|
||||
- 📝 2023/04/20 支持模型问答与检索问答模式切换
|
||||
- 📝 2023/04/20 感谢HF官方提供免费算力,添加HuggingFace Spaces在线体验[🤗 DEMO](https://huggingface.co/spaces/ChallengeHub/Chinese-LangChain)
|
||||
- 📝 2023/04/19 发布45万Wikipedia的文本预处理语料以及FAISS索引向量
|
||||
- 🐯 2023/04/19 引入ChuanhuChatGPT皮肤
|
||||
- 📱 2023/04/19 增加web search功能,需要确保网络畅通!(感谢[@wanghao07456](https://github.com/wanghao07456)提供的idea)
|
||||
|
@ -87,6 +90,7 @@ python main.py
|
|||
* [x] 支持加载不同知识库
|
||||
* [x] 支持检索结果与LLM生成结果对比
|
||||
* [ ] 支持检索生成结果与原始LLM生成结果对比
|
||||
* [ ] 支持模型问答与检索问答
|
||||
* [ ] 检索结果过滤与排序
|
||||
* [x] 互联网检索结果接入
|
||||
* [ ] 模型初始化有问题
|
||||
|
|
68
app.py
68
app.py
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import shutil
|
||||
|
||||
from app_modules.overwrites import postprocess
|
||||
from app_modules.presets import *
|
||||
from clc.langchain_application import LangChainApplication
|
||||
|
||||
|
@ -8,15 +9,16 @@ from clc.langchain_application import LangChainApplication
|
|||
# 修改成自己的配置!!!
|
||||
class LangChainCFG:
|
||||
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
|
||||
embedding_model_name = 'GanymedeNil/text2vec-base-chinese' # 检索模型文件 or huggingface远程仓库
|
||||
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
||||
vector_store_path = './cache'
|
||||
docs_path = './docs'
|
||||
kg_vector_stores = {
|
||||
'中文维基百科': './cache/zh_wikipedia',
|
||||
'大规模金融研报知识图谱': '.cache/financial_research_reports',
|
||||
'初始化知识库': '.cache',
|
||||
'大规模金融研报': './cache/financial_research_reports',
|
||||
'初始化': './cache',
|
||||
} # 可以替换成自己的知识库,如果没有需要设置为None
|
||||
# kg_vector_stores=None
|
||||
patterns = ['模型问答', '知识库问答'] #
|
||||
|
||||
|
||||
config = LangChainCFG()
|
||||
|
@ -61,6 +63,7 @@ def predict(input,
|
|||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
history=None):
|
||||
# print(large_language_model, embedding_model)
|
||||
print(input)
|
||||
|
@ -71,6 +74,14 @@ def predict(input,
|
|||
web_content = application.source_service.search_web(query=input)
|
||||
else:
|
||||
web_content = ''
|
||||
search_text = ''
|
||||
if use_pattern == '模型问答':
|
||||
result = application.get_llm_answer(query=input, web_content=web_content)
|
||||
history.append((input, result))
|
||||
search_text += web_content
|
||||
return '', history, history, search_text
|
||||
|
||||
else:
|
||||
resp = application.get_knowledge_based_answer(
|
||||
query=input,
|
||||
history_len=1,
|
||||
|
@ -81,7 +92,6 @@ def predict(input,
|
|||
chat_history=history
|
||||
)
|
||||
history.append((input, resp['result']))
|
||||
search_text = ''
|
||||
for idx, source in enumerate(resp['source_documents'][:4]):
|
||||
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
|
||||
search_text += f'{sep}\n{source.page_content}\n\n'
|
||||
|
@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
step=1,
|
||||
label="检索top-k文档",
|
||||
interactive=True)
|
||||
kg_name = gr.Radio(['中文维基百科',
|
||||
'大规模金融研报知识图谱',
|
||||
'初始化知识库'
|
||||
],
|
||||
label="知识库",
|
||||
value='初始化知识库',
|
||||
interactive=True)
|
||||
set_kg_btn = gr.Button("重新加载知识库")
|
||||
|
||||
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
||||
info="是否使用网络搜索,使用时确保网络通常",
|
||||
value="不使用"
|
||||
)
|
||||
use_pattern = gr.Radio(
|
||||
[
|
||||
'模型问答',
|
||||
'知识库问答',
|
||||
],
|
||||
label="模式",
|
||||
value='模型问答',
|
||||
interactive=True)
|
||||
|
||||
kg_name = gr.Radio(['中文维基百科',
|
||||
'大规模金融研报知识图谱',
|
||||
'初始化知识库'
|
||||
],
|
||||
label="知识库",
|
||||
value=None,
|
||||
info="使用知识库问答,请加载知识库",
|
||||
interactive=True)
|
||||
set_kg_btn = gr.Button("加载知识库")
|
||||
|
||||
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
||||
visible=True,
|
||||
file_types=['.txt', '.md', '.docx', '.pdf']
|
||||
)
|
||||
|
||||
file.upload(upload_file,
|
||||
inputs=file,
|
||||
outputs=None)
|
||||
with gr.Column(scale=4):
|
||||
with gr.Row():
|
||||
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
||||
|
@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
with gr.Column(scale=2):
|
||||
search = gr.Textbox(label='搜索结果')
|
||||
|
||||
# ============= 触发动作=============
|
||||
file.upload(upload_file,
|
||||
inputs=file,
|
||||
outputs=None)
|
||||
set_kg_btn.click(
|
||||
set_knowledge,
|
||||
show_progress=True,
|
||||
|
@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
# 发送按钮 提交
|
||||
send.click(predict,
|
||||
inputs=[
|
||||
message, large_language_model,
|
||||
embedding_model, top_k, use_web,
|
||||
|
||||
message,
|
||||
large_language_model,
|
||||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
state
|
||||
],
|
||||
outputs=[message, chatbot, state, search])
|
||||
|
@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
# 输入框 回车
|
||||
message.submit(predict,
|
||||
inputs=[
|
||||
message, large_language_model,
|
||||
embedding_model, top_k, use_web,
|
||||
message,
|
||||
large_language_model,
|
||||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
state
|
||||
],
|
||||
outputs=[message, chatbot, state, search])
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
:root {
|
||||
--chatbot-color-light: #F3F3F3;
|
||||
--chatbot-color-light: rgba(255, 255, 255, 0.08);
|
||||
--chatbot-color-dark: #121111;
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ ol:not(.options), ul:not(.options) {
|
|||
color: #000000 !important;
|
||||
}
|
||||
[data-testid = "bot"] {
|
||||
background-color: #FFFFFF !important;
|
||||
background-color: rgba(255, 255, 255, 0.08) !important;
|
||||
}
|
||||
[data-testid = "user"] {
|
||||
background-color: #95EC69 !important;
|
||||
|
@ -49,7 +49,7 @@ ol:not(.options), ul:not(.options) {
|
|||
/* Dark mode */
|
||||
.dark #chuanhu_chatbot {
|
||||
background-color: var(--chatbot-color-dark) !important;
|
||||
color: #FFFFFF !important;
|
||||
color: rgba(255, 255, 255, 0.08) !important;
|
||||
}
|
||||
.dark [data-testid = "bot"] {
|
||||
background-color: #2C2C2C !important;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
|
||||
class LangChainCFG:
|
||||
llm_model_name = 'chatglm-6b' # 本地模型文件 or huggingface远程仓库
|
||||
embedding_model_name = 'text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
||||
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
|
||||
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
||||
vector_store_path = '.'
|
||||
docs_path = './docs'
|
||||
|
|
|
@ -9,10 +9,10 @@
|
|||
@software: PyCharm
|
||||
@description: coding..
|
||||
"""
|
||||
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
from clc.config import LangChainCFG
|
||||
from clc.gpt_service import ChatGLMService
|
||||
from clc.source_service import SourceService
|
||||
|
||||
|
@ -23,15 +23,16 @@ class LangChainApplication(object):
|
|||
self.llm_service = ChatGLMService()
|
||||
self.llm_service.load_model(model_name_or_path=self.config.llm_model_name)
|
||||
self.source_service = SourceService(config)
|
||||
if self.config.kg_vector_stores is None:
|
||||
print("init a source vector store")
|
||||
self.source_service.init_source_vector()
|
||||
else:
|
||||
print("load zh_wikipedia source vector store ")
|
||||
try:
|
||||
self.source_service.load_vector_store(self.config.kg_vector_stores['初始化知识库'])
|
||||
except Exception as e:
|
||||
self.source_service.init_source_vector()
|
||||
|
||||
# if self.config.kg_vector_stores is None:
|
||||
# print("init a source vector store")
|
||||
# self.source_service.init_source_vector()
|
||||
# else:
|
||||
# print("load zh_wikipedia source vector store ")
|
||||
# try:
|
||||
# self.source_service.load_vector_store(self.config.kg_vector_stores['初始化知识库'])
|
||||
# except Exception as e:
|
||||
# self.source_service.init_source_vector()
|
||||
|
||||
def get_knowledge_based_answer(self, query,
|
||||
history_len=5,
|
||||
|
@ -75,11 +76,22 @@ class LangChainApplication(object):
|
|||
result = knowledge_chain({"query": query})
|
||||
return result
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# config = LangChainCFG()
|
||||
# application = LangChainApplication(config)
|
||||
def get_llm_answer(self, query='', web_content=''):
    """Answer ``query`` with the bare LLM, skipping knowledge-base retrieval.

    When ``web_content`` is non-empty it is embedded in the prompt ahead of
    the question; otherwise the query is sent to the model unchanged.

    :param query: the user's question.
    :param web_content: text gathered from a web search, or ``''`` for none.
    :return: whatever ``self.llm_service._call`` returns for the prompt.
    """
    prompt = (
        f'基于网络检索内容:{web_content},回答以下问题{query}'
        if web_content
        else query
    )
    return self.llm_service._call(prompt)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: build the application from the default config and
    # print the plain-LLM answer to a sample question. The knowledge-based
    # path can be exercised the same way via get_knowledge_based_answer().
    application = LangChainApplication(LangChainCFG())
    print(application.get_llm_answer('马保国是谁'))
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
import os
|
||||
|
||||
from duckduckgo_search import ddg
|
||||
from duckduckgo_search.utils import SESSION
|
||||
from langchain.document_loaders import UnstructuredFileLoader
|
||||
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
||||
from langchain.vectorstores import FAISS
|
||||
|
@ -61,12 +60,16 @@ class SourceService(object):
|
|||
# "http": f"socks5h://localhost:7890",
|
||||
# "https": f"socks5h://localhost:7890"
|
||||
# }
|
||||
try:
|
||||
results = ddg(query)
|
||||
web_content = ''
|
||||
if results:
|
||||
for result in results:
|
||||
web_content += result['body']
|
||||
return web_content
|
||||
except Exception as e:
|
||||
print(f"网络检索异常:{query}")
|
||||
return ''
|
||||
# if __name__ == '__main__':
|
||||
# config = LangChainCFG()
|
||||
# source_service = SourceService(config)
|
||||
|
|
66
main.py
66
main.py
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import shutil
|
||||
|
||||
from app_modules.overwrites import postprocess
|
||||
from app_modules.presets import *
|
||||
from clc.langchain_application import LangChainApplication
|
||||
|
||||
|
@ -13,10 +14,11 @@ class LangChainCFG:
|
|||
docs_path = './docs'
|
||||
kg_vector_stores = {
|
||||
'中文维基百科': './cache/zh_wikipedia',
|
||||
'大规模金融研报知识图谱': '.cache/financial_research_reports',
|
||||
'初始化知识库': '.cache',
|
||||
'大规模金融研报': './cache/financial_research_reports',
|
||||
'初始化': './cache',
|
||||
} # 可以替换成自己的知识库,如果没有需要设置为None
|
||||
# kg_vector_stores=None
|
||||
patterns = ['模型问答', '知识库问答'] #
|
||||
|
||||
|
||||
config = LangChainCFG()
|
||||
|
@ -61,6 +63,7 @@ def predict(input,
|
|||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
history=None):
|
||||
# print(large_language_model, embedding_model)
|
||||
print(input)
|
||||
|
@ -71,6 +74,14 @@ def predict(input,
|
|||
web_content = application.source_service.search_web(query=input)
|
||||
else:
|
||||
web_content = ''
|
||||
search_text = ''
|
||||
if use_pattern == '模型问答':
|
||||
result = application.get_llm_answer(query=input, web_content=web_content)
|
||||
history.append((input, result))
|
||||
search_text += web_content
|
||||
return '', history, history, search_text
|
||||
|
||||
else:
|
||||
resp = application.get_knowledge_based_answer(
|
||||
query=input,
|
||||
history_len=1,
|
||||
|
@ -81,7 +92,6 @@ def predict(input,
|
|||
chat_history=history
|
||||
)
|
||||
history.append((input, resp['result']))
|
||||
search_text = ''
|
||||
for idx, source in enumerate(resp['source_documents'][:4]):
|
||||
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
|
||||
search_text += f'{sep}\n{source.page_content}\n\n'
|
||||
|
@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
step=1,
|
||||
label="检索top-k文档",
|
||||
interactive=True)
|
||||
kg_name = gr.Radio(['中文维基百科',
|
||||
'大规模金融研报知识图谱',
|
||||
'初始化知识库'
|
||||
],
|
||||
label="知识库",
|
||||
value='初始化知识库',
|
||||
interactive=True)
|
||||
set_kg_btn = gr.Button("重新加载知识库")
|
||||
|
||||
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
||||
info="是否使用网络搜索,使用时确保网络通常",
|
||||
value="不使用"
|
||||
)
|
||||
use_pattern = gr.Radio(
|
||||
[
|
||||
'模型问答',
|
||||
'知识库问答',
|
||||
],
|
||||
label="模式",
|
||||
value='模型问答',
|
||||
interactive=True)
|
||||
|
||||
kg_name = gr.Radio(['中文维基百科',
|
||||
'大规模金融研报知识图谱',
|
||||
'初始化知识库'
|
||||
],
|
||||
label="知识库",
|
||||
value=None,
|
||||
info="使用知识库问答,请加载知识库",
|
||||
interactive=True)
|
||||
set_kg_btn = gr.Button("加载知识库")
|
||||
|
||||
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
||||
visible=True,
|
||||
file_types=['.txt', '.md', '.docx', '.pdf']
|
||||
)
|
||||
|
||||
file.upload(upload_file,
|
||||
inputs=file,
|
||||
outputs=None)
|
||||
with gr.Column(scale=4):
|
||||
with gr.Row():
|
||||
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
||||
|
@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
with gr.Column(scale=2):
|
||||
search = gr.Textbox(label='搜索结果')
|
||||
|
||||
# ============= 触发动作=============
|
||||
file.upload(upload_file,
|
||||
inputs=file,
|
||||
outputs=None)
|
||||
set_kg_btn.click(
|
||||
set_knowledge,
|
||||
show_progress=True,
|
||||
|
@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
# 发送按钮 提交
|
||||
send.click(predict,
|
||||
inputs=[
|
||||
message, large_language_model,
|
||||
embedding_model, top_k, use_web,
|
||||
|
||||
message,
|
||||
large_language_model,
|
||||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
state
|
||||
],
|
||||
outputs=[message, chatbot, state, search])
|
||||
|
@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|||
# 输入框 回车
|
||||
message.submit(predict,
|
||||
inputs=[
|
||||
message, large_language_model,
|
||||
embedding_model, top_k, use_web,
|
||||
message,
|
||||
large_language_model,
|
||||
embedding_model,
|
||||
top_k,
|
||||
use_web,
|
||||
use_pattern,
|
||||
state
|
||||
],
|
||||
outputs=[message, chatbot, state, search])
|
||||
|
|
Loading…
Reference in New Issue