66 lines
2.2 KiB
Python
66 lines
2.2 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
@author:quincy qiang
|
|
@license: Apache Licence
|
|
@file: search.py
|
|
@time: 2023/04/17
|
|
@contact: yanqiangmiffy@gamil.com
|
|
@software: PyCharm
|
|
@description: coding..
|
|
"""
|
|
|
|
import os
|
|
|
|
from langchain.document_loaders import UnstructuredFileLoader
|
|
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
|
from langchain.vectorstores import FAISS
|
|
|
|
|
|
class SourceService:
    """Build, extend and load a FAISS vector store over local documents.

    Settings are read from ``config``, which must expose the attributes
    ``embedding_model_name``, ``docs_path`` and ``vector_store_path``.
    """

    def __init__(self, config):
        """Create the embedding model and remember the configured paths.

        :param config: settings object providing ``embedding_model_name``,
            ``docs_path`` and ``vector_store_path``.
        """
        self.config = config
        self.embeddings = HuggingFaceEmbeddings(model_name=self.config.embedding_model_name)
        self.docs_path = self.config.docs_path
        self.vector_store_path = self.config.vector_store_path
        # Defined up front so the attribute always exists; it is populated by
        # init_source_vector(), load_vector_store() or add_document().
        self.vector_store = None

    def init_source_vector(self) -> None:
        """Initialize the local knowledge-base vectors.

        Loads every ``.txt`` entry found directly in ``docs_path``, builds a
        FAISS index from the loaded documents with ``self.embeddings`` and
        saves it to ``vector_store_path``.

        :raises ValueError: if no documents could be loaded from ``.txt``
            files in ``docs_path`` (nothing is built or saved in that case).
        :return: None
        """
        docs = []
        # os.listdir() returns entries in arbitrary order; sort them so the
        # index is built in a reproducible order.
        for file_name in sorted(os.listdir(self.docs_path)):
            if not file_name.endswith('.txt'):
                continue
            print(file_name)
            loader = UnstructuredFileLoader(
                os.path.join(self.docs_path, file_name), mode="elements"
            )
            docs.extend(loader.load())

        # Fail early with a clear message instead of handing an empty list
        # to FAISS.from_documents.
        if not docs:
            raise ValueError(
                f"No documents could be loaded from '.txt' files in {self.docs_path!r}"
            )

        self.vector_store = FAISS.from_documents(docs, self.embeddings)
        self.vector_store.save_local(self.vector_store_path)

    def add_document(self, document_path) -> None:
        """Add one document to the vector store and persist the result.

        If no store is held in memory yet, the one saved at
        ``vector_store_path`` is loaded first.

        :param document_path: path of the file to load and index.
        :return: None
        """
        loader = UnstructuredFileLoader(document_path, mode="elements")
        elements = loader.load()
        # getattr() also covers instances whose __init__ was bypassed or
        # overridden without defining the attribute.
        if getattr(self, "vector_store", None) is None:
            self.load_vector_store()
        self.vector_store.add_documents(elements)
        self.vector_store.save_local(self.vector_store_path)

    def load_vector_store(self) -> "FAISS":
        """Load the vector store saved at ``vector_store_path``.

        The loaded store is kept on ``self.vector_store`` and also returned.

        :return: the loaded FAISS vector store.
        """
        self.vector_store = FAISS.load_local(self.vector_store_path, self.embeddings)
        return self.vector_store
|
|
|
|
# if __name__ == '__main__':
|
|
# config = LangChainCFG()
|
|
# source_service = SourceService(config)
|
|
# source_service.init_source_vector()
|
|
# search_result = source_service.vector_store.similarity_search_with_score('科比')
|
|
# print(search_result)
|
|
#
|
|
# source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/科比.txt')
|
|
# search_result = source_service.vector_store.similarity_search_with_score('科比')
|
|
# print(search_result)
|
|
#
|
|
# vector_store=source_service.load_vector_store()
|
|
# search_result = source_service.vector_store.similarity_search_with_score('科比')
|
|
# print(search_result)
|