基于LangChain的RAG应用开发（04）-不使用LangChain的链实现带历史消息的RAG应用

摘要

在上文中，学习了使用内置链、LCEL、Agent的方式构建带有历史消息记录的RAG应用。除了基于Agent的构建方式外，其他两种构建方式大体流程类似：基本都是先构建Prompt模板，然后使用LCEL或内置链的形式，将大模型、Prompt等组装起来。无论是LCEL还是内置链，都是基于LangChain的链去完成的。

为了进一步理解各步骤的执行情况，本文将不使用LangChain的链，而是手动串联各个步骤来构建RAG应用，以便更直观地体会数据在各步骤之间的流转过程。

LLM类

简单实现LLM的初始化，并带有返回模型的功能，模型包括：LLM、Embedding。代码如下：

from langchain_openai import ChatOpenAI
from langchain_community.embeddings import ZhipuAIEmbeddings
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment
# before any of them are read below.
load_dotenv()


class LLMModel:
    """Factory for the chat model and the embedding model used by the app.

    All credentials and endpoints are read from environment variables when
    the instance is created; a variable that is not set yields ``None``.
    """

    def __init__(self):
        read_env = os.getenv
        # Settings for the OpenAI-compatible chat endpoint.
        self.api_key = read_env("api_key")
        self.base_url = read_env("base_url")
        # Settings for the ZhipuAI embedding service.
        self.zhipu_api_key = read_env("ZHIPU_API_KEY")
        self.zhipu_embedding_model = read_env("ZHIPU_EMBEDDING_MODEL")

    def get_llm_model(self):
        """Return a new ``ChatOpenAI`` client for the ``deepseek-chat`` model."""
        chat_model = ChatOpenAI(
            model="deepseek-chat",
            api_key=self.api_key,
            base_url=self.base_url,
        )
        return chat_model

    def get_embedding_model(self):
        """Return a new ``ZhipuAIEmbeddings`` client built from the env settings."""
        embedding_model = ZhipuAIEmbeddings(
            api_key=self.zhipu_api_key,
            model=self.zhipu_embedding_model,
        )
        return embedding_model

向量库Chroma类

初始化向量库，同时需要具备获取检索器以及文档的增、删功能，具体如下：

from langchain_chroma import Chroma
from llm_model import LLMModel
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader


class ChromaDB:
    """Wrapper around a Chroma vector store persisted under ``./chroma_db``."""

    def __init__(self):
        embedding_model = LLMModel().get_embedding_model()
        self.db = Chroma(
            embedding_function=embedding_model,
            persist_directory="./chroma_db",
        )

    def get_retriever(self):
        """Return a retriever over the store that uses ``search_type="mmr"``."""
        retriever = self.db.as_retriever(search_type="mmr")
        return retriever

    def add_docs(self, docs):
        """Insert every document in ``docs`` into the store, one call per document."""
        for single_doc in docs:
            self.db.add_documents([single_doc])

    def load_save_documents(self, file_path):
        """Load a PDF, split it into chunks and store the chunks in the vector store.

        :param file_path: location of the PDF handed to ``PyPDFLoader``.
        """
        # TODO: validate the path before loading.
        loader = PyPDFLoader(file_path)
        # Chunks are at most 100 characters (``len`` is the length function)
        # with a 20-character overlap between neighbouring chunks.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=100,
            chunk_overlap=20,
            is_separator_regex=True,
            length_function=len,
        )
        pages = list(loader.lazy_load())
        chunks = splitter.split_documents(pages)
        self.add_docs(chunks)

    def delete_docs(self, ids):
        """Delete the documents with the given ids from the store."""
        self.db.delete(ids)

构建历史消息持久化存储

不使用LangChain的BaseChatMessageHistory类来存储历史消息，使用自定义类存储，此处简单使用csv文件进行演示，实现历史消息存储和查询功能，使用session_id区分会话。代码如下：

import os
import pandas as pd


class ChatHistory:
    """Chat history persisted in a CSV file, one row per question/answer turn.

    Rows carry three columns: ``session_id``, ``user_message`` and
    ``bot_message``. The session id is always stored and compared as a string.
    """

    _COLUMNS = ['session_id', 'user_message', 'bot_message']

    def __init__(self, chat_history_file_path='./chat_history.csv'):
        self.chat_history_file_path = chat_history_file_path

    def _has_content(self):
        """Return True when the history file exists and is not empty."""
        path = self.chat_history_file_path
        return os.path.exists(path) and os.path.getsize(path) > 0

    def save_chat_history(self, session_id, user_message, bot_message,):
        """
        Append one conversation turn to the history file.

        :param session_id: conversation id; stored as ``str(session_id)``
        :param user_message: message entered by the user
        :param bot_message: reply produced by the LLM
        :return: None
        """
        turn = pd.DataFrame({
            'session_id': [str(session_id)],
            'user_message': [user_message],
            'bot_message': [bot_message],
        })
        # A missing or zero-byte file still needs the header row; otherwise
        # the first data row would later be parsed as the header.
        append = self._has_content()
        turn.to_csv(
            self.chat_history_file_path,
            mode='a' if append else 'w',
            header=not append,
            index=False,
        )

    def load_chat_history(self, session_id):
        """
        Return the stored turns that belong to one session.

        :param session_id: conversation id; compared as ``str(session_id)``
        :return: DataFrame with the columns ``session_id``, ``user_message``
                 and ``bot_message``; empty when nothing is stored
        """
        if not self._has_content():
            return pd.DataFrame(columns=self._COLUMNS)
        # Read every column as text. With dtype inference a file whose
        # session ids are all numeric is parsed as integers, and the string
        # comparison below would never match. keep_default_na=False keeps
        # empty or "NA"-like messages as text instead of NaN.
        df = pd.read_csv(
            self.chat_history_file_path,
            dtype=str,
            keep_default_na=False,
        )
        return df.loc[df['session_id'] == str(session_id)]

构建带有历史消息的ChatBot类

自定义问答ChatBot类，具备保存、查询历史消息，基于历史消息进行问答的功能。代码如下：

from llm_model import LLMModel
from chat_history import ChatHistory
from langchain_core.messages import HumanMessage, SystemMessage


class ChatBot:
    """Chat bot that answers questions using the stored history of a session.

    No LangChain chain is involved: the history is loaded, formatted into the
    system prompt and sent to the chat model by hand.
    """

    def __init__(self):
        self.llm_model = LLMModel()
        self.ch = ChatHistory()
        self.llm = self.llm_model.get_llm_model()

    def add_chat_record(self, session_id, question, answer):
        """Persist one question/answer turn for ``session_id``."""
        self.ch.save_chat_history(session_id, question, answer)

    def get_chat_record(self, session_id):
        """Return the session history as a list of dicts, oldest turn first.

        Each dict has the keys ``"human_message"`` and ``"ai_meaasge"``.
        NOTE(review): the second key is misspelled; it is kept unchanged
        because it is part of the returned shape and of the prompt text.
        """
        history_pd = self.ch.load_chat_history(session_id)
        history = []
        # Iterating an empty frame yields nothing, so no length guard is needed.
        for _, row in history_pd.iterrows():
            values = row.tolist()
            # Column order is session_id, user_message, bot_message.
            history.append({"human_message": values[1], "ai_meaasge": values[2]})
        return history

    def get_answer_with_session(self, session_id, question):
        """Answer ``question`` in the context of the session and record the turn.

        :param session_id: id of the conversation the question belongs to
        :param question: the user's question
        :return: the response object returned by the chat model
        """
        history = self.get_chat_record(session_id)
        self.system_prompt = f"你是一个深度学习的专家，你需要根据用户的输入，以及用户上下文：{history}，理解用户问题，给出通俗易懂，且简洁的回复。如果用户上下文不存在，可以忽略上下文理解用户问题。如果用户问题与上下文没有关联密切关联，可以忽略上下文理解问题。"
        # The system message must lead the message list; the original sent
        # it after the human message.
        messages = [
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=question),
        ]
        response = self.llm.invoke(messages)
        bot_message = response.content
        self.add_chat_record(session_id, question, bot_message)
        return response

总结：
在上述代码中，并没有使用LangChain的链机制，而是在get_answer_with_session方法中手动实现了数据流转。

构建带有历史消息的RAG应用

将ChatBot作为基类，重写get_answer_with_session方法，实现知识库检索功能。代码如下：

from chat_bot import ChatBot
from langchain_core.messages import HumanMessage, SystemMessage
from chroma_db import ChromaDB


class RAGChatBot(ChatBot):
    """ChatBot that also retrieves knowledge-base passages before answering."""

    def __init__(self):
        super().__init__()
        self.chroma_db = ChromaDB()
        self.retriever = self.chroma_db.get_retriever()

    def get_context(self, question, session_id, history=None):
        """Rewrite the question as a standalone one and query the knowledge base.

        :param question: the latest user question
        :param session_id: id of the conversation the question belongs to
        :param history: optional, already-loaded history of the session; when
                        omitted it is loaded via ``get_chat_record``
        :return: the retriever's result for the rewritten question
        """
        print("开始查询历史消息记录！")
        if history is None:
            history = self.get_chat_record(session_id)
        print(f"历史记录为：{history}")
        print("———————————————————————————————————————")
        self.contextualize_system_prompt = (
            f"给定一个聊天历史和最新的用户问题，该最新的用户问题可能需要引用聊天历史中的上下文，制定一个独立的问题，"
            f"可以在没有聊天历史的情况下理解。不要回答任何问题，只要在需要时重新表述最新问题，否则就原样返回。聊天历史如下：{history}"
        )
        print("开始重构用户问题！")
        # System instructions go first, followed by the user question.
        response = self.llm.invoke([
            SystemMessage(content=self.contextualize_system_prompt),
            HumanMessage(content=question),
        ])
        print(f"重构后的用户提问为：{response.content}")
        print("———————————————————————————————————————")
        print("开始查询知识库！")
        context = self.retriever.invoke(response.content)
        return context

    def get_answer_with_session(self, session_id, question):
        """
        Answer ``question`` using the session history and retrieved passages.

        :param session_id: id of the conversation the question belongs to
        :param question: the user's question
        :return: the response object returned by the chat model
        """
        history = self.get_chat_record(session_id)
        # Hand the loaded history over so it is not read from disk twice.
        context = self.get_context(question, session_id, history=history)
        context_format = "\n\n".join(doc.page_content for doc in context)
        print(f"知识库查询结果为：{context}")
        print("———————————————————————————————————————")
        self.system_prompt = f"你是一个基于深度学习数据融合的专家，你需要根据用户的输入、用户历史会话：{history}、以及用户上下文：{context_format}，理解用户问题，给出通俗易懂，且简洁的回复。"
        # System instructions go first, followed by the user question.
        response = self.llm.invoke([
            SystemMessage(content=self.system_prompt),
            HumanMessage(content=question),
        ])
        self.add_chat_record(session_id, question, response.content)
        return response

原文地址：https://www.cnblogs.com/AfroNicky/p/18910831

摘要

LLM类

向量库Chroma类

构建历史消息持久化存储

构建带有历史消息的ChatBot类

构建带有历史消息的RAG应用

Fish AI Reader

FishAI

联系邮箱 441953276@qq.com

相关标签