# 14. Practice Lab: RAG Implementation

> **Important** 🧪 Lab Overview

Duration: 2–3 hours | Difficulty: ⭐⭐⭐☆☆ (Intermediate)
Goal: Build a complete RAG-powered document assistant with source citations and evaluation.

14.1 What You’ll Build

A RAG Document Assistant that:

- Answers questions from your documents with citations
- Evaluates its own answer quality (faithfulness + relevance)
- Shows retrieved context alongside answers
- Works over PDF, TXT, and Markdown files


14.2 Step 1: Complete RAG Pipeline

# file: rag_assistant.py
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from pathlib import Path
import os

class RAGAssistant:
    """Production-ready RAG document assistant.

    Indexes all PDF/TXT/Markdown files under a folder into a persistent
    Chroma vector store, then answers questions with MMR retrieval and
    per-chunk source citations.
    """

    RAG_PROMPT = ChatPromptTemplate.from_messages([
        ("system", """You are an expert document analyst. Answer questions
based ONLY on the provided context. Always cite your sources.

If the answer is not in the context, say: "I couldn't find information
about this in the provided documents."

Context:
{context}

When citing, use the format: [Source: filename, chunk X]
"""),
        ("human", "{question}")
    ])

    def __init__(self, docs_folder: str, model: str = "gpt-4o-mini"):
        """Build the index over *docs_folder* and prepare LLM + embeddings.

        Args:
            docs_folder: Directory scanned recursively for .pdf/.txt/.md files.
            model: OpenAI chat model name used for answering.
        """
        # temperature=0 keeps answers deterministic and citation-faithful.
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.vectorstore = None
        self._build_index(docs_folder)

    def _load_documents(self, folder: str) -> list:
        """Load all supported documents (PDF, TXT, Markdown) from *folder*.

        Suffix matching is case-insensitive so files like REPORT.PDF are
        not silently skipped.
        """
        documents = []
        for path in Path(folder).rglob("*"):
            suffix = path.suffix.lower()
            if suffix == ".pdf":
                documents.extend(PyPDFLoader(str(path)).load())
            elif suffix in (".txt", ".md"):
                documents.extend(TextLoader(str(path)).load())
        return documents

    def _build_index(self, docs_folder: str) -> None:
        """Load, chunk, embed, and persist the vector index.

        Raises:
            ValueError: If no supported documents are found, to avoid an
                opaque failure inside ``Chroma.from_documents``.
        """
        print("📚 Loading documents...")
        docs = self._load_documents(docs_folder)
        if not docs:
            raise ValueError(
                f"No PDF/TXT/MD documents found in {docs_folder!r}"
            )

        print("✂️  Chunking text...")
        # 900/150 balances context coherence against embedding granularity.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=900, chunk_overlap=150
        )
        chunks = splitter.split_documents(docs)

        print(f"🔢 Creating embeddings for {len(chunks)} chunks...")
        self.vectorstore = Chroma.from_documents(
            chunks, self.embeddings,
            persist_directory="./rag_index"
        )
        print(f"✅ Index ready! ({len(chunks)} chunks)")

    def _format_docs_with_sources(self, docs) -> str:
        """Format retrieved docs with per-chunk source headers.

        Each chunk is labelled ``[Chunk N | Source: filename]`` so the LLM
        can cite it; chunks are separated by a ``---`` divider on its own
        line. (Bug fixed: the filename is now actually interpolated, and
        the divider is surrounded by newlines so chunks don't run together.)
        """
        formatted = []
        for i, doc in enumerate(docs):
            source = doc.metadata.get("source", "Unknown")
            filename = Path(source).name if source != "Unknown" else "Unknown"
            formatted.append(
                f"[Chunk {i+1} | Source: {filename}]\n{doc.page_content}"
            )
        return "\n\n" + "\n\n---\n\n".join(formatted)

    def ask(self, question: str, k: int = 4) -> dict:
        """Answer *question* from the indexed documents.

        Args:
            question: Natural-language question.
            k: Number of chunks to pass to the LLM.

        Returns:
            Dict with ``question``, ``answer``, ``sources`` (filenames of
            retrieved chunks), and ``context`` (the formatted chunk text).
        """
        # MMR trades a little relevance for diversity: fetch 20 candidates,
        # keep the k most relevant-but-distinct ones.
        retriever = self.vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k, "fetch_k": 20}
        )

        retrieved_docs = retriever.invoke(question)
        context = self._format_docs_with_sources(retrieved_docs)

        response = self.llm.invoke(
            self.RAG_PROMPT.format_messages(
                context=context,
                question=question
            )
        )

        return {
            "question": question,
            "answer": response.content,
            "sources": [Path(d.metadata.get("source", "")).name
                        for d in retrieved_docs],
            "context": context
        }

    def interactive(self):
        """Start an interactive Q&A loop on stdin/stdout.

        Commands: 'quit' exits; 'sources' toggles display of the retrieved
        context under each answer.
        """
        print("\n🤖 RAG Document Assistant")
        print("Commands: 'quit' | 'sources' (show retrieved context)\n")

        show_sources = False
        while True:
            question = input("❓ Ask: ").strip()
            if not question:
                continue
            if question.lower() == "quit":
                break
            if question.lower() == "sources":
                show_sources = not show_sources
                print(f"Source display: {'ON' if show_sources else 'OFF'}")
                continue

            result = self.ask(question)

            print(f"\n💬 Answer:\n{result['answer']}")
            print(f"\n📎 Sources: {', '.join(set(result['sources']))}")

            if show_sources:
                # Truncate: full context can be thousands of characters.
                print(f"\n📄 Context:\n{result['context'][:1000]}...")

# Usage: build the index over ./documents and start the interactive REPL.
# Guarded so importing rag_assistant (e.g. from rag_eval.py) does not
# launch the blocking input() loop.
if __name__ == "__main__":
    assistant = RAGAssistant("./documents")
    assistant.interactive()

14.3 Step 2: RAG Evaluation

Evaluate your RAG system’s quality automatically:

# file: rag_eval.py
from openai import OpenAI

client = OpenAI()

def evaluate_rag_answer(question: str, answer: str, context: str) -> dict:
    """Score a RAG answer with an LLM judge.

    Args:
        question: The user question that was asked.
        answer: The RAG system's generated answer.
        context: The retrieved context the answer was based on
            (truncated to 1500 chars to bound prompt size).

    Returns:
        Dict with integer 1-5 scores for ``faithfulness``, ``relevance``,
        ``completeness``, ``conciseness``, ``overall``, plus an ``issues``
        string.
    """
    import json  # hoisted from the point of use; local to keep the snippet self-contained

    eval_prompt = f"""Evaluate this RAG system response.

Question: {question}
Retrieved Context: {context[:1500]}
Generated Answer: {answer}

Score each dimension 1-5:
1. Faithfulness: Is the answer supported by the context?
2. Relevance: Does the answer address the question?
3. Completeness: Does it cover all aspects of the question?
4. Conciseness: Is it appropriately concise?

Return JSON only:
{{"faithfulness": X, "relevance": X, "completeness": X, "conciseness": X,
  "overall": X, "issues": "list any problems"}}"""

    # response_format=json_object guarantees syntactically valid JSON;
    # temperature=0 keeps the judge deterministic.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": eval_prompt}],
        response_format={"type": "json_object"},
        temperature=0
    )
    return json.loads(response.choices[0].message.content)

# Evaluation suite: ask each canned question, have the judge score the
# answer, and print a compact report.
test_questions = [
    "What is the main topic of the documents?",
    "What are the key recommendations?",
    "Are there any limitations mentioned?"
]

assistant = RAGAssistant("./documents")

print("\n📊 RAG Evaluation Report")
print("=" * 50)
for q in test_questions:
    result = assistant.ask(q)
    scores = evaluate_rag_answer(q, result["answer"], result["context"])
    report = [
        f"\nQ: {q[:60]}",
        f"  Faithfulness: {scores['faithfulness']}/5",
        f"  Relevance:    {scores['relevance']}/5",
        f"  Overall:      {scores['overall']}/5",
    ]
    issues = scores.get("issues")
    if issues:
        report.append(f"  Issues:       {issues}")
    print("\n".join(report))

14.4 Lab Challenges 🏆

  1. Easy: Add document upload support for new PDFs without rebuilding the whole index
  2. Medium: Build a Streamlit UI with a chat interface and source highlighting
  3. Hard: Implement RAGAS evaluation metrics (faithfulness, answer relevance, context recall)

> **Note** ✅ Lab Complete!

You’ve built a production-quality RAG system! Next: LangGraph for building stateful AI workflows.