# 14. Practice Lab: RAG Implementation

> **Important** 🧪 Lab Overview

Duration: 2–3 hours | Difficulty: ⭐⭐⭐☆☆ (Intermediate)
Goal: Build a complete RAG-powered document assistant with source citations and evaluation.

14.1 What You’ll Build

A RAG Document Assistant that:

- Answers questions from your documents with citations
- Evaluates its own answer quality (faithfulness + relevance)
- Shows retrieved context alongside answers
- Works over PDF, TXT, and Markdown files


14.2 Step 1: Complete RAG Pipeline

# file: rag_assistant.py
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from pathlib import Path
import os

class RAGAssistant:
    """Production-ready RAG document assistant.

    Indexes all PDF/TXT/Markdown files under a folder into a persistent
    Chroma vector store, then answers questions with MMR retrieval and
    per-chunk source citations.
    """

    RAG_PROMPT = ChatPromptTemplate.from_messages([
        ("system", """You are an expert document analyst. Answer questions
based ONLY on the provided context. Always cite your sources.

If the answer is not in the context, say: "I couldn't find information
about this in the provided documents."

Context:
{context}

When citing, use the format: [Source: filename, chunk X]
"""),
        ("human", "{question}")
    ])

    def __init__(self, docs_folder: str, model: str = "gpt-4o-mini"):
        """Build the index over *docs_folder* and prepare LLM + embeddings.

        Args:
            docs_folder: Directory scanned recursively for .pdf/.txt/.md files.
            model: OpenAI chat model name used for answering.
        """
        # temperature=0 keeps answers deterministic and citation-faithful.
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.vectorstore = None
        self._build_index(docs_folder)

    def _load_documents(self, folder: str) -> list:
        """Load all supported documents (PDF, TXT, Markdown) from *folder*.

        Suffix matching is case-insensitive so files like REPORT.PDF are
        not silently skipped.
        """
        documents = []
        for path in Path(folder).rglob("*"):
            suffix = path.suffix.lower()
            if suffix == ".pdf":
                documents.extend(PyPDFLoader(str(path)).load())
            elif suffix in (".txt", ".md"):
                documents.extend(TextLoader(str(path)).load())
        return documents

    def _build_index(self, docs_folder: str) -> None:
        """Load, chunk, embed, and persist the vector index.

        Raises:
            ValueError: If no supported documents are found, to avoid an
                opaque failure inside ``Chroma.from_documents``.
        """
        print("📚 Loading documents...")
        docs = self._load_documents(docs_folder)
        if not docs:
            raise ValueError(
                f"No PDF/TXT/MD documents found in {docs_folder!r}"
            )

        print("✂️  Chunking text...")
        # 900/150 balances context coherence against embedding granularity.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=900, chunk_overlap=150
        )
        chunks = splitter.split_documents(docs)

        print(f"🔢 Creating embeddings for {len(chunks)} chunks...")
        self.vectorstore = Chroma.from_documents(
            chunks, self.embeddings,
            persist_directory="./rag_index"
        )
        print(f"✅ Index ready! ({len(chunks)} chunks)")

    def _format_docs_with_sources(self, docs) -> str:
        """Format retrieved docs with per-chunk source headers.

        Each chunk is labelled ``[Chunk N | Source: filename]`` so the LLM
        can cite it; chunks are separated by a ``---`` divider on its own
        line. (Bug fixed: the filename is now actually interpolated, and
        the divider is surrounded by newlines so chunks don't run together.)
        """
        formatted = []
        for i, doc in enumerate(docs):
            source = doc.metadata.get("source", "Unknown")
            filename = Path(source).name if source != "Unknown" else "Unknown"
            formatted.append(
                f"[Chunk {i+1} | Source: {filename}]\n{doc.page_content}"
            )
        return "\n\n" + "\n\n---\n\n".join(formatted)

    def ask(self, question: str, k: int = 4) -> dict:
        """Answer *question* from the indexed documents.

        Args:
            question: Natural-language question.
            k: Number of chunks to pass to the LLM.

        Returns:
            Dict with ``question``, ``answer``, ``sources`` (filenames of
            retrieved chunks), and ``context`` (the formatted chunk text).
        """
        # MMR trades a little relevance for diversity: fetch 20 candidates,
        # keep the k most relevant-but-distinct ones.
        retriever = self.vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k, "fetch_k": 20}
        )

        retrieved_docs = retriever.invoke(question)
        context = self._format_docs_with_sources(retrieved_docs)

        response = self.llm.invoke(
            self.RAG_PROMPT.format_messages(
                context=context,
                question=question
            )
        )

        return {
            "question": question,
            "answer": response.content,
            "sources": [Path(d.metadata.get("source", "")).name
                        for d in retrieved_docs],
            "context": context
        }

    def interactive(self):
        """Start an interactive Q&A loop on stdin/stdout.

        Commands: 'quit' exits; 'sources' toggles display of the retrieved
        context under each answer.
        """
        print("\n🤖 RAG Document Assistant")
        print("Commands: 'quit' | 'sources' (show retrieved context)\n")

        show_sources = False
        while True:
            question = input("❓ Ask: ").strip()
            if not question:
                continue
            if question.lower() == "quit":
                break
            if question.lower() == "sources":
                show_sources = not show_sources
                print(f"Source display: {'ON' if show_sources else 'OFF'}")
                continue

            result = self.ask(question)

            print(f"\n💬 Answer:\n{result['answer']}")
            print(f"\n📎 Sources: {', '.join(set(result['sources']))}")

            if show_sources:
                # Truncate: full context can be thousands of characters.
                print(f"\n📄 Context:\n{result['context'][:1000]}...")

# Usage: build the index over ./documents and start the interactive REPL.
# Guarded so importing rag_assistant (e.g. from rag_eval.py) does not
# launch the blocking input() loop.
if __name__ == "__main__":
    assistant = RAGAssistant("./documents")
    assistant.interactive()

14.3 Step 2: RAG Evaluation

Evaluate your RAG system’s quality automatically:

# file: rag_eval.py
from openai import OpenAI

client = OpenAI()

def evaluate_rag_answer(question: str, answer: str, context: str) -> dict:
    """Score a RAG answer with an LLM judge.

    Args:
        question: The user question that was asked.
        answer: The RAG system's generated answer.
        context: The retrieved context the answer was based on
            (truncated to 1500 chars to bound prompt size).

    Returns:
        Dict with integer 1-5 scores for ``faithfulness``, ``relevance``,
        ``completeness``, ``conciseness``, ``overall``, plus an ``issues``
        string.
    """
    import json  # hoisted from the point of use; local to keep the snippet self-contained

    eval_prompt = f"""Evaluate this RAG system response.

Question: {question}
Retrieved Context: {context[:1500]}
Generated Answer: {answer}

Score each dimension 1-5:
1. Faithfulness: Is the answer supported by the context?
2. Relevance: Does the answer address the question?
3. Completeness: Does it cover all aspects of the question?
4. Conciseness: Is it appropriately concise?

Return JSON only:
{{"faithfulness": X, "relevance": X, "completeness": X, "conciseness": X,
  "overall": X, "issues": "list any problems"}}"""

    # response_format=json_object guarantees syntactically valid JSON;
    # temperature=0 keeps the judge deterministic.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": eval_prompt}],
        response_format={"type": "json_object"},
        temperature=0
    )
    return json.loads(response.choices[0].message.content)

# Evaluation suite: ask each canned question, have the judge score the
# answer, and print a compact report.
test_questions = [
    "What is the main topic of the documents?",
    "What are the key recommendations?",
    "Are there any limitations mentioned?"
]

assistant = RAGAssistant("./documents")

print("\n📊 RAG Evaluation Report")
print("=" * 50)
for q in test_questions:
    result = assistant.ask(q)
    scores = evaluate_rag_answer(q, result["answer"], result["context"])
    report = [
        f"\nQ: {q[:60]}",
        f"  Faithfulness: {scores['faithfulness']}/5",
        f"  Relevance:    {scores['relevance']}/5",
        f"  Overall:      {scores['overall']}/5",
    ]
    issues = scores.get("issues")
    if issues:
        report.append(f"  Issues:       {issues}")
    print("\n".join(report))

14.4 Lab Challenges 🏆

  1. Easy: Add document upload support for new PDFs without rebuilding the whole index
  2. Medium: Build a Streamlit UI with a chat interface and source highlighting
  3. Hard: Implement RAGAS evaluation metrics (faithfulness, answer relevance, context recall)

> **Note** ✅ Lab Complete!

You’ve built a production-quality RAG system! Next: LangGraph for building stateful AI workflows.