14 Practice Lab: RAG Implementation
14.1 What You’ll Build
A RAG Document Assistant that:
- Answers questions from your documents with citations
- Evaluates its own answer quality (faithfulness + relevance)
- Shows retrieved context alongside answers
- Works over PDF, TXT, and Markdown files
14.2 Step 1: Complete RAG Pipeline
# file: rag_assistant.py
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from pathlib import Path
import os
class RAGAssistant:
    """Production-ready RAG document assistant.

    Builds a persistent Chroma vector index over a folder of PDF, TXT, and
    Markdown files, then answers questions grounded ONLY in the retrieved
    chunks, citing its sources.
    """

    RAG_PROMPT = ChatPromptTemplate.from_messages([
        ("system", """You are an expert document analyst. Answer questions
based ONLY on the provided context. Always cite your sources.
If the answer is not in the context, say: "I couldn't find information
about this in the provided documents."
Context:
{context}
When citing, use the format: [Source: filename, chunk X]
"""),
        ("human", "{question}")
    ])

    def __init__(self, docs_folder: str, model: str = "gpt-4o-mini"):
        """Create the assistant and immediately build the index.

        Args:
            docs_folder: Folder scanned recursively for .pdf/.txt/.md files.
            model: OpenAI chat model used for answering.
        """
        # temperature=0 for reproducible, grounded answers.
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.vectorstore = None
        self._build_index(docs_folder)

    def _load_documents(self, folder: str):
        """Recursively load every supported document under *folder*."""
        documents = []
        for path in Path(folder).rglob("*"):
            suffix = path.suffix.lower()  # accept .PDF / .TXT / .MD too
            if suffix == ".pdf":
                documents.extend(PyPDFLoader(str(path)).load())
            elif suffix in (".txt", ".md"):
                # Explicit encoding avoids platform-dependent decode errors.
                documents.extend(TextLoader(str(path), encoding="utf-8").load())
        return documents

    def _build_index(self, docs_folder: str):
        """Load, chunk, embed, and persist the vector index."""
        print("📚 Loading documents...")
        docs = self._load_documents(docs_folder)
        print("✂️ Chunking text...")
        # 150-char overlap keeps sentences that straddle a chunk boundary
        # retrievable from either side.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=900, chunk_overlap=150
        )
        chunks = splitter.split_documents(docs)
        print(f"🔢 Creating embeddings for {len(chunks)} chunks...")
        self.vectorstore = Chroma.from_documents(
            chunks, self.embeddings,
            persist_directory="./rag_index"
        )
        print(f"✅ Index ready! ({len(chunks)} chunks)")

    def _format_docs_with_sources(self, docs) -> str:
        """Format retrieved docs with source metadata for the prompt.

        BUG FIX: the original computed *filename* but interpolated a literal
        placeholder instead, and joined chunks with a bare "---" that glued
        the delimiter straight onto the chunk text.
        """
        formatted = []
        for i, doc in enumerate(docs):
            source = doc.metadata.get("source", "Unknown")
            filename = Path(source).name if source != "Unknown" else "Unknown"
            formatted.append(
                f"[Chunk {i+1} | Source: {filename}]\n{doc.page_content}"
            )
        return "\n\n" + "\n\n---\n\n".join(formatted)

    def ask(self, question: str, k: int = 4) -> dict:
        """Answer a question with full retrieved context.

        Args:
            question: Natural-language question.
            k: Number of chunks handed to the LLM (MMR re-ranks fetch_k=20
               candidates down to k for diversity).

        Returns:
            dict with keys: question, answer, sources (filenames), context.
        """
        retriever = self.vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": k, "fetch_k": 20}
        )
        retrieved_docs = retriever.invoke(question)
        context = self._format_docs_with_sources(retrieved_docs)
        response = self.llm.invoke(
            self.RAG_PROMPT.format_messages(
                context=context,
                question=question
            )
        )
        return {
            "question": question,
            "answer": response.content,
            "sources": [Path(d.metadata.get("source", "")).name
                        for d in retrieved_docs],
            "context": context
        }

    def interactive(self):
        """Start an interactive Q&A session on stdin."""
        print("\n🤖 RAG Document Assistant")
        print("Commands: 'quit' | 'sources' (show retrieved context)\n")
        show_sources = False
        while True:
            try:
                question = input("❓ Ask: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Exit cleanly on Ctrl-D / Ctrl-C instead of a traceback.
                break
            if not question:
                continue
            if question.lower() == "quit":
                break
            if question.lower() == "sources":
                show_sources = not show_sources
                print(f"Source display: {'ON' if show_sources else 'OFF'}")
                continue
            result = self.ask(question)
            print(f"\n💬 Answer:\n{result['answer']}")
            print(f"\n📎 Sources: {', '.join(set(result['sources']))}")
            if show_sources:
                print(f"\n📄 Context:\n{result['context'][:1000]}...")
# Usage
assistant = RAGAssistant("./documents")
assistant.interactive()14.3 Step 2: RAG Evaluation
Evaluate your RAG system’s quality automatically:
# file: rag_eval.py
from openai import OpenAI
client = OpenAI()
def evaluate_rag_answer(question: str, answer: str, context: str) -> dict:
    """Evaluate a RAG answer with an LLM-as-judge.

    Scores faithfulness, relevance, completeness, and conciseness (1-5),
    plus an overall score and a free-text list of issues.

    Args:
        question: The user question that was asked.
        answer: The answer produced by the RAG pipeline.
        context: The retrieved context the answer was based on.

    Returns:
        dict parsed from the judge model's JSON response.
    """
    import json  # hoisted to the top of the function (was after the API call)

    # Truncate context so the judge prompt stays within a modest token budget.
    eval_prompt = f"""Evaluate this RAG system response.
Question: {question}
Retrieved Context: {context[:1500]}
Generated Answer: {answer}
Score each dimension 1-5:
1. Faithfulness: Is the answer supported by the context?
2. Relevance: Does the answer address the question?
3. Completeness: Does it cover all aspects of the question?
4. Conciseness: Is it appropriately concise?
Return JSON only:
{{"faithfulness": X, "relevance": X, "completeness": X, "conciseness": X,
"overall": X, "issues": "list any problems"}}"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": eval_prompt}],
        response_format={"type": "json_object"},  # forces valid JSON output
        temperature=0  # deterministic judging
    )
    return json.loads(response.choices[0].message.content)
# Run evaluation suite
test_questions = [
"What is the main topic of the documents?",
"What are the key recommendations?",
"Are there any limitations mentioned?"
]
assistant = RAGAssistant("./documents")
print("\n📊 RAG Evaluation Report")
print("=" * 50)
for q in test_questions:
result = assistant.ask(q)
scores = evaluate_rag_answer(q, result["answer"], result["context"])
print(f"\nQ: {q[:60]}")
print(f" Faithfulness: {scores['faithfulness']}/5")
print(f" Relevance: {scores['relevance']}/5")
print(f" Overall: {scores['overall']}/5")
if scores.get("issues"):
print(f" Issues: {scores['issues']}")14.4 Lab Challenges 🏆
- Easy: Add document upload support for new PDFs without rebuilding the whole index
- Medium: Build a Streamlit UI with a chat interface and source highlighting
- Hard: Implement RAGAS evaluation metrics (faithfulness, answer relevance, context recall)