8 Practice Lab: LangChain
8.1 What You’ll Build
A Document Q&A Assistant that:

- Loads and processes PDF/text documents
- Answers questions about the document content
- Remembers conversation context
- Streams responses in real time
8.2 Step 1: Install LangChain
pip install langchain langchain-openai langchain-community \
  python-dotenv pypdf rich

8.3 Step 2: Basic LCEL Pipeline
# file: langchain_basics.py
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Build a simple summarisation chain.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)

# Prompt template with three fill-in slots: tone, length budget, and the text.
prompt = ChatPromptTemplate.from_template("""
Summarise the following text in {style} style.
Keep it to {max_sentences} sentences.
Text: {text}
Summary:
""")

# LCEL pipeline: prompt -> chat model -> plain-string output.
chain = prompt | model | StrOutputParser()

# Test it on a short sample passage.
sample_inputs = {
    "text": """Artificial intelligence is transforming how businesses operate.
From automating routine tasks to enabling sophisticated data analysis,
AI tools are becoming essential for competitive advantage.""",
    "style": "professional executive",
    "max_sentences": "2",
}
result = chain.invoke(sample_inputs)
print(result)

8.4 Step 3: Document Q&A System
# file: document_qa.py
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
def load_documents(file_path: str):
    """Load documents from a file path.

    Dispatches on the file extension: PDFs are read with ``PyPDFLoader``,
    everything else is treated as plain text via ``TextLoader``.

    Args:
        file_path: Path to a ``.pdf`` or plain-text file.

    Returns:
        The list of Document objects produced by the chosen loader
        (one per page for PDFs).
    """
    # Case-insensitive extension check: the original `.endswith(".pdf")`
    # mis-routed names like "REPORT.PDF" to TextLoader, which fails on
    # binary PDF bytes.
    if file_path.lower().endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    else:
        loader = TextLoader(file_path)
    return loader.load()
def build_qa_chain(documents):
    """Assemble a retrieval-augmented Q&A chain over *documents*.

    Pipeline: chunk the documents, embed the chunks into a Chroma vector
    store, then wire retriever -> prompt -> chat model -> string output.

    Args:
        documents: LangChain Document objects to index.

    Returns:
        A runnable chain mapping a question string to an answer string.
    """

    def _join_pages(docs):
        # Flatten the retrieved chunks into one prompt-ready context string.
        return "\n\n".join(doc.page_content for doc in docs)

    # 1) Chunk with overlap so answers spanning a chunk boundary remain
    #    recoverable from at least one chunk.
    chunked = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    ).split_documents(documents)

    # 2) Embed the chunks and index them; retrieval returns the 4 most
    #    similar chunks per query.
    store = Chroma.from_documents(
        chunked, OpenAIEmbeddings(model="text-embedding-3-small")
    )
    fetch_context = store.as_retriever(search_kwargs={"k": 4})

    # 3) Grounded-answer prompt: the model is told to stick to the
    #    retrieved context and admit when the answer is absent.
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert analyst. Answer questions using only
the provided context. If the answer isn't in the context, say so.
Context:
{context}
"""),
        ("human", "{question}"),
    ])

    # 4) LCEL graph: the incoming question fans out to retrieval (formatted
    #    into context) and a passthrough, then flows through the prompt,
    #    a deterministic (temperature=0) chat model, and a string parser.
    return (
        {"context": fetch_context | _join_pages, "question": RunnablePassthrough()}
        | answer_prompt
        | ChatOpenAI(model="gpt-4o-mini", temperature=0)
        | StrOutputParser()
    )
# Usage
# Load the source document, build the RAG chain once, then run a batch of
# sample questions against it.
docs = load_documents("your_document.txt")
qa_chain = build_qa_chain(docs)
# Sample questions covering topic, recommendations, and supporting evidence.
questions = [
    "What is the main topic of this document?",
    "What are the key recommendations?",
    "What evidence supports the main argument?"
]
for q in questions:
    # Echo the question; the answer is printed on the following line of the
    # original script (each question triggers one chain invocation).
    print(f"\n❓ {q}")
    print(f"💬 {qa_chain.invoke(q)}")

8.5 Step 4: Add Streaming
# Streaming responses
import sys
# Emit the answer incrementally as tokens arrive instead of waiting for the
# full completion; flush=True pushes each chunk straight to the terminal.
print("💬 ", end="", flush=True)
for chunk in qa_chain.stream("What are the key findings?"):
    print(chunk, end="", flush=True)
# NOTE(review): redundant given flush=True above; original layout is
# ambiguous about whether this sat inside or after the loop — confirm.
sys.stdout.flush()
print()  # newline at end

8.6 Lab Challenges 🏆
- Easy: Add a document word count summary before starting Q&A
- Medium: Support multiple documents loaded from a folder
- Hard: Add conversation memory so follow-up questions work naturally