Cookbook
Agentic Workflows
Integrate Docling into AI agent workflows with low-latency processing
Agentic Workflows
Warning: this page has not been validated and may contain inaccuracies. Verify each item before relying on it.
Learn how to integrate Docling for IBM watsonx into AI agent workflows for real-time document processing and decision making.
Overview
AI agents need to process documents quickly to make informed decisions. Docling's low-latency mode enables agents to convert documents on-demand during their reasoning process, providing structured content for analysis and action.
What You'll Build
- Low-latency document conversion for agents
- Agent tool integration
- Real-time document analysis
- Multi-step agentic workflows
Prerequisites
- Docling for IBM watsonx account with Service URL and API Key
- Python 3.8+
- LangChain or similar agent framework
- OpenAI API key (or other LLM provider)
Architecture
Low-Latency Mode
Enable low-latency mode for faster agent responses:
from docling.service_client import DoclingServiceClient
import os
SERVICE_URL = os.getenv("DOCLING_SERVICE_URL")
API_KEY = os.getenv("DOCLING_API_KEY")
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
result = client.convert(
source="https://example.com/document.pdf",
options={
"low_latency": True,
"output_format": "markdown"
}
)
content = result.document.export_to_markdown()LangChain Agent Integration
Step 1: Create Docling Tool
from langchain.tools import Tool
from docling.service_client import DoclingServiceClient
import os
SERVICE_URL = os.getenv("DOCLING_SERVICE_URL")
API_KEY = os.getenv("DOCLING_API_KEY")
def convert_document(url: str) -> str:
    """Convert the document at ``url`` and return its markdown export.

    A client session is opened for the duration of the call and the
    conversion is requested with the ``low_latency`` option enabled.
    """
    low_latency_options = {"low_latency": True}
    with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
        conversion = client.convert(source=url, options=low_latency_options)
        return conversion.document.export_to_markdown()
docling_tool = Tool(
name="DocumentConverter",
func=convert_document,
description="""
Useful for converting documents (PDFs, images, Office files) to text.
Input should be a URL to the document.
Returns the document content in markdown format.
"""
)
Step 2: Create Agent
from langchain.agents import initialize_agent, AgentType
from langchain.chat_models import ChatOpenAI
from langchain.tools import Tool
# Initialize LLM
llm = ChatOpenAI(model="gpt-4", temperature=0)
# Create tools list
tools = [
docling_tool,
# Add other tools as needed
]
# Initialize agent
agent = initialize_agent(
tools=tools,
llm=llm,
agent=AgentType.OPENAI_FUNCTIONS,
verbose=True
)
Step 3: Run Agent
# Agent can now convert documents as needed
response = agent.run(
"Analyze this financial report and summarize the key metrics: "
"https://example.com/q4-report.pdf"
)
print(response)
Complete Agent Example
import os
from langchain.agents import initialize_agent, AgentType, Tool
from langchain.chat_models import ChatOpenAI
from docling.service_client import DoclingServiceClient
# Configuration
SERVICE_URL = os.getenv("DOCLING_SERVICE_URL")
API_KEY = os.getenv("DOCLING_API_KEY")
# Document conversion tool
def convert_document(url: str) -> str:
    """Return the markdown export of the document found at ``url``.

    The conversion runs through a ``DoclingServiceClient`` session with the
    ``low_latency`` option turned on.
    """
    with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
        converted = client.convert(
            source=url,
            options={"low_latency": True},
        )
        document = converted.document
        return document.export_to_markdown()
# Extract tables tool
def extract_tables(url: str) -> str:
    """Return the tables of the document at ``url`` as a single string.

    The document is converted in low-latency mode with JSON output. The
    ``content`` of every item whose ``type`` equals ``"table"`` is collected
    and the pieces are joined with blank lines. When no item matches, the
    string ``"No tables found"`` is returned instead.
    """
    # NOTE(review): assumes entries of ``result.document.items`` expose
    # ``type`` and a string ``content`` -- confirm against the client API.
    with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
        result = client.convert(
            source=url,
            options={"low_latency": True, "output_format": "json"},
        )
        table_texts = [
            entry.content
            for entry in result.document.items
            if entry.type == "table"
        ]
        if not table_texts:
            return "No tables found"
        return "\n\n".join(table_texts)
# Create tools
# Each Tool pairs one of the functions above with a name and a description;
# clear descriptions help the agent understand when to use a tool.
tools = [
    Tool(
        name="ConvertDocument",
        func=convert_document,
        description="Convert a document from URL to markdown text"
    ),
    Tool(
        name="ExtractTables",
        func=extract_tables,
        description="Extract all tables from a document"
    )
]
# Initialize agent
# The agent is given the tool list above and the chat model defined here.
llm = ChatOpenAI(model="gpt-4", temperature=0)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True
)
# Use the agent
if __name__ == "__main__":
result = agent.run(
"Compare the financial tables in these two reports: "
"https://example.com/q3-report.pdf and "
"https://example.com/q4-report.pdf"
)
print(result)Multi-Step Workflows
Research Agent
from langchain.agents import Tool
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Research workflow
def research_workflow(topic: str, document_urls: list) -> str:
"""Multi-step research workflow."""
# Step 1: Convert all documents
documents = []
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
for url in document_urls:
result = client.convert(
source=url,
options={"low_latency": True}
)
documents.append({
'url': url,
'content': result.document.export_to_markdown()
})
# Step 2: Analyze each document
analyses = []
for doc in documents:
prompt = PromptTemplate(
input_variables=["topic", "content"],
template="Analyze this document for information about {topic}:\n\n{content}"
)
chain = LLMChain(llm=llm, prompt=prompt)
analysis = chain.run(topic=topic, content=doc['content'])
analyses.append(analysis)
# Step 3: Synthesize findings
synthesis_prompt = PromptTemplate(
input_variables=["topic", "analyses"],
template="Synthesize these analyses about {topic}:\n\n{analyses}"
)
synthesis_chain = LLMChain(llm=llm, prompt=synthesis_prompt)
final_report = synthesis_chain.run(
topic=topic,
analyses="\n\n---\n\n".join(analyses)
)
return final_reportDocument Q&A Agent
def create_qa_agent():
"""Create agent for document Q&A."""
def answer_from_document(question_and_url: str) -> str:
"""Answer question based on document content."""
# Parse input
parts = question_and_url.split("|")
question = parts[0].strip()
url = parts[1].strip()
# Convert document
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
result = client.convert(
source=url,
options={"low_latency": True}
)
content = result.document.export_to_markdown()
# Answer question
prompt = f"Based on this document, answer: {question}\n\nDocument:\n{content}"
response = llm.predict(prompt)
return response
qa_tool = Tool(
name="DocumentQA",
func=answer_from_document,
description="Answer questions about a document. Input format: 'question | document_url'"
)
return initialize_agent(
tools=[qa_tool],
llm=llm,
agent=AgentType.OPENAI_FUNCTIONS
)Performance Optimization
Caching
Cache converted documents to avoid redundant conversions:
from functools import lru_cache
@lru_cache(maxsize=100)
def convert_document_cached(url: str) -> str:
"""Convert document with caching."""
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
result = client.convert(
source=url,
options={"low_latency": True}
)
return result.document.export_to_markdown()Parallel Processing
Process multiple documents in parallel:
from concurrent.futures import ThreadPoolExecutor
def convert_documents_parallel(urls: list) -> list:
"""Convert multiple documents in parallel."""
with ThreadPoolExecutor(max_workers=5) as executor:
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
futures = [
executor.submit(
client.convert,
source=url,
options={"low_latency": True}
)
for url in urls
]
results = [f.result() for f in futures]
return [r.document.export_to_markdown() for r in results]Error Handling
def safe_convert_document(url: str) -> str:
"""Convert document with error handling."""
try:
with DoclingServiceClient(url=SERVICE_URL, api_key=API_KEY) as client:
result = client.convert(
source=url,
options={"low_latency": True}
)
return result.document.export_to_markdown()
except Exception as e:
return f"Error converting document: {str(e)}"Best Practices
Agent Design
- Use low-latency mode - Enable for faster agent responses
- Implement caching - Avoid redundant conversions
- Handle errors gracefully - Provide fallback responses
- Limit document size - Set reasonable size limits
Tool Design
- Clear descriptions - Help agent understand when to use tools
- Structured inputs - Define clear input formats
- Informative outputs - Return actionable information
- Error messages - Provide helpful error context
Performance
- Parallel processing - Convert multiple documents simultaneously
- Selective conversion - Only convert when necessary
- Chunk large documents - Process in manageable pieces
- Monitor usage - Track API calls and optimize
Use Cases
Contract Analysis Agent
# Agent that analyzes contracts and extracts key terms
agent.run(
"Review this contract and identify any unusual clauses: "
"https://example.com/contract.pdf"
)
Research Assistant
# Agent that researches topics across multiple papers
agent.run(
"What are the latest developments in quantum computing? "
"Check these papers: https://arxiv.org/pdf/paper1.pdf, "
"https://arxiv.org/pdf/paper2.pdf"
)
Financial Analyst
# Agent that analyzes financial reports
agent.run(
"Compare the revenue growth between Q3 and Q4: "
"https://example.com/q3-report.pdf, "
"https://example.com/q4-report.pdf"
)
Next Steps
- Explore MCP Integration for Claude Desktop
- Learn about RAG Applications for knowledge bases
- Check Pipeline Integration for automation