Graph Orchestration

DAG-based workflow execution with dynamic scheduling, failure strategies, and visualization

Overview

GraphRunner enables you to orchestrate complex multi-agent workflows as directed acyclic graphs (DAGs). Unlike sequential agent chains or simple parallel execution, graphs provide precise control over dependencies, concurrency, and failure handling.

Key Features: Dynamic indegree scheduling, configurable concurrency limits, three failure strategies (fail_fast, allow_independent, always_run), per-node retry policies, soft-fail mode, cleanup nodes, and built-in visualization utilities.

Basic Usage

Simple Linear Pipeline

from agentic import (
    GraphRunner, GraphConfig, GraphNode,
    GraphNodeStartEvent, GraphNodeCompleteEvent, GraphCompleteEvent
)

# Configure the graph: at most 4 nodes run in parallel, and the first
# failure stops scheduling of new nodes (fail_fast strategy).
config = GraphConfig(
    graph_id="data_pipeline",
    max_concurrency=4,
    failure_strategy="fail_fast"
)

# NOTE(review): `context` is assumed to be a ContextManager created elsewhere.
graph = GraphRunner(config, context)

# Add nodes with dependencies: the second argument lists the parent node
# ids that must complete before this node becomes runnable.
graph.add_node(GraphNode("fetch", fetch_agent))
graph.add_node(GraphNode("validate", validate_agent), ["fetch"])
graph.add_node(GraphNode("process", process_agent), ["validate"])
graph.add_node(GraphNode("analyze", analyze_agent), ["process"])

# Execute with streaming events
async for event in graph.run_stream():
    if isinstance(event, GraphNodeStartEvent):
        print(f"Starting {event.node_id}...")
    elif isinstance(event, GraphNodeCompleteEvent):
        print(f"{event.node_id}: {event.status.value}")
    elif isinstance(event, GraphCompleteEvent):
        # Final event: overall graph status plus per-category node counts.
        print(f"\nGraph {event.status}")
        print(f"Stats: {event.stats}")

Batch Execution

# Execute and get final statuses
statuses = graph.run()  # dict[str, GraphNodeStatus]

# Print each node's terminal status (e.g. "completed", "failed", "skipped").
for node_id, status in statuses.items():
    print(f"{node_id}: {status.value}")

Graph Configuration

GraphConfig Parameters

Parameter Type Default Description
graph_id str required Unique identifier for this graph
max_concurrency int 8 Maximum nodes executing in parallel
failure_strategy str "fail_fast" "fail_fast", "allow_independent", or "always_run"
persist_state bool False Write final graph state summary to context
state_context_key str | None None Custom context key for state (default: "graph:{graph_id}:state")

Node Configuration

GraphNode Parameters

GraphNode(
    id="node_name",                   # Unique node identifier
    executable=agent_or_callable,     # AgentRunner | LogicRunner | Callable
    output_key=None,                    # Context key to store output
    output_selector=None,               # Function to extract output from result
    retry_config=None,                 # Node-level RetryConfig override
    failure_mode="fail",                # "fail" | "soft_fail"
    run_on_failure=False               # Run even if upstream nodes failed
)

Executable Types

AgentRunner Nodes

# Standard agent execution
node = GraphNode("fetch_data", AgentRunner(fetch_agent))

LogicRunner Nodes

# Multi-step logic flow as a node
logic = LogicRunner(agent, context, patterns, logic_config)
node = GraphNode("validation_loop", logic)

Custom Callable Nodes

# Async generator yielding events
async def merge_results(ctx: ContextManager) -> AsyncIterator[BaseEvent]:
    # Assumes upstream nodes stored their outputs in context under
    # "branch1_output"/"branch2_output" (e.g. via output_key) — confirm
    # against the graph definition.
    data1 = ctx.get("branch1_output")
    data2 = ctx.get("branch2_output")
    merged = f"{data1}\n{data2}"
    ctx.set("merged_output", merged)
    yield StatusEvent(AgentStatus.OK, "Results merged")

node = GraphNode("merge", merge_results)

# Simple async function (no events)
async def simple_transform(ctx: ContextManager) -> str:
    data = ctx.get("input")
    return data.upper()

node = GraphNode(
    "transform",
    simple_transform,
    output_key="transformed"  # Return value stored here
)

Output Capture

# Capture agent response in context.
# output_key names the context key; output_selector extracts the value
# to store from the node's raw result object.
node = GraphNode(
    "analyzer",
    AgentRunner(analysis_agent),
    output_key="analysis_result",
    output_selector=lambda r: r.segments.response
)

# Later nodes can access the output
async def use_analysis(ctx: ContextManager):
    # Reads the value the "analyzer" node stored under its output_key.
    analysis = ctx.get("analysis_result")
    print(f"Previous analysis: {analysis}")

Failure Strategies

Control how the graph handles node failures with three strategies:

fail_fast (Default)

Stop scheduling new nodes immediately when any node fails. Running nodes complete, but no new nodes start except those with run_on_failure=True.

Use when: Any failure invalidates the entire workflow and you want to stop as soon as possible.

config = GraphConfig(
    graph_id="pipeline",
    failure_strategy="fail_fast"
)

allow_independent

Skip nodes that have failed ancestors, but continue executing independent branches. Nodes are only skipped if they transitively depend on a failed node.

Use when: You have parallel branches and want successful branches to complete even if others fail.

config = GraphConfig(
    graph_id="pipeline",
    failure_strategy="allow_independent"
)

# Example: If branch1 fails, branch2 still runs
graph.add_node(GraphNode("root", root_agent))
graph.add_node(GraphNode("branch1", agent1), ["root"])
graph.add_node(GraphNode("branch2", agent2), ["root"])
graph.add_node(GraphNode("merge1", merge_agent), ["branch1"])  # Skipped if branch1 fails
graph.add_node(GraphNode("merge2", merge_agent), ["branch2"])  # Runs if branch2 succeeds

always_run

Run all nodes regardless of failures. The graph continues executing until all reachable nodes complete.

Use when: You want maximum coverage and need to see results from all nodes, even if some fail.

config = GraphConfig(
    graph_id="analysis",
    failure_strategy="always_run"
)

Advanced Patterns

Diamond DAG (Parallel + Merge)

# Split-process-merge pattern
graph.add_node(GraphNode("fetch", fetch_agent))

# Parallel processing branches
graph.add_node(GraphNode("process_a", agent_a), ["fetch"])
graph.add_node(GraphNode("process_b", agent_b), ["fetch"])

# Merge results (waits for both branches)
graph.add_node(
    GraphNode("merge", merge_agent),
    ["process_a", "process_b"]
)

Cleanup Nodes

# Node that runs even on failure (e.g., cleanup, logging)
cleanup_node = GraphNode(
    "cleanup",
    cleanup_agent,
    run_on_failure=True  # Executes regardless of upstream status
)

graph.add_node(cleanup_node, ["process"])

Soft-Fail Nodes

# Node that doesn't fail the graph on error
optional_node = GraphNode(
    "optional_enrichment",
    enrichment_agent,
    failure_mode="soft_fail"  # Errors logged but treated as success
)

graph.add_node(optional_node, ["fetch"])
graph.add_node(GraphNode("continue", next_agent), ["optional_enrichment"])

Per-Node Retry

from agentic import RetryConfig

# Retry critical nodes automatically
critical_node = GraphNode(
    "api_call",
    api_agent,
    retry_config=RetryConfig(
        max_attempts=3,
        base_delay=2.0,
        backoff="exponential",
        retry_on=(TimeoutError, ConnectionError)
    )
)

graph.add_node(critical_node)

Conditional Branches

# Use custom callables for branching logic
async def route_based_on_input(ctx: ContextManager):
    input_type = ctx.get("input_type")

    if input_type == "text":
        ctx.set("route", "text_branch")
    else:
        ctx.set("route", "data_branch")

    yield StatusEvent(AgentStatus.OK, f"Routed to {input_type}")

# NOTE(review): both processors depend on "router", so both are scheduled;
# the graph itself does not gate execution on the "route" value. Each
# processor should check ctx.get("route") and no-op when not selected.
graph.add_node(GraphNode("router", route_based_on_input))
graph.add_node(GraphNode("text_processor", text_agent), ["router"])
graph.add_node(GraphNode("data_processor", data_agent), ["router"])

Graph Visualization

Export graph structure for documentation and debugging using built-in visualization utilities.

Mermaid Format

from agentic import to_mermaid

# Basic structure
mermaid = to_mermaid(graph)
print(mermaid)
# Output:
# flowchart TD
#     fetch["fetch"]
#     process["process"]
#     analyze["analyze"]
#     fetch --> process
#     process --> analyze

# With metadata (node types, flags, output keys)
mermaid_detailed = to_mermaid(graph, include_metadata=True)
print(mermaid_detailed)
# Output:
# flowchart TD
#     fetch["fetch | AgentRunner | →fetched_data"]
#     cleanup["cleanup | AgentRunner | cleanup"]
#     fetch --> cleanup

Graphviz DOT Format

from agentic import to_dot

# Export to DOT
dot = to_dot(graph, include_metadata=True)

# Save to file
with open("graph.dot", "w") as f:
    f.write(dot)

# Render with Graphviz:
# dot -Tpng graph.dot -o graph.png
# dot -Tsvg graph.dot -o graph.svg
Visualization Notes: The visualization utilities are read-only, stateless, and have zero effect on graph execution. They're pure functions that only examine graph structure.

State Persistence

# Save graph execution state to context
config = GraphConfig(
    graph_id="pipeline",
    persist_state=True,
    state_context_key="pipeline:final_state"  # Optional custom key
)

graph = GraphRunner(config, context)

# After execution, state is saved as JSON
state = context.get("pipeline:final_state")
# {
#   "graph_id": "pipeline",
#   "status": "success",
#   "stats": {"completed": 4, "failed": 0, "skipped": 0, "pending": 0},
#   "node_statuses": {"fetch": "completed", "process": "completed", ...},
#   "errors": {}
# }

Event Handling

GraphRunner emits 4 graph-specific events plus all underlying agent/tool/pattern events:

async for event in graph.run_stream():
    match event.type:
        case "graph_start":
            print(f"Graph started: {event.graph_id} ({event.total_nodes} nodes)")

        case "graph_node_start":
            print(f"Node {event.node_id} starting (parents: {event.parents})")

        case "graph_node_complete":
            if event.status == GraphNodeStatus.COMPLETED:
                print(f"✓ {event.node_id} completed")
            elif event.status == GraphNodeStatus.FAILED:
                print(f"✗ {event.node_id} failed: {event.error_message}")
            elif event.status == GraphNodeStatus.SKIPPED:
                print(f"○ {event.node_id} skipped")

        case "graph_complete":
            print(f"\nGraph finished: {event.status}")
            print(f"Completed: {event.stats['completed']}")
            print(f"Failed: {event.stats['failed']}")
            print(f"Skipped: {event.stats['skipped']}")

        case "llm_chunk" | "tool_start" | "tool_end":
            # Handle underlying events from nodes
            pass

Concurrency Control

GraphRunner uses dynamic indegree scheduling with configurable concurrency:

# Limit parallel execution
config = GraphConfig(
    graph_id="pipeline",
    max_concurrency=2  # Only 2 nodes run at a time
)

# Even with many ready nodes, max 2 execute in parallel
graph.add_node(GraphNode("root", root_agent))
graph.add_node(GraphNode("a", agent_a), ["root"])
graph.add_node(GraphNode("b", agent_b), ["root"])
graph.add_node(GraphNode("c", agent_c), ["root"])
graph.add_node(GraphNode("d", agent_d), ["root"])

# After 'root' completes, only 2 of (a,b,c,d) run concurrently

Cycle Detection

GraphRunner automatically detects cycles during graph construction:

graph.add_node(GraphNode("a", agent_a))
graph.add_node(GraphNode("b", agent_b), ["a"])
graph.add_node(GraphNode("c", agent_c), ["b"])

try:
    graph.add_node(GraphNode("a", agent_a), ["c"])  # Creates cycle!
except ValueError as e:
    print(e)  # "Adding node 'a' would create a cycle..."
Note: For iterative loops, use LogicRunner as a node rather than creating cycles in the graph.

Best Practices

- Give every node a stable, descriptive id — dependencies, statuses, and visualizations all reference nodes by id.
- Store node outputs in context via output_key so downstream nodes can read them.
- Use run_on_failure=True for cleanup/logging nodes and failure_mode="soft_fail" for optional steps that should not fail the graph.

Performance Considerations

- Tune max_concurrency to your workload; even with many ready nodes, at most max_concurrency execute in parallel.
- Prefer allow_independent over fail_fast when independent branches should still complete after a failure.

Common Patterns

Fan-Out / Fan-In

# Process multiple items in parallel, then merge
graph.add_node(GraphNode("split", splitter))
for i in range(5):
    graph.add_node(GraphNode(f"process_{i}", processor), ["split"])
graph.add_node(
    GraphNode("merge", merger),
    [f"process_{i}" for i in range(5)]
)

Multi-Stage Pipeline

# Classic data pipeline: each stage depends on the previous one.
stages = ["extract", "transform", "load"]
for i, stage in enumerate(stages):
    # Add each stage exactly once, with its executable and (for every
    # stage after the first) a dependency on the preceding stage.
    if i == 0:
        graph.add_node(GraphNode(stage, agents[stage]))
    else:
        graph.add_node(GraphNode(stage, agents[stage]), [stages[i - 1]])

Try-Multiple / First-Success

# Try multiple approaches, use first that succeeds.
# Each approach is soft_fail so a failing attempt does not fail the graph,
# and stores its result under an explicit output_key — without output_key
# nothing is written to context and the selector below would read None.
graph.add_node(GraphNode("approach_a", agent_a, output_key="approach_a", failure_mode="soft_fail"))
graph.add_node(GraphNode("approach_b", agent_b, output_key="approach_b", failure_mode="soft_fail"))
graph.add_node(GraphNode("approach_c", agent_c, output_key="approach_c", failure_mode="soft_fail"))

async def pick_best(ctx: ContextManager):
    """Store the first non-empty approach result as "final_result"."""
    # Priority order: a, then b, then c. Leaves "final_result" unset
    # when every approach failed to produce output.
    for key in ["approach_a", "approach_b", "approach_c"]:
        result = ctx.get(key)
        if result:
            ctx.set("final_result", result)
            break

# The selector waits for all three attempts before choosing.
graph.add_node(
    GraphNode("selector", pick_best),
    ["approach_a", "approach_b", "approach_c"]
)

Next Steps