diff --git a/README.md b/README.md
index da5cf32..4487e42 100644
--- a/README.md
+++ b/README.md
@@ -1,93 +1,125 @@
-# Workflow - a library organizes steps with dependencies into DAG (Directed-Acyclic-Graph) for Go
+# go-workflow
+
 [![Go Report Card](https://goreportcard.com/badge/github.com/Azure/go-workflow)](https://goreportcard.com/report/github.com/Azure/go-workflow)
 [![Go Test Status](https://github.com/Azure/go-workflow/actions/workflows/go.yml/badge.svg)](https://github.com/Azure/go-workflow/actions/workflows/go.yml)
 [![Go Test Coverage](https://raw.githubusercontent.com/Azure/go-workflow/badges/.badges/main/coverage.svg)](/.github/.testcoverage.yml)
 
-## Overview
+> Describe steps and the dependencies between them. We run them as a DAG — concurrently,
+> with retry, timeout, conditions and interceptors — and block until everything is done.
 
-> Strongly encourage everyone to read examples in the [example](./example) directory to have a quick understanding of how to use this library.
+```go
+// Two steps that pass data through a typed dependency.
+type Fetch struct{ URL, Body string }
+type Save struct{ Body, Path string }
 
-`go-workflow` helps Go developers organize steps with dependencies into a Directed-Acyclic-Graph (DAG).
-- It provides a simple and flexible way to define and execute a workflow.
-- It is easy to implement steps and compose them into a composite step.
-- It uses **goroutine** to execute steps concurrently.
-- It supports **retry**, **timeout**, and other configurations for each step.
-- It supports **callbacks** to hook before / after each step.
+func (f *Fetch) Do(ctx context.Context) error { f.Body = httpGet(ctx, f.URL); return nil }
+func (s *Save) Do(ctx context.Context) error  { return os.WriteFile(s.Path, []byte(s.Body), 0o644) }
 
-See it in action:
+func main() {
+	fetch := &Fetch{URL: "https://example.com"}
+	save := &Save{Path: "page.html"}
+
+	w := new(flow.Workflow)
+	w.Add(
+		// Retry the fetch up to 3 times, capped at 30s total.
+		flow.Step(fetch).
+			Retry(func(o *flow.RetryOption) { o.Attempts = 3 }).
+			Timeout(30*time.Second),
+
+		// save runs only after fetch succeeds, and reads its output as its input.
+		flow.Step(save).DependsOn(fetch).
+			Input(func(_ context.Context, s *Save) error {
+				s.Body = fetch.Body
+				return nil
+			}),
+	)
+
+	if err := w.Do(context.Background()); err != nil {
+		log.Fatal(err) // flow.ErrWorkflow — one entry per failing step.
+	}
+}
+```
 
-```go
-package yours
+## Why
 
-import (
-    "context"
+- **Tiny interface.** A step is anything with `Do(context.Context) error`. No codegen, no DSL.
+- **Dependencies as code.** `Step(x).DependsOn(y)`, `Pipe(...)`, `BatchPipe(...)`, `If/Switch`.
+- **Concurrent by default.** Each ready step runs in its own goroutine; cap with `MaxConcurrency`.
+- **Per-step controls.** Retry with backoff, timeout, conditions, typed `Input`/`Output`, before/after hooks.
+- **Composable.** A `Workflow` is itself a `Step`, so workflows nest — interceptors and options
+  flow into children automatically.
+- **No surprises.** `Workflow.Do` blocks until every goroutine has exited and every step is terminal.
 
-    flow "github.com/Azure/go-workflow"
-)
+## Install
 
-type Step struct{ Value string }
+```bash
+go get github.com/Azure/go-workflow
+```
 
-// All required for a step is `Do(context.Context) error`
-func (s *Step) Do(ctx context.Context) error {
-    fmt.Println(s.Value)
-    return nil
-}
+Requires Go 1.23+.
+
+## How a step ends up
 
-func main() {
-    // declare steps
-    var (
-        a = new(Step)
-        b = &Step{Value: "B"}
-        c = flow.Func("declare from anonymous function", func(ctx context.Context) error {
-            fmt.Println("C")
-            return nil
-        })
-    )
-    // compose steps into a workflow!
-    w := new(flow.Workflow)
-    w.Add(
-        flow.Step(b).DependsOn(a),     // use DependsOn to define dependencies
-        flow.Steps(a, b).DependsOn(c), // final execution order: c -> a -> b
-
-        // other configurations, like retry, timeout, condition, etc.
-        flow.Step(c).
-            Retry(func(ro *flow.RetryOption) {
-                ro.Attempts = 3 // retry 3 times
-            }).
-            Timeout(10*time.Minute), // timeout after 10 minutes
-
-        // use Input to change step at runtime
-        flow.Step(a).Input(func(ctx context.Context, a *Step) error {
-            a.Value = "A"
-            return nil
-        }),
-    )
-    // execute the workflow and block until all steps are terminated
-    err := w.Do(context.Background())
-}
+```
+Pending → [Running →] Succeeded | Failed | Canceled | Skipped
 ```
 
-## Document from AI
-You can also check the document from deepwiki: https://deepwiki.com/Azure/go-workflow
+`Skipped` and `Canceled` are settled inline by the scheduler when a step's `Condition` decides
+it shouldn't run — no goroutine, no concurrency lease, no interceptor chain. A failing step does
+**not** abort siblings; only downstream steps see it (and become `Skipped` under the default
+`AllSucceeded` condition).
 
-## Contributing
+`Workflow.Do` returns `nil` on success, or an `ErrWorkflow` (`map[Steper]StepResult`) you can
+range over. `ErrCycleDependency` is returned from preflight if your graph isn't a DAG.
+
+## Wiring the graph
+
+| Helper                                 | Means                                                                          |
+|----------------------------------------|--------------------------------------------------------------------------------|
+| `flow.Step(s)`                         | Add one typed step (enables typed `Input`/`Output`).                           |
+| `flow.Steps(s1, s2, …)`                | Add several independent steps (run in parallel).                               |
+| `flow.Pipe(a, b, c)`                   | Linear pipeline `a → b → c`.                                                   |
+| `flow.BatchPipe(Steps(a,b), Steps(c))` | Every step in batch _i_ depends on every step in batch _i-1_.                  |
+| `flow.If(...)`, `flow.Switch(...)`     | Conditional branches based on the result of a target step.                     |
+
+Common chainables on the result: `DependsOn`, `When(cond)`, `Retry(...)`, `Timeout(d)`,
+`Input(fn)`, `Output(fn)`, `BeforeStep(fn)`, `AfterStep(fn)`. `Add(...)` is repeatable —
+calling it again merges new config into existing steps.
+
+## Workflow knobs
 
-This project welcomes contributions and suggestions.  Most contributions require you to agree to a
-Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
-the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
+Set fields on `flow.Workflow` before `Do`:
+
+| Field                  | Effect                                                                       |
+|------------------------|------------------------------------------------------------------------------|
+| `MaxConcurrency`       | Max running steps at once. `0` = unlimited.                                  |
+| `DontPanic`            | Recover panics into `ErrPanic` instead of crashing.                          |
+| `SkipAsError`          | Treat `Skipped` as workflow failure (default: skipped is OK).                |
+| `DefaultOption`        | Base `*StepOption` applied (then overridable) to every step.                 |
+| `StepInterceptors`     | Wrap full step lifetime (across retries).                                    |
+| `AttemptInterceptors`  | Wrap each individual attempt (`Before → Do → After`).                        |
+| `IsolateInterceptors`  | When nested as a child step, don't inherit parent interceptors.              |
+| `Clock`                | Inject a clock for deterministic tests.                                      |
+
+## Learn more
+
+- **[`example/`](./example)** — runnable, narrated examples for every feature, in increasing
+  order of complexity (`01_step_do_test.go` → `14_mock_step_test.go`). Best place to start.
+- **[`openspec/specs/`](./openspec/specs)** — formal specs for execution model, branching,
+  conditions, retry/timeout, composite steps, interceptors and workflow options.
+- **DeepWiki:** <https://deepwiki.com/Azure/go-workflow>
+
+## Contributing
 
-When you submit a pull request, a CLA bot will automatically determine whether you need to provide
-a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
-provided by the bot. You will only need to do this once across all repos using our CLA.
+This project welcomes contributions. Most contributions require you to agree to a Contributor
+License Agreement — see <https://cla.opensource.microsoft.com>. The CLA bot will guide you on
+your first PR.
 
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
-For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
-contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+This project follows the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+Questions? <opencode@microsoft.com>.
 
 ## Trademarks
 
-This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
-trademarks or logos is subject to and must follow
-[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
-Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
-Any use of third-party trademarks or logos are subject to those third-party's policies.
+This project may contain trademarks for Microsoft projects, products, or services. Authorized
+use must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
+Third-party trademarks are subject to their own policies.
diff --git a/branch.go b/branch.go
index c8db743..cbc7940 100644
--- a/branch.go
+++ b/branch.go
@@ -4,49 +4,71 @@ import (
 	"context"
 )
 
-// BranchCheckFunc checks the target and returns true if the branch should be selected.
+// BranchCheckFunc inspects target after it has run and decides whether the
+// branch this check guards should be selected. Returning a non-nil error
+// fails the selected branch's step (delivered as the BeforeStep error).
 type BranchCheckFunc[T Steper] func(context.Context, T) (bool, error)
 
-// If adds a conditional branch to the workflow.
+// If wires a target step plus a Then/Else branch into the workflow:
 //
-//	If(someStep, func(ctx context.Context, someStep *SomeStep) (bool, error) {
-//		// branch condition here, true -> Then, false -> Else.
-//		// if error is returned, then fail the selected branch step.
+//	If(target, func(ctx context.Context, target *Target) (bool, error) {
+//	    // true  -> run the Then branch.
+//	    // false -> run the Else branch.
+//	    // err   -> the selected branch step fails with this error.
 //	}).
-//	Then(thenStep).
-//	Else(elseStep)
+//	    Then(thenStep).
+//	    Else(elseStep)
+//
+// All Then/Else steps depend on target and on the check's outcome — they
+// won't be considered until target has terminated.
 func If[T Steper](target T, check BranchCheckFunc[T]) *IfBranch[T] {
 	return &IfBranch[T]{Target: target, BranchCheck: BranchCheck[T]{Check: check}}
 }
 
-// IfBranch adds target step, then and else step to workflow,
-// and check the target step and determine which branch to go.
+// IfBranch is the configurable If(...) builder. It registers:
+//
+//   - the Target step (with an AfterStep that runs the BranchCheck after
+//     Target.Do has completed),
+//   - all Then steps, gated by a Condition that fires only when
+//     BranchCheck.OK == true,
+//   - all Else steps, gated by the inverse Condition,
+//   - a shared BeforeStep on every branch step that surfaces a non-nil
+//     BranchCheck.Error as the step's failure.
 type IfBranch[T Steper] struct {
-	Target      T // the target to check
+	Target      T // the step whose result the branch check inspects.
 	BranchCheck BranchCheck[T]
 	ThenStep    []Steper
 	ElseStep    []Steper
-	Cond        Condition // Cond is the When condition for both ThenStep and ElseStep, not target Step!
+	// Cond is layered IN ADDITION to the branch check: it applies to BOTH
+	// ThenStep and ElseStep — NOT to Target. Defaults to DefaultCondition.
+	Cond Condition
 }
 
-// Then adds steps to the Then branch.
+// Then appends step(s) to the Then branch.
 func (i *IfBranch[T]) Then(th ...Steper) *IfBranch[T] {
 	i.ThenStep = append(i.ThenStep, th...)
 	return i
 }
 
-// Else adds steps to the Else branch.
+// Else appends step(s) to the Else branch.
 func (i *IfBranch[T]) Else(el ...Steper) *IfBranch[T] {
 	i.ElseStep = append(i.ElseStep, el...)
 	return i
 }
 
-// When adds a condition to both Then and Else steps, not the Target!
-// Default to DefaultCondition.
+// When sets the upstream-evaluation Condition applied to ALL Then and Else
+// steps. It is composed with the branch check (both must allow the step to
+// run). Default is DefaultCondition.
+//
+// NOTE: this does NOT affect the Target step.
 func (i *IfBranch[T]) When(cond Condition) *IfBranch[T] {
 	i.Cond = cond
 	return i
 }
+
+// isThen returns the Condition for either the Then branch (isThen=true) or
+// the Else branch (isThen=false): first defer to i.Cond on the upstreams,
+// then accept only if the BranchCheck's OK matches the requested side.
 func (i *IfBranch[T]) isThen(isThen bool) Condition {
 	return func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 		if status := ConditionOrDefault(i.Cond)(ctx, ups); status != Running {
@@ -58,6 +80,12 @@ func (i *IfBranch[T]) isThen(isThen bool) Condition {
 		return Skipped
 	}
 }
+
+// AddToWorkflow lays the If(...) construct into a {step→config} map. It
+// chains: Target (with AfterStep running the branch check), Then branch
+// (gated by isThen(true)), Else branch (gated by isThen(false)), and a
+// shared BeforeStep on every branch step that propagates BranchCheck.Error
+// as the step's failure.
 func (i *IfBranch[T]) AddToWorkflow() map[Steper]*StepConfig {
 	return Steps().Merge(
 		Steps(i.Target).AfterStep(func(ctx context.Context, s Steper, err error) error {
@@ -79,46 +107,56 @@ func (i *IfBranch[T]) AddToWorkflow() map[Steper]*StepConfig {
 	).AddToWorkflow()
 }
 
-// Switch adds a switch branch to the workflow.
+// Switch wires a target step plus a Case/Default selection into the workflow:
 //
-//	Switch(someStep).
-//		Case(case1, func(ctx context.Context, someStep *SomeStep) (bool, error) {
-//			// branch condition here, true to select this branch
-//			// error will fail the case
-//		}).
-//		Default(defaultStep), // the step to run if all case checks return false
-//	)
+//	Switch(target).
+//	    Case(case1, func(ctx context.Context, t *Target) (bool, error) {
+//	        // true -> run case1.
+//	        // err  -> case1 fails with this error.
+//	    }).
+//	    Case(case2, func(ctx context.Context, t *Target) (bool, error) { ... }).
+//	    Default(defaultStep) // runs only if every Case check returned false.
 func Switch[T Steper](target T) *SwitchBranch[T] {
 	return &SwitchBranch[T]{Target: target, CasesToCheck: make(map[Steper]*BranchCheck[T])}
 }
 
-// SwitchBranch adds target step, cases and default step to workflow,
-// and check the target step and determine which branch to go.
+// SwitchBranch is the configurable Switch(...) builder. It registers:
+//
+//   - the Target step,
+//   - one or more Case steps (each gated by its own BranchCheck),
+//   - an optional Default step that runs iff none of the Case checks selected
+//     their case (and depends on every Case so the decision is observable).
 type SwitchBranch[T Steper] struct {
 	Target       T
 	CasesToCheck map[Steper]*BranchCheck[T]
 	DefaultStep  []Steper
-	Cond         Condition
+	// Cond is the upstream-evaluation Condition for ALL case steps and the
+	// default step — NOT the Target. Defaults to DefaultCondition.
+	Cond Condition
 }
 
-// BranchCheck represents a branch to be checked.
+// BranchCheck is the per-branch state recorded by If/Switch: the check
+// function plus its most recent result (OK / Error). The result is set when
+// the framework runs Do() during the Target's AfterStep (for If) or when the
+// case condition is evaluated (for Switch).
 type BranchCheck[T Steper] struct {
 	Check BranchCheckFunc[T]
 	OK    bool
 	Error error
 }
 
+// Do invokes the check function and records its result on the BranchCheck.
 func (bc *BranchCheck[T]) Do(ctx context.Context, target T) {
 	bc.OK, bc.Error = bc.Check(ctx, target)
 }
 
-// Case adds a case to the switch branch.
+// Case registers a single case step with its branch check.
 func (s *SwitchBranch[T]) Case(step Steper, check BranchCheckFunc[T]) *SwitchBranch[T] {
 	return s.Cases([]Steper{step}, check)
 }
 
-// Cases adds multiple cases to the switch branch.
-// The check function will be executed for each case step.
+// Cases registers multiple steps that share the same branch check function.
+// (The same check is recorded once per step — each gets its own result.)
 func (s *SwitchBranch[T]) Cases(steps []Steper, check BranchCheckFunc[T]) *SwitchBranch[T] {
 	for _, step := range steps {
 		s.CasesToCheck[step] = &BranchCheck[T]{Check: check}
@@ -126,17 +164,22 @@ func (s *SwitchBranch[T]) Cases(steps []Steper, check BranchCheckFunc[T]) *Switc
 	return s
 }
 
-// Default adds default step(s) to the switch branch.
+// Default appends fallback step(s) that run when every Case check returns false.
 func (s *SwitchBranch[T]) Default(step ...Steper) *SwitchBranch[T] {
 	s.DefaultStep = append(s.DefaultStep, step...)
 	return s
 }
 
-// When adds a condition to all case steps and default, not the Target!
+// When sets the upstream-evaluation Condition applied to all Case steps and
+// the Default step. NOT applied to the Target.
 func (s *SwitchBranch[T]) When(cond Condition) *SwitchBranch[T] {
 	s.Cond = cond
 	return s
 }
+
+// isCase builds the Condition for a specific case step. It first defers to
+// s.Cond on the upstreams, then runs that case's branch check and returns
+// Running iff the check accepted.
 func (s *SwitchBranch[T]) isCase(c Steper) func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	return func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 		if status := ConditionOrDefault(s.Cond)(ctx, ups); status != Running {
@@ -151,13 +194,18 @@ func (s *SwitchBranch[T]) isCase(c Steper) func(ctx context.Context, ups map[Ste
 		return Skipped
 	}
 }
+
+// isDefault is the Default step's Condition: skip if any case selected itself,
+// otherwise consult s.Cond (with case steps filtered out of the upstream map
+// so their Skipped status doesn't poison conditions like AllSucceeded).
 func (s *SwitchBranch[T]) isDefault(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	for _, check := range s.CasesToCheck {
 		if check.OK {
 			return Skipped
 		}
 	}
-	// default branch ignores the status from cases
+	// Hide the case steps from the user-supplied condition: their Skipped
+	// status is intentional and not a sign of upstream failure.
 	up := make(map[Steper]StepResult)
 	for step, status := range ups {
 		if _, isCase := s.CasesToCheck[step]; !isCase {
@@ -169,6 +217,11 @@ func (s *SwitchBranch[T]) isDefault(ctx context.Context, ups map[Steper]StepResu
 	}
 	return Running
 }
+
+// AddToWorkflow lays the Switch(...) construct into a {step→config} map.
+// Every Case step depends on Target and is gated by isCase(step). The
+// Default step (if any) depends on Target AND every Case (so it observes
+// their decisions) and is gated by isDefault.
 func (s *SwitchBranch[T]) AddToWorkflow() map[Steper]*StepConfig {
 	steps := Steps()
 	cases := []Steper{}
diff --git a/build_step.go b/build_step.go
index d631bf7..5ec417e 100644
--- a/build_step.go
+++ b/build_step.go
@@ -1,19 +1,37 @@
 package flow
 
-// StepBuilder allows to build the internal Steps when adding into Workflow.
+// StepBuilder is the per-Workflow memo that ensures every Step's optional
+// BuildStep() hook fires at most once.
 //
-//	type StepImpl struct {}
+// A Step type can implement BuildStep() to assemble its internal sub-steps
+// lazily — typically the first time it is added to a Workflow:
+//
+//	type StepImpl struct{}
 //	func (s *StepImpl) Unwrap() []flow.Steper { return /* internal steps */ }
 //	func (s *StepImpl) Do(ctx context.Context) error { /* ... */ }
-//	func (s *StepImpl) BuildStep() { /* build internal steps */ }
+//	func (s *StepImpl) BuildStep()                  { /* assemble children */ }
 //
 //	workflow.Add(
-//		flow.Step(new(StepImpl)), // here will call StepImpl.BuildStep() once implicitly
+//	    flow.Step(new(StepImpl)), // BuildStep() fires here, exactly once.
 //	)
+//
+// The StepBuilder is embedded in Workflow itself, so Workflow.Add transparently
+// invokes BuildStep on every newly seen step.
 type StepBuilder struct{ built Set[Steper] }
 
-// BuildStep calls BuildStep() method of the Steper if it's implemented,
-// and ensure it's called only once for each Steper.
+// BuildStep walks the tree of step (pre-order) and triggers BuildStep() on
+// each node that implements it, recording the node so future calls skip it.
+//
+// Two early-exit rules keep behaviour predictable when composing workflows:
+//
+//   - If a node implements `BuildStep(Steper)` (the StepBuilder shape itself,
+//     i.e. it manages a sub-workflow of its own), descent stops at that node —
+//     the inner workflow is responsible for building its own contents.
+//   - If a node implements `Reset()`, it is reset before BuildStep() runs, so
+//     the build always starts from a clean slate.
+//
+// In both build cases the walker returns TraverseEndBranch so the parent
+// composite's children aren't double-visited from this side.
 func (sb *StepBuilder) BuildStep(s Steper) {
 	if sb.built == nil {
 		sb.built = make(Set[Steper])
diff --git a/condition.go b/condition.go
index ca6e2c3..fe978bd 100644
--- a/condition.go
+++ b/condition.go
@@ -6,19 +6,30 @@ import (
 	"fmt"
 )
 
-// StepStatus describes the status of a Step.
+// StepStatus describes the lifecycle state of a Step inside a Workflow.
+//
+// A step starts at Pending, is moved to Running by the scheduler when it is
+// dispatched to a goroutine, and ends in one of the four terminal states:
+// Failed, Succeeded, Canceled or Skipped. Use IsTerminated() to test for
+// "done".
+//
+// Steps that the scheduler decides not to run (Skipped / Canceled, settled
+// inline by the Condition check) move from Pending straight to a terminal
+// state without ever entering Running.
 type StepStatus string
 
 const (
-	Pending   StepStatus = ""          // Pending means the Step has not started yet.
-	Running   StepStatus = "Running"   // Running means the Step is in progress.
-	Failed    StepStatus = "Failed"    // Failed means the Step has terminated and failed.
-	Succeeded StepStatus = "Succeeded" // Succeeded means the Step has terminated and succeeded.
-	Canceled  StepStatus = "Canceled"  // Canceled means the Step has terminated and been canceled.
-	Skipped   StepStatus = "Skipped"   // Skipped means the Step has terminated and been skipped.
+	Pending   StepStatus = ""          // not yet started.
+	Running   StepStatus = "Running"   // currently executing in a worker goroutine.
+	Failed    StepStatus = "Failed"    // terminal: Do (or a callback) returned a non-nil error.
+	Succeeded StepStatus = "Succeeded" // terminal: Do returned nil.
+	Canceled  StepStatus = "Canceled"  // terminal: ctx was canceled, or the error wraps context.Canceled / DeadlineExceeded.
+	Skipped   StepStatus = "Skipped"   // terminal: the Condition decided not to run the step.
 )
 
-// IsTerminated returns true if the StepStatus is one of the terminated states (Failed, Succeeded, Canceled, Skipped).
+// IsTerminated reports whether the status is one of Failed, Succeeded,
+// Canceled or Skipped. The Workflow tick loop polls this to decide when
+// downstream steps may be considered.
 func (s StepStatus) IsTerminated() bool {
 	switch s {
 	case Failed, Succeeded, Canceled, Skipped:
@@ -27,6 +38,9 @@ func (s StepStatus) IsTerminated() bool {
 	return false
 }
 
+// String renders the status for logs / errors. Pending is rendered as
+// "Pending" rather than the empty string, and unknown values are tagged so
+// they stand out.
 func (s StepStatus) String() string {
 	switch s {
 	case Pending:
@@ -38,15 +52,28 @@ func (s StepStatus) String() string {
 	}
 }
 
-// Condition is a function to determine what's the next status of Step.
-// Condition makes the decision based on the status and result of all the Upstream Steps.
-// Condition is only called when all Upstream Steps are terminated.
+// Condition decides what should happen to a step once all of its upstreams
+// have terminated. It returns the next StepStatus:
+//
+//   - Running                   → the scheduler will dispatch the step to a worker.
+//   - Skipped / Canceled / etc. → the scheduler settles the step inline, with
+//     no goroutine, no concurrency lease, and no interceptor chain.
+//
+// The map passed in keys every direct upstream by its root Steper and exposes
+// its terminal StepResult. The condition is invoked with the workflow's
+// context, so it can also observe ctx.Err() to react to a top-level cancel.
 type Condition func(ctx context.Context, ups map[Steper]StepResult) StepStatus
 
 var (
-	// DefaultCondition used in workflow, defaults to AllSucceeded
+	// DefaultCondition is the Condition used when a step doesn't set its own
+	// via When(). Defaults to AllSucceeded — i.e. a step runs only if every
+	// upstream succeeded.
 	DefaultCondition Condition = AllSucceeded
-	// DefaultIsCanceled is used to determine whether an error is being regarded as canceled.
+
+	// DefaultIsCanceled classifies an error as a "cancellation" rather than a
+	// "failure". The built-in conditions and the worker's terminal-status
+	// computation both consult this hook. Override it to recognize your own
+	// cancellation sentinels.
 	DefaultIsCanceled = func(err error) bool {
 		switch {
 		case errors.Is(err, context.Canceled),
@@ -58,12 +85,15 @@ var (
 	}
 )
 
-// Always runs the step as long as all upstream steps are terminated
+// Always runs the step regardless of upstream outcomes (as long as every
+// upstream is terminated, which the scheduler guarantees before calling).
 func Always(context.Context, map[Steper]StepResult) StepStatus {
 	return Running
 }
 
-// AllSucceeded runs the step when all upstream steps are Succeeded
+// AllSucceeded runs the step only when every upstream succeeded. If any
+// upstream is in a non-Succeeded terminal state the step becomes Skipped. If
+// the workflow context is already canceled, the step becomes Canceled.
 func AllSucceeded(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	if DefaultIsCanceled(ctx.Err()) {
 		return Canceled
@@ -76,7 +106,8 @@ func AllSucceeded(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	return Running
 }
 
-// AnySucceeded runs the step when any upstream step is Succeeded
+// AnySucceeded runs the step when at least one upstream succeeded;
+// otherwise it is Skipped. A canceled context still wins (Canceled).
 func AnySucceeded(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	if DefaultIsCanceled(ctx.Err()) {
 		return Canceled
@@ -89,7 +120,8 @@ func AnySucceeded(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	return Skipped
 }
 
-// AllSucceededOrSkipped runs the step when all upstream steps are Succeeded or Skipped
+// AllSucceededOrSkipped tolerates Skipped upstreams: the step runs as long as
+// no upstream is Failed or Canceled. Canceled context still wins.
 func AllSucceededOrSkipped(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	if DefaultIsCanceled(ctx.Err()) {
 		return Canceled
@@ -102,7 +134,10 @@ func AllSucceededOrSkipped(ctx context.Context, ups map[Steper]StepResult) StepS
 	return Running
 }
 
-// BeCanceled runs the step only when the context is canceled
+// BeCanceled inverts the usual "a canceled context cancels me" rule: this
+// step runs only when the workflow context is already canceled; otherwise it
+// is Skipped. Useful for cleanup steps that should fire only when the
+// workflow is being torn down.
 func BeCanceled(ctx context.Context, _ map[Steper]StepResult) StepStatus {
 	if DefaultIsCanceled(ctx.Err()) {
 		return Running
@@ -110,7 +145,8 @@ func BeCanceled(ctx context.Context, _ map[Steper]StepResult) StepStatus {
 	return Skipped
 }
 
-// AnyFailed runs the step when any upstream step is Failed
+// AnyFailed runs the step when at least one upstream failed; otherwise it is
+// Skipped. Useful for "on failure" branches. Canceled context still wins.
 func AnyFailed(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	if DefaultIsCanceled(ctx.Err()) {
 		return Canceled
@@ -123,7 +159,8 @@ func AnyFailed(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 	return Skipped
 }
 
-// ConditionOr will use defaultCond if cond is nil.
+// ConditionOr returns a Condition that delegates to cond, or to defaultCond
+// when cond is nil. Lets callers compose conditions without a nil check.
 func ConditionOr(cond, defaultCond Condition) Condition {
 	return func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
 		if cond == nil {
@@ -133,5 +170,5 @@ func ConditionOr(cond, defaultCond Condition) Condition {
 	}
 }
 
-// ConditionOrDefault will use DefaultCondition if cond is nil.
+// ConditionOrDefault is ConditionOr with the package-level DefaultCondition.
 func ConditionOrDefault(cond Condition) Condition { return ConditionOr(cond, DefaultCondition) }
diff --git a/error.go b/error.go
index e5ab8d3..3322003 100644
--- a/error.go
+++ b/error.go
@@ -8,15 +8,27 @@ import (
 	"time"
 )
 
-// Succeed marks the current step as `Succeeded`, while still reports the error.
+// Succeed wraps err in an ErrSucceed so that StatusFromError will classify
+// the resulting error as Succeeded. Use it when your step has reportable
+// information to bubble up but you still want the step counted as a success.
 func Succeed(err error) ErrSucceed { return ErrSucceed{err} }
 
-// Cancel marks the current step as `Canceled`, and reports the error.
+// Cancel wraps err in an ErrCancel so the step is classified as Canceled.
 func Cancel(err error) ErrCancel { return ErrCancel{err} }
 
-// Skip marks the current step as `Skipped`, and reports the error.
+// Skip wraps err in an ErrSkip so the step is classified as Skipped.
 func Skip(err error) ErrSkip { return ErrSkip{err} }
 
+// Status-marker errors. They behave like ordinary error wrappers (Unwrap
+// returns the underlying error) but additionally tell StatusFromError which
+// terminal StepStatus to assign:
+//
+//   - ErrSucceed    → Succeeded
+//   - ErrCancel     → Canceled
+//   - ErrSkip       → Skipped
+//   - ErrPanic      → Failed (only ever produced when Workflow.DontPanic is true)
+//   - ErrBeforeStep → Failed (the failure happened in a Before/Input callback,
+//     not in Do itself)
 type ErrSucceed struct{ error }
 type ErrCancel struct{ error }
 type ErrSkip struct{ error }
@@ -29,7 +41,11 @@ func (e ErrSkip) Unwrap() error       { return e.error }
 func (e ErrPanic) Unwrap() error      { return e.error }
 func (e ErrBeforeStep) Unwrap() error { return e.error }
 
-// WithStackTraces saves stack frames into error
+// WithStackTraces returns a wrapper that captures up to `depth` runtime
+// frames (skipping the topmost `skip` frames) and attaches them to err as an
+// ErrWithStackTraces. Frames matched by any `ignores` predicate are dropped.
+//
+// catchPanicAsError uses this to enrich panic errors with a filtered stack.
 func WithStackTraces(skip, depth int, ignores ...func(runtime.Frame) bool) func(error) error {
 	return func(err error) error {
 		pc := make([]uintptr, depth)
@@ -57,12 +73,15 @@ func WithStackTraces(skip, depth int, ignores ...func(runtime.Frame) bool) func(
 	}
 }
 
-// ErrWithStackTraces saves stack frames into error, and prints error into
+// ErrWithStackTraces decorates an error with the runtime frames that were
+// active when WithStackTraces was applied. Its Error() formatting is:
 //
-//	error message
+//	<inner error message>
 //
 //	Stack Traces:
-//		file:line
+//	    file:line
+//	    file:line
+//	    ...
 type ErrWithStackTraces struct {
 	Err    error
 	Frames []runtime.Frame
@@ -75,6 +94,8 @@ func (e ErrWithStackTraces) Error() string {
 	}
 	return e.Err.Error()
 }
+
+// StackTraces renders each captured frame as "file:line".
 func (e ErrWithStackTraces) StackTraces() []string {
 	stacks := make([]string, 0, len(e.Frames))
 	for i := range e.Frames {
@@ -83,7 +104,16 @@ func (e ErrWithStackTraces) StackTraces() []string {
 	return stacks
 }
 
-// StatusFromError gets the StepStatus from error.
+// StatusFromError classifies an error into a terminal StepStatus.
+//
+//   - nil                                              → Succeeded
+//   - any error wrapping (via Unwrap) ErrSucceed/Cancel/Skip → that status
+//   - anything else                                    → Failed
+//
+// Note: context.Canceled / context.DeadlineExceeded are NOT translated here —
+// the worker in workflow.go applies that policy after consulting
+// DefaultIsCanceled, so the per-step Status ends up Canceled for cancellation
+// errors even if StatusFromError reported Failed.
 func StatusFromError(err error) StepStatus {
 	if err == nil {
 		return Succeeded
@@ -104,17 +134,21 @@ func StatusFromError(err error) StepStatus {
 	}
 }
 
-// StepResult contains the status and error of a Step.
+// StepResult is the public terminal record of a single step's run: its final
+// status, the last error observed (may be nil for Succeeded), and the
+// wall-clock time the step finished. FinishedAt is zero if the step never ran.
 type StepResult struct {
 	Status     StepStatus
 	Err        error
 	FinishedAt time.Time
 }
 
-// StatusError will be printed as:
+// Error renders a StepResult as:
 //
 //	[Status]
-//		error message
+//	    error message
+//
+// (with the error message indented).
 func (e StepResult) Error() string {
 	rv := fmt.Sprintf("[%s]", e.Status)
 	if e.Err != nil {
@@ -124,16 +158,20 @@ func (e StepResult) Error() string {
 }
 func (e StepResult) Unwrap() error { return e.Err }
 
+// indent rewrites any inner newlines so multi-line errors stay aligned under
+// the leading status tag.
 func indent(s string) string { return strings.ReplaceAll(s, "\n", "\n\t") }
 
-// ErrWorkflow contains all errors reported from terminated Steps in Workflow.
-//
-// Keys are root Steps, values are its status and error.
+// ErrWorkflow is the error returned by Workflow.Do when one or more steps did
+// not finish in a Succeeded (or Skipped, depending on SkipAsError) state. It
+// is keyed by ROOT step (composite-step internals are folded into their
+// containing root).
 type ErrWorkflow map[Steper]StepResult
 
-// sortedSteps returns ErrWorkflow keys sorted by FinishedAt ascending.
-// Steps with zero FinishedAt (never ran) sort last.
-// Tie-break: lexicographic order of String(step).
+// sortedSteps orders the keys of an ErrWorkflow for stable rendering:
+// finished steps first (oldest FinishedAt first), then never-ran steps; ties
+// are broken by String(step). This makes Error() output reproducible across
+// runs even though the underlying map iteration order is randomized.
 func sortedSteps(e ErrWorkflow) []Steper {
 	steps := make([]Steper, 0, len(e))
 	for step := range e {
@@ -145,7 +183,7 @@ func sortedSteps(e ErrWorkflow) []Steper {
 		zeroI := ti.IsZero()
 		zeroJ := tj.IsZero()
 		if zeroI != zeroJ {
-			return !zeroI // non-zero before zero
+			return !zeroI // non-zero (finished) before zero (never ran)
 		}
 		if !ti.Equal(tj) {
 			return ti.Before(tj)
@@ -155,6 +193,8 @@ func sortedSteps(e ErrWorkflow) []Steper {
 	return steps
 }
 
+// Unwrap returns the per-step errors in deterministic order so errors.Is /
+// errors.As can search through them.
 func (e ErrWorkflow) Unwrap() []error {
 	steps := sortedSteps(e)
 	rv := make([]error, 0, len(e))
@@ -166,10 +206,13 @@ func (e ErrWorkflow) Unwrap() []error {
 	return rv
 }
 
-// ErrWorkflow will be printed as:
+// Error renders an ErrWorkflow as a deterministic, multi-line dump:
 //
-//	Step: [Status]
-//		error message
+//	step1: [Status]
+//	    error message
+//	step2: [Status]
+//	    error message
+//	...
 func (e ErrWorkflow) Error() string {
 	var builder strings.Builder
 	for _, step := range sortedSteps(e) {
@@ -179,6 +222,7 @@ func (e ErrWorkflow) Error() string {
 	return builder.String()
 }
 
+// AllSucceeded reports whether every step ended in Succeeded.
 func (e ErrWorkflow) AllSucceeded() bool {
 	for _, sErr := range e {
 		if sErr.Status != Succeeded {
@@ -187,10 +231,14 @@ func (e ErrWorkflow) AllSucceeded() bool {
 	}
 	return true
 }
+
+// AllSucceededOrSkipped reports whether every step ended in Succeeded or
+// Skipped. (Skipped steps may still carry an Err describing why they were
+// skipped — this method ignores that.)
 func (e ErrWorkflow) AllSucceededOrSkipped() bool {
 	for _, sErr := range e {
 		switch sErr.Status {
-		case Succeeded, Skipped: // skipped step can have error to indicate why it's skipped
+		case Succeeded, Skipped:
 		default:
 			return false
 		}
@@ -198,11 +246,23 @@ func (e ErrWorkflow) AllSucceededOrSkipped() bool {
 	return true
 }
 
+// ErrWorkflowIsRunning is returned by Workflow.Do (and Workflow.Reset) when
+// the workflow is already executing in another goroutine. The workflow is
+// single-runner: wait for the in-flight Do to return before invoking again.
 var ErrWorkflowIsRunning = fmt.Errorf("Workflow is running, please wait for it terminated")
 
-// ErrCycleDependency means there is a cycle-dependency in your Workflow!!!
+// ErrCycleDependency is returned by Workflow.Do's preflight check when the
+// declared graph isn't acyclic. It maps each step still in a cycle to the
+// upstream step(s) that prevented it from being topologically scanned.
 type ErrCycleDependency map[Steper][]Steper
 
+// Error renders an ErrCycleDependency as:
+//
+//	Cycle Dependency Error:
+//	    stepA depends on [
+//	        stepB
+//	    ]
+//	    ...
 func (e ErrCycleDependency) Error() string {
 	depErr := make([]string, 0, len(e))
 	for step, ups := range e {
diff --git a/example/01_quickstart_test.go b/example/01_quickstart_test.go
new file mode 100644
index 0000000..c4fd941
--- /dev/null
+++ b/example/01_quickstart_test.go
@@ -0,0 +1,144 @@
+package flow_test
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"sort"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Quickstart: a 3-minute tour of go-workflow
+//
+// **What you'll learn**
+//   - Any struct of yours becomes a Step by adding one method:
+//     `Do(context.Context) error`. No interface to embed, no generics, no
+//     decorators. Your domain types ARE the workflow.
+//   - A Workflow is a DAG of Steps; Steps with no path between them run
+//     in parallel.
+//   - Use `Input` to flow data from upstream Steps into a downstream
+//     Step's fields, right before its `Do` runs.
+//
+// **The scenario**
+//
+// Build a user profile that combines two pieces of data fetched from
+// independent endpoints:
+//
+//	    ┌── FetchUser ──┐
+//	    │               │
+//	  start             ├──► BuildProfile ──► (result)
+//	    │               │
+//	    └── FetchPosts ─┘
+//
+// `FetchUser` and `FetchPosts` have no dependency on each other so the
+// Workflow runs them concurrently. `BuildProfile` waits until both are
+// done; an `Input` callback then copies their outputs into BuildProfile's
+// own fields, and `Do` reads them.
+//
+// Continue with 02_steps_and_deps_test.go to see more wiring shapes.
+func ExampleWorkflow_quickstart() {
+	// httptest stand-ins for two real services. In a real program these
+	// would be remote HTTP calls; the rest of the file works the same way.
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/user":
+			_ = json.NewEncoder(w).Encode(map[string]string{"name": "Alice"})
+		case "/posts":
+			_ = json.NewEncoder(w).Encode([]string{"hello", "world"})
+		}
+	}))
+	defer server.Close()
+
+	// Construct the steps. Each one is just a value of our own struct type.
+	// Configuration is set on the struct literal; results come out via fields.
+	user := &FetchUser{BaseURL: server.URL}
+	posts := &FetchPosts{BaseURL: server.URL}
+	profile := &BuildProfile{}
+
+	// Wire the graph and the data flow in one go. Input(fn) registers fn
+	// to run after every upstream has terminated and before profile.Do —
+	// so user.Name and posts.Posts are safe to read inside fn.
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(profile).
+			DependsOn(user, posts).
+			Input(func(ctx context.Context, p *BuildProfile) error {
+				p.Name = user.Name
+				p.Posts = posts.Posts
+				return nil
+			}),
+	)
+
+	if err := w.Do(context.Background()); err != nil {
+		fmt.Println("error:", err)
+	}
+	// Output:
+	// Alice has 2 posts: [hello world]
+}
+
+// FetchUser is a Step. The struct holds its configuration (BaseURL) and
+// publishes its result (Name) — both as plain exported fields. There is
+// nothing magic about it: any type with a Do(context.Context) error
+// method satisfies flow.Steper.
+type FetchUser struct {
+	BaseURL string // input: configured at construction time
+	Name    string // output: filled in by Do
+}
+
+func (f *FetchUser) Do(ctx context.Context) error {
+	var body map[string]string
+	if err := getJSON(ctx, f.BaseURL+"/user", &body); err != nil {
+		return err
+	}
+	f.Name = body["name"]
+	return nil
+}
+
+// FetchPosts is another Step. Same shape — a struct with config-in,
+// result-out, and a Do method.
+type FetchPosts struct {
+	BaseURL string
+	Posts   []string
+}
+
+func (f *FetchPosts) Do(ctx context.Context) error {
+	return getJSON(ctx, f.BaseURL+"/posts", &f.Posts)
+}
+
+// BuildProfile is the downstream Step. Its inputs (Name, Posts) are
+// plain fields. The Input callback registered at wiring time (see the
+// Input(...) call in ExampleWorkflow_quickstart above) fills them in
+// right before Do runs; Do then just reads those fields.
+//
+// Keeping data on Step fields (rather than reaching into upstream
+// objects from Do) makes BuildProfile self-contained: it can be tested
+// in isolation by setting Name and Posts and calling Do directly.
+type BuildProfile struct {
+	Name  string   // input, filled by Input callback
+	Posts []string // input, filled by Input callback
+}
+
+func (b *BuildProfile) Do(ctx context.Context) error {
+	posts := append([]string(nil), b.Posts...)
+	sort.Strings(posts) // sort the copy (b.Posts stays untouched) so the printed order is stable for the Output check.
+	fmt.Printf("%s has %d posts: %v\n", b.Name, len(posts), posts)
+	return nil
+}
+
+// getJSON is a small test helper. Real code would also check resp.StatusCode;
+// this is a quickstart, not an HTTP tutorial.
+func getJSON(ctx context.Context, url string, out any) error {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return err
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	return json.NewDecoder(resp.Body).Decode(out)
+}
diff --git a/example/01_step_do_test.go b/example/01_step_do_test.go
deleted file mode 100644
index bbf3c46..0000000
--- a/example/01_step_do_test.go
+++ /dev/null
@@ -1,75 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// Examples are a good place to ramp-up and have a quick look at this package's features.
-//
-// for basic usage,	   please go to 01 - 09
-// for advanced usage, please go to 10 - ..
-
-// # Step and Workflow
-//
-// Introduce two core concepts:
-//
-//   - Step
-//   - Workflow
-//
-// Where Step is the unit of a Workflow,
-// and Steps are connected with dependencies to form a Workflow (actually a Directed-Acyclic-Graph).
-//
-// They cooperate to provide features:
-//
-//   - Steps are easy to implement, just a trivial interface `Steper`
-//   - Declare dependencies between Steps to form a Workflow
-//   - Workflow executes Steps in a topological order
-//
-// Let's start with implementing a Step:
-//
-// To satisfy the interface of Step, just implement
-//
-//	type Steper interface {
-//		Do(context.Context) error
-//	}
-func ExampleSteper_Do() {
-	// Create a Workflow
-	workflow := new(flow.Workflow)
-
-	// Create Steps
-	// Notice we normally use pointer to struct as Step,
-	// internally Steps are stored in Workflow as map keys, so Steps need to be comparable.
-	foo := new(Foo)
-	bar := new(Bar)
-
-	// Connect the Steps into the Workflow
-	workflow.Add(
-		flow.Step(foo).DependsOn(bar),
-	)
-
-	// As the code says, step `foo` depends on step `bar`, or `bar` happens-before `foo`.
-	// In `flow` terms, we call `foo` as Downstream, `bar` as Upstream, since the flow is from Up to Down.
-	// We'll cover dependency detail in next session.
-
-	_ = workflow.Do(context.TODO())
-	// Output:
-	// Bar
-	// Foo
-}
-
-type Foo struct{}
-
-func (f *Foo) Do(ctx context.Context) error {
-	fmt.Println("Foo")
-	return nil
-}
-
-type Bar struct{}
-
-func (b *Bar) Do(context.Context) error {
-	fmt.Println("Bar")
-	return nil
-}
diff --git a/example/02_dependency_test.go b/example/02_dependency_test.go
deleted file mode 100644
index 161fa24..0000000
--- a/example/02_dependency_test.go
+++ /dev/null
@@ -1,52 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Dependency
-//
-// Steps are connected with dependencies to form a Workflow.
-//
-// `flow` provides rich featured Step dependency builders,
-// and the syntax is pretty close to plain English:
-//
-//	Step(someTask).DependsOn(upstreamTask)
-//	Steps(taskA, taskB).DependsOn(taskC, taskD)
-//
-// Most time, `Step` and `Steps` are mutually exchangeable.
-// The only difference is that:
-//
-//	Step supports a generic method `Input`, check next session about BeforeStep and AfterStep callbacks.
-func ExampleSteps() {
-	workflow := new(flow.Workflow)
-
-	// Besides, `flow` also provides a convenient way to create a Step implementation without declaring type,
-	// (since you need a type to implement interface `Steper`).
-	// Use `Func` to wrap any arbitrary function into a Step.
-	doNothing := func(context.Context) error { return nil }
-	var (
-		a = flow.Func("a", doNothing)
-		b = flow.Func("b", doNothing)
-		c = flow.Func("c", doNothing)
-		d = flow.Func("d", doNothing)
-	)
-
-	workflow.Add(
-		flow.Step(a).DependsOn(b, c),
-		flow.Steps(b, c).DependsOn(d),
-	)
-
-	fmt.Println(workflow.UpstreamOf(a))
-	fmt.Println(workflow.UpstreamOf(b))
-	fmt.Println(workflow.UpstreamOf(c))
-	fmt.Println(workflow.UpstreamOf(d))
-	// Output:
-	// map[b:[Pending] c:[Pending]]
-	// map[d:[Pending]]
-	// map[d:[Pending]]
-	// map[]
-}
diff --git a/example/02_steps_and_deps_test.go b/example/02_steps_and_deps_test.go
new file mode 100644
index 0000000..a0e5a99
--- /dev/null
+++ b/example/02_steps_and_deps_test.go
@@ -0,0 +1,170 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Steps & Dependencies: how to wire a Workflow
+//
+// **What you'll learn**
+//   - Three ways to express a dependency graph: `DependsOn`, `Pipe`, and
+//     `BatchPipe`.
+//   - Give a Step a friendly display name with `flow.Name`.
+//
+// **A note on Step types**
+//
+// In 01_quickstart we built one struct per Step — that is the recommended
+// way to write production Steps. In this file we declare a tiny `stage`
+// struct once and reuse instances of it: the focus here is on the WIRING,
+// not on the Step bodies. Anything with a `Do(context.Context) error`
+// method is a valid Step, including a struct as small as this:
+//
+//	type stage struct{ name string }
+//	func (s *stage) Do(ctx context.Context) error { ... }
+//
+// **Mental model**
+//
+// A Workflow is a directed acyclic graph (DAG). Each Step is a node; each
+// dependency is an edge from upstream to downstream. The Workflow schedules
+// every Step once (retries and conditions aside), respecting topological
+// order: a Step starts as soon as all its upstreams are terminated, and
+// Steps with no path between them may run in parallel.
+//
+// We'll wire the same toy CI/CD pipeline in three different ways so you
+// can pick the style that fits your code:
+//
+//	    clone ──► build ──► test ──► publish
+//	         \─► lint ────────┘
+//
+// build and lint both need clone; test needs both build and lint;
+// publish needs test.
+
+// stage is a tiny shared Step type for the wiring examples in this file.
+// Real Steps would carry richer state and do real work; see 01_quickstart
+// for the recommended one-struct-per-Step style.
+type stage struct{ name string }
+
+func (s *stage) String() string { return s.name }
+func (s *stage) Do(ctx context.Context) error {
+	fmt.Println(s.name)
+	return nil
+}
+
+// ExampleWorkflow_dependsOn shows the most explicit style: every edge is
+// declared with DependsOn. Verbose but unambiguous, and it works for any
+// shape of graph.
+func ExampleWorkflow_dependsOn() {
+	var (
+		clone   = &stage{"clone"}
+		build   = &stage{"build"}
+		lint    = &stage{"lint"}
+		test    = &stage{"test"}
+		publish = &stage{"publish"}
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Steps(build, lint).DependsOn(clone), // fan-out: both depend on clone
+		flow.Step(test).DependsOn(build, lint),   // fan-in: waits for both
+		flow.Step(publish).DependsOn(test),
+	)
+
+	_ = w.Do(context.Background())
+	// Unordered output:
+	// clone
+	// build
+	// lint
+	// test
+	// publish
+}
+
+// ExampleWorkflow_pipe shows the shorthand for *linear* chains. Pipe(a, b, c)
+// is exactly Step(b).DependsOn(a) + Step(c).DependsOn(b). Use Pipe when the
+// graph is a straight line; it reads top-to-bottom like a script.
+func ExampleWorkflow_pipe() {
+	var (
+		clone   = &stage{"clone"}
+		build   = &stage{"build"}
+		test    = &stage{"test"}
+		publish = &stage{"publish"}
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		// Pure linear pipeline. Equivalent to three DependsOn calls.
+		flow.Pipe(clone, build, test, publish),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// clone
+	// build
+	// test
+	// publish
+}
+
+// ExampleWorkflow_batchPipe shows BatchPipe — a shorthand for "every step
+// in the next batch depends on every step in the previous one". This is
+// the cleanest way to describe a fan-out / fan-in topology.
+//
+// Compare with ExampleWorkflow_dependsOn above: same graph, fewer edges to
+// type out.
+func ExampleWorkflow_batchPipe() {
+	var (
+		clone   = &stage{"clone"}
+		build   = &stage{"build"}
+		lint    = &stage{"lint"}
+		test    = &stage{"test"}
+		publish = &stage{"publish"}
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.BatchPipe(
+			flow.Steps(clone),
+			flow.Steps(build, lint), // both depend on clone (in parallel)
+			flow.Steps(test),        // waits for build AND lint
+			flow.Steps(publish),
+		),
+	)
+
+	_ = w.Do(context.Background())
+	// Unordered output:
+	// clone
+	// build
+	// lint
+	// test
+	// publish
+}
+
+// ExampleName shows how to give a Step a friendly display name. The name
+// is what gets printed by `String()` — so it shows up in error messages
+// (`ErrWorkflow`), in interceptor logs, and anywhere the library prints
+// the Step.
+//
+// Useful when:
+//   - your Step is an anonymous struct or third-party type with no good name;
+//   - you want to disambiguate two instances of the same struct type;
+//   - your name depends on runtime data (use NameFunc / NameStringer).
+func ExampleName() {
+	// A bare struct without a String() method prints like *flow_test.compile.
+	type compile struct{ flow.NoOpStep }
+	step := &compile{}
+
+	w := new(flow.Workflow)
+	w.Add(
+		// Wrap step in a NamedStep that prints "compile (release)" instead.
+		flow.Name(step, "compile (release)"),
+	)
+
+	_ = w.Do(context.Background())
+	// Reach back through the wrapper to print the registered Step's name.
+	for _, s := range w.Steps() {
+		fmt.Println(s)
+	}
+	// Output:
+	// compile (release)
+}
diff --git a/example/03_data_flow_test.go b/example/03_data_flow_test.go
new file mode 100644
index 0000000..18fc611
--- /dev/null
+++ b/example/03_data_flow_test.go
@@ -0,0 +1,146 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Data flow: passing values between Steps
+//
+// **What you'll learn**
+//   - The standard pattern: each Step exposes its inputs and outputs as
+//     plain fields; `Input` callbacks copy upstream outputs into the
+//     downstream's input fields right before `Do` runs.
+//   - When you don't want to define a struct per Step, `flow.Func` /
+//     `FuncIO` / `FuncI` / `FuncO` produce ready-made generic Steps
+//     (`*flow.Function[I, O]`) that work with `Input` the same way.
+//   - When `Input` callbacks run (after upstreams terminate, before `Do`).
+
+// ExampleAddStep_Input shows the standard pattern, expanded into a 3-step
+// pipeline that reads a feed, counts items, and announces the count.
+//
+// Each Step is a plain struct: inputs are fields filled by `Input`,
+// outputs are fields written by `Do`. The downstream's `Input` callback
+// reads from upstreams (captured by closure) and writes into the
+// downstream's input fields.
+//
+// Why use Input (as 01_quickstart also does) rather than having the Step
+// hold direct pointers to its upstreams? Two reasons:
+//   - The Step stays self-contained: you can construct one and call its
+//     `Do` directly in a unit test by setting the input fields yourself.
+//   - The wiring lives next to `DependsOn`, so the data flow and the
+//     dependency are declared together where you read the workflow.
+func ExampleAddStep_Input() {
+	feed := &fetchFeed{}
+	count := &countItems{}
+	announce := &announceCount{}
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(count).
+			DependsOn(feed).
+			Input(func(ctx context.Context, c *countItems) error {
+				c.Body = feed.Body
+				return nil
+			}),
+		flow.Step(announce).
+			DependsOn(count).
+			Input(func(ctx context.Context, a *announceCount) error {
+				a.N = count.N
+				return nil
+			}),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// found 3 items
+}
+
+type fetchFeed struct {
+	Body string // output
+}
+
+func (f *fetchFeed) Do(ctx context.Context) error {
+	f.Body = "item\nitem\nitem\nfooter" // pretend this is an HTTP fetch.
+	return nil
+}
+
+type countItems struct {
+	Body string // input — copied in by Input callback
+	N    int    // output
+}
+
+func (c *countItems) Do(ctx context.Context) error {
+	for _, line := range strings.Split(c.Body, "\n") {
+		if line == "item" {
+			c.N++
+		}
+	}
+	return nil
+}
+
+type announceCount struct {
+	N int // input — copied in by Input callback
+}
+
+func (a *announceCount) Do(ctx context.Context) error {
+	fmt.Printf("found %d items\n", a.N)
+	return nil
+}
+
+// ExampleFunction_inputOutput shows the convenience variant — when you
+// don't want to declare a struct just to define a Step body. `flow.Func`
+// and friends produce a generic `*flow.Function[I, O]` whose `Input`
+// field is the typed input and `Output` field is the typed output:
+//
+//	flow.Func    — no input, no output (just a Do function)
+//	flow.FuncO   — no input, typed output
+//	flow.FuncI   — typed input, no output
+//	flow.FuncIO  — typed input, typed output
+//
+// Mechanics are exactly the same as the struct version above: the
+// `Input` callback runs after upstreams terminate, and you copy the
+// values across.
+func ExampleFunction_inputOutput() {
+	var (
+		fetch = flow.FuncO("FetchFeed", func(ctx context.Context) (string, error) {
+			return "item\nitem\nfooter", nil
+		})
+		count = flow.FuncIO("CountItems", func(ctx context.Context, body string) (int, error) {
+			n := 0
+			for _, line := range strings.Split(body, "\n") {
+				if line == "item" {
+					n++
+				}
+			}
+			return n, nil
+		})
+		announce = flow.FuncI("Announce", func(ctx context.Context, n int) error {
+			fmt.Printf("found %d items\n", n)
+			return nil
+		})
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(count).
+			DependsOn(fetch).
+			Input(func(ctx context.Context, f *flow.Function[string, int]) error {
+				f.Input = fetch.Output
+				return nil
+			}),
+		flow.Step(announce).
+			DependsOn(count).
+			Input(func(ctx context.Context, f *flow.Function[int, struct{}]) error {
+				f.Input = count.Output
+				return nil
+			}),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// found 2 items
+}
diff --git a/example/03_io_data_flow_test.go b/example/03_io_data_flow_test.go
deleted file mode 100644
index a8058f2..0000000
--- a/example/03_io_data_flow_test.go
+++ /dev/null
@@ -1,154 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"errors"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Data Flow via `Input` and `Output`
-//
-// After connected Steps into Workflow via dependencies,
-// there is a very common scenarios that passing value / data through dependency.
-//
-// `flow` is designed with the support of flowing data between Steps, introduce `Input`:
-//
-//	Step(someTask).
-//		DependsOn(upstreamTask).
-//		Input(func(_ context.Context, someTask *SomeTask) error {
-//			// fill someTask with data that
-//			// only available at runtime
-//			someTask.Input = upstreamTask.Output
-//		}).Output(func(_ context.Context, someTask *SomeTask) error {
-//			// get output from someTask
-//			use(someTask.Output)
-//		}),
-//
-// Notice the callbacks declares in Input() and Output() are executed at runtime, before Do, and per try.
-func ExampleAddStep_Input() {
-	// Now, let's connect the Steps into Workflow with data flow.
-	var (
-		workflow = new(flow.Workflow)
-		imBob    = new(ImBob)
-		sayHello = new(SayHello)
-	)
-
-	workflow.Add(
-		flow.Step(sayHello).DependsOn(imBob).
-			Input(func(ctx context.Context, sayHello *SayHello) error {
-				sayHello.Who = imBob.Output // imBob's Output will be passed to sayHello's Input
-				return nil
-			}),
-		// Notice the Input callback signature, the second parameter is the Step itself.
-		// This design is intended to make the Input callback more flexible and reusable.
-	)
-	andAlice := func(ctx context.Context, anySayHello *SayHello) error {
-		anySayHello.Who += " and Alice"
-		return nil
-	}
-	workflow.Add(
-		flow.Step(sayHello).Input(andAlice),
-	)
-
-	_ = workflow.Do(context.TODO())
-	fmt.Println(sayHello.Output == "Hello Bob and Alice")
-	// Output:
-	// Hello Bob and Alice
-	// true
-}
-
-// # BeforeStep and AfterStep callbacks
-//
-// [READ BELOW ONLY WHEN YOU ARE INTERESTED IN THE IMPLEMENTATION]
-//
-// The Input callbacks are actually a special BeforeStep callbacks.
-// The BeforeStep and AfterStep callbacks are a feature that allows you to hook into the execution of a Step.
-//
-//	                   ▼
-//	  Step           │ctx│
-//	┌────────────────┘ │ └────────────────────┐
-//	│                  ▼                      │
-//	│          ┌────► ctx                     │
-//	│          │       │                      │
-//	│          │  ┌────▼─────┐                │
-//	│ err==nil │  │BeforeStep├┐               │
-//	│          │  └┬─────────┼│               │
-//	│          │   └───┼──────┘               │
-//	│          │       │                      │
-//	│          │       ▼       ┌────────┐     │
-//	│          └── ctx, error ─►err!=nil├─┐   │
-//	│                  │       └────────┘ │   │
-//	│        finish all│BeforeStep        │   │
-//	│                  │                  │   │
-//	│                 ctx                 │   │
-//	│                  │                  │   │
-//	│           ┌──────▼──────┐           │   └──
-//	│           │Do(ctx) error│           ├─► err ►
-//	│           └──────┬──────┘           │   ┌──
-//	│                  │                  │   │
-//	│              ctx,│error             │   │
-//	│                  │                  │   │
-//	│             ┌────▼────┐             │   │
-//	│             │AfterStep├┐            │   │
-//	│             └┬────────┼┼────err─────┘   │
-//	│              └─────────┘                │
-//	│        finish all AfterStep             │
-//	└─────────────────────────────────────────┘
-func ExampleAddSteps_BeforeStep() {
-	workflow := new(flow.Workflow)
-
-	var (
-		foo = new(Foo)
-		bar = new(Bar)
-	)
-
-	workflow.Add(
-		flow.Step(foo).DependsOn(bar).
-			BeforeStep(func(ctx context.Context, _ flow.Steper) (context.Context, error) {
-				fmt.Println("BeforeStep")
-				ctx = context.WithValue(ctx, "key", "value") // the value is available in Do
-				return ctx, nil
-			}).
-			AfterStep(func(ctx context.Context, _ flow.Steper, err error) error {
-				fmt.Println("AfterStep")
-				// do some check on err
-				if err != nil {
-					fmt.Println("AfterStep: ", err)
-				}
-				return fmt.Errorf("NewError")
-			}),
-	)
-
-	var errWorkflow flow.ErrWorkflow
-	if errors.As(workflow.Do(context.TODO()), &errWorkflow) {
-		fmt.Println(errWorkflow[foo].Unwrap())
-	}
-	// Output:
-	// Bar
-	// BeforeStep
-	// Foo
-	// AfterStep
-	// NewError
-}
-
-type SayHello struct {
-	Who    string
-	Output string
-}
-
-func (s *SayHello) Do(context.Context) error {
-	s.Output = "Hello " + s.Who
-	fmt.Println(s.Output)
-	return nil
-}
-
-type ImBob struct {
-	Output string
-}
-
-func (i *ImBob) Do(context.Context) error {
-	i.Output = "Bob"
-	return nil
-}
diff --git a/example/04_callbacks_test.go b/example/04_callbacks_test.go
new file mode 100644
index 0000000..da7a9b1
--- /dev/null
+++ b/example/04_callbacks_test.go
@@ -0,0 +1,104 @@
+package flow_test
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Callbacks: BeforeStep / AfterStep
+//
+// **What you'll learn**
+//   - Use `BeforeStep` to mutate context (or short-circuit) right before Do.
+//   - Use `AfterStep` to inspect / transform the error right after Do.
+//   - Where these callbacks sit in the execution stack vs `Input` /
+//     Interceptors.
+//
+// **Where they fit**
+//
+//	StepInterceptor (workflow-level, see 10_observability_test.go)
+//	  └── retry loop (one iteration per attempt)
+//	      └── AttemptInterceptor (workflow-level)
+//	          └── BeforeStep callbacks   ← runs once PER ATTEMPT
+//	              └── Input callbacks    (a special BeforeStep)
+//	                  └── step.Do(ctx)
+//	              └── AfterStep callbacks ← runs once PER ATTEMPT
+//
+// `BeforeStep` and `AfterStep` are step-level (configured per Step). Use
+// them when behaviour applies to one Step. Reach for an Interceptor when
+// it applies to every Step in the Workflow.
+
+// ExampleAddStep_BeforeStep adds Before/After callbacks to a Step. The
+// Step is just a plain struct with Do — same shape as in 01 and 03.
+func ExampleAddStep_BeforeStep() {
+	greet := &greeter{Name: "world"}
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(greet).
+			// BeforeStep can read/modify ctx, or return an error so that Do
+			// is not run for this attempt. The returned ctx is forwarded to
+			// subsequent BeforeStep callbacks and ultimately to Do.
+			BeforeStep(func(ctx context.Context, _ flow.Steper) (context.Context, error) {
+				fmt.Println("(before)")
+				return ctx, nil
+			}).
+			// AfterStep can inspect or transform Do's error. Return nil to
+			// suppress; return a different error to replace it.
+			AfterStep(func(ctx context.Context, _ flow.Steper, err error) error {
+				fmt.Println("(after) err=", err)
+				return err
+			}),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// (before)
+	// hello, world
+	// (after) err= <nil>
+}
+
+type greeter struct {
+	Name string
+}
+
+func (g *greeter) Do(ctx context.Context) error {
+	fmt.Printf("hello, %s\n", g.Name)
+	return nil
+}
+
+// ExampleAddStep_AfterStep_transformError shows the most common AfterStep
+// idiom: catch a known error and convert it to nil (suppress) or to a
+// domain-specific error.
+func ExampleAddStep_AfterStep_transformError() {
+	lookup := &lookupItem{}
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(lookup).
+			AfterStep(func(ctx context.Context, _ flow.Steper, err error) error {
+				if errors.Is(err, errNotFound) {
+					// "Not found" is fine for this workflow — treat as success.
+					fmt.Println("nothing to do")
+					return nil
+				}
+				return err
+			}),
+	)
+
+	if err := w.Do(context.Background()); err != nil {
+		fmt.Println("workflow failed:", err)
+	}
+	// Output:
+	// nothing to do
+}
+
+var errNotFound = errors.New("not found")
+
+type lookupItem struct{}
+
+func (l *lookupItem) Do(ctx context.Context) error {
+	return errNotFound
+}
diff --git a/example/04_condition_when_test.go b/example/04_condition_when_test.go
deleted file mode 100644
index e159f36..0000000
--- a/example/04_condition_when_test.go
+++ /dev/null
@@ -1,127 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Condition and When
-//
-// Workflow tracks and updates each Step's status according to the execution result.
-//
-// StepStatus and Condition designs are learn from https://docs.github.com/en/actions/learn-github-actions/expressions#status-check-functions
-//
-// StepStatus are:
-//   - Pending
-//   - Running
-//   - Failed
-//   - Succeeded
-//   - Canceled
-//   - Skipped
-//
-// Only Pending Step can be queued to be executed.
-//
-// Before kicking the Step off, Workflow will check current Step's When setting
-//
-//	// When is a function to determine what's the next status of Step.
-//	// When makes the decision based on the status and result of all the Upstream Steps.
-//	// When is only called when all Upstreams are terminated.
-//	type When func(context.Context, map[Steper]StatusError) StepStatus
-//
-// After When makes the decision of next status, Workflow will update Step's status accordingly.
-//
-// If the decision is Running, Workflow starts a goroutine to run the Step.
-func ExampleCondition() {
-	var (
-		succeeded = new(SucceededStep)
-		failed    = new(FailedStep)
-		canceled  = new(CanceledStep)
-		skipped   = new(SkippedStep)
-		custom    = Print("CustomStep")
-
-		allSucceeded = Print("AllSucceeded")
-		always       = Print("Always")
-		anyFailed    = Print("AnyFailed")
-		beCanceled   = Print("BeCanceled")
-		customWhen   = func(ctx context.Context, ups map[flow.Steper]flow.StepResult) flow.StepStatus {
-			// use built-in Condition to check upstreams' status
-			status := flow.AllSucceededOrSkipped(ctx, ups)
-			if status != flow.Running {
-				return status
-			}
-			// do custom logic
-			if result, ok := ups[succeeded]; ok {
-				if result.Err != nil {
-					return flow.Failed // fail if succeeded Step has error (it shouldn't happen)
-				}
-			}
-			return flow.Running
-		}
-	)
-
-	workflow := new(flow.Workflow)
-	workflow.Add(
-		// AllSucceeded will run when all Upstreams are Succeeded,
-		// so allSucceeded will be skipped here.
-		flow.Step(allSucceeded).DependsOn(succeeded, failed, canceled, skipped).
-			When(flow.AllSucceeded),
-		// AnyFailed will run when any Upstream is Failed,
-		// so anyFailed will be run.
-		flow.Step(anyFailed).DependsOn(succeeded, failed, canceled, skipped).
-			When(flow.AnyFailed),
-	)
-	_ = workflow.Do(context.Background())                   // AnyFailed
-	fmt.Println(workflow.StateOf(allSucceeded).GetStatus()) // Skipped
-	fmt.Println(workflow.StateOf(skipped).GetStatus())      // Skipped
-	fmt.Println(workflow.StateOf(canceled).GetStatus())     // Canceled
-
-	workflow = new(flow.Workflow)
-	workflow.Add(
-		// Always will run the Step regardlessly
-		flow.Step(always).DependsOn(succeeded, failed, canceled, skipped).
-			When(flow.Always),
-		// BeCanceled will run when the workflow is canceled
-		flow.Step(beCanceled).When(flow.BeCanceled).DependsOn(always),
-	)
-	ctx, cancel := context.WithCancel(context.Background())
-	cancel() // just cancel this ctx
-	_ = workflow.Do(ctx)
-	// Always
-	// BeCanceled
-	fmt.Println(workflow.StateOf(succeeded).GetStatus())  // Canceled
-	fmt.Println(workflow.StateOf(always).GetStatus())     // Succeeded
-	fmt.Println(workflow.StateOf(beCanceled).GetStatus()) // Succeeded
-
-	workflow = new(flow.Workflow)
-	workflow.Add(
-		flow.Step(custom).When(customWhen).DependsOn(succeeded, skipped),
-	)
-	_ = workflow.Do(context.Background())
-	// CustomStep
-	fmt.Println(workflow.StateOf(custom).GetStatus()) // Succeeded
-
-	// Output:
-	// AnyFailed
-	// Skipped
-	// Skipped
-	// Canceled
-	// Always
-	// BeCanceled
-	// Canceled
-	// Succeeded
-	// Succeeded
-	// CustomStep
-	// Succeeded
-}
-
-type SucceededStep struct{}
-type FailedStep struct{}
-type CanceledStep struct{}
-type SkippedStep struct{}
-
-func (s *SucceededStep) Do(context.Context) error { return nil }
-func (s *FailedStep) Do(context.Context) error    { return fmt.Errorf("failed!") }
-func (s *CanceledStep) Do(context.Context) error  { return flow.Cancel(fmt.Errorf("cancel")) } // notice you can manually cancel a Step
-func (s *SkippedStep) Do(context.Context) error   { return flow.Skip(fmt.Errorf("skip")) }     // and Skip also
diff --git a/example/05_branch_if_switch_test.go b/example/05_branch_if_switch_test.go
deleted file mode 100644
index 2c53a4b..0000000
--- a/example/05_branch_if_switch_test.go
+++ /dev/null
@@ -1,80 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Branch: If / Switch
-//
-// Based on the condition, now it's possible to add branch control to the workflow.
-//
-// Introduce `If` and `Switch`, they're not steps,
-// rather a control branch that add into workflow and manages the condition of their branch steps.
-func ExampleIf() {
-	var (
-		item       string
-		isNotEmpty = flow.FuncO("IsNotEmpty", func(ctx context.Context) (bool, error) {
-			return item != "", nil
-		})
-		newIt = flow.Func("NewIt", func(ctx context.Context) error {
-			item = "new"
-			return nil
-		})
-		updateIt = flow.Func("UpdateIt", func(ctx context.Context) error {
-			item += "_updated"
-			return nil
-		})
-	)
-	w := new(flow.Workflow).Add(
-		flow.If(isNotEmpty, func(ctx context.Context, f *flow.Function[struct{}, bool]) (bool, error) {
-			return f.Output, nil
-		}).
-			Then(updateIt).
-			Else(newIt),
-	)
-	fmt.Println(item) //
-	w.Do(context.Background())
-	fmt.Println(item) // new
-	w.Do(context.Background())
-	fmt.Println(item) // new_updated
-	// Output:
-	//
-	// new
-	// new_updated
-}
-
-func ExampleSwitch() {
-	var (
-		age    int
-		getAge = flow.Func("GetAge", func(ctx context.Context) error {
-			age = 20
-			return nil
-		})
-		canDrive  = Print("CanDrive")
-		canDrink  = Print("CanDrink")
-		canOwnGun = Print("CanOwnGun")
-	)
-	w := new(flow.Workflow).Add(
-		flow.Switch(getAge).
-			Case(canDrive, func(ctx context.Context, f *flow.Function[struct{}, struct{}]) (bool, error) {
-				return age >= 16, nil
-			}).
-			Case(canDrink, func(ctx context.Context, f *flow.Function[struct{}, struct{}]) (bool, error) {
-				return age >= 21, nil
-			}).
-			Case(canOwnGun, func(ctx context.Context, f *flow.Function[struct{}, struct{}]) (bool, error) {
-				return age >= 18, nil
-			}),
-
-		flow.Step(canOwnGun).DependsOn(canDrive), // just let them print in order
-	)
-	w.Do(context.Background())
-	fmt.Println(w.StateOf(canDrink).Status) // Skipped
-	// Output:
-	// CanDrive
-	// CanOwnGun
-	// Skipped
-}
diff --git a/example/05_conditions_test.go b/example/05_conditions_test.go
new file mode 100644
index 0000000..e127cd0
--- /dev/null
+++ b/example/05_conditions_test.go
@@ -0,0 +1,107 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Conditions: deciding whether a Step runs
+//
+// **What you'll learn**
+//   - Every Step has a Condition that decides — based on its upstreams'
+//     terminal status — whether the Step starts `Running` or settles inline
+//     as `Skipped` / `Canceled`.
+//   - Built-in conditions: `AllSucceeded` (default), `AllSucceededOrSkipped`,
+//     `Always`, `AnyFailed`, `BeCanceled`.
+//   - Use `flow.Skip(err)` / `flow.Cancel(err)` inside `Do` to settle a Step
+//     yourself.
+//
+// **Status state machine**
+//
+// Steps move through these states. Only `Pending` is queueable; the four
+// terminal states are mutually exclusive.
+//
+//	Pending ─► Running ─► Succeeded | Failed (or Skipped | Canceled via flow.Skip / flow.Cancel in Do)
+//	  │
+//	  └──► Skipped | Canceled (decided by Condition before Running)
+//
+// **Why this matters**
+//
+// Steps that are settled inline (Skipped/Canceled by Condition) DO NOT enter
+// the interceptor chain or consume a MaxConcurrency lease. So conditions are
+// the right place to short-circuit work — rather than an AfterStep callback
+// or a bail-out inside Do.
+
+// ExampleCondition_default shows the default condition: a Step runs only if
+// every upstream Succeeded. If anything else terminated upstream, the Step
+// is Skipped.
+func ExampleCondition_default() {
+	var (
+		ok         = flow.Func("ok", func(ctx context.Context) error { return nil })
+		boom       = flow.Func("boom", func(ctx context.Context) error { return fmt.Errorf("boom") })
+		downstream = flow.Func("downstream", func(ctx context.Context) error {
+			fmt.Println("downstream ran") // must never print: boom fails upstream
+			return nil
+		})
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(downstream).DependsOn(ok, boom), // default = AllSucceeded
+	)
+	_ = w.Do(context.Background()) // boom's error is expected here, so it is deliberately ignored
+
+	fmt.Println("downstream:", w.StateOf(downstream).GetStatus())
+	// Output:
+	// downstream: Skipped
+}
+
+// ExampleCondition_anyFailed shows a recovery / cleanup pattern: a Step
+// that runs *because* something upstream failed.
+func ExampleCondition_anyFailed() {
+	var (
+		ok       = flow.Func("ok", func(ctx context.Context) error { return nil })
+		boom     = flow.Func("boom", func(ctx context.Context) error { return fmt.Errorf("boom") })
+		recovery = flow.Func("recover", func(ctx context.Context) error { // variable is not named `recover`: that would shadow the builtin
+			fmt.Println("recover ran")
+			return nil
+		})
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(recovery).
+			DependsOn(ok, boom).
+			When(flow.AnyFailed), // run only because an upstream (boom) Failed
+	)
+	_ = w.Do(context.Background()) // boom's error is expected here, so it is deliberately ignored
+	// Output:
+	// recover ran
+}
+
+// ExampleCondition_skipFromDo shows how to settle a Step as Skipped from
+// inside `Do` (for example, "this run has nothing to do for these inputs").
+// Wrap the cause with `flow.Skip(err)`. Use `flow.Cancel(err)` similarly to
+// mark a Step as Canceled.
+//
+// Skipped is the polite way to say "I had nothing to do" — it is NOT a
+// failure, and downstreams with the default condition (AllSucceeded) will
+// also Skip rather than Run. Set Workflow.SkipAsError = true if you want
+// Skipped to count as a workflow error.
+func ExampleCondition_skipFromDo() {
+	var nothing = flow.Func("nothing", func(ctx context.Context) error {
+		return flow.Skip(fmt.Errorf("no work for now"))
+	})
+
+	w := new(flow.Workflow)
+	w.Add(flow.Step(nothing))
+
+	err := w.Do(context.Background()) // returns nil; Skipped is not surfaced.
+	fmt.Println("err:", err)
+	fmt.Println("status:", w.StateOf(nothing).GetStatus())
+	// Output:
+	// err: <nil>
+	// status: Skipped
+}
diff --git a/example/06_branching_test.go b/example/06_branching_test.go
new file mode 100644
index 0000000..7073f28
--- /dev/null
+++ b/example/06_branching_test.go
@@ -0,0 +1,103 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Branching: If / Switch
+//
+// **What you'll learn**
+//   - `If` and `Switch` are not Steps — they are *control branches* that
+//     evaluate a predicate and decide which downstream Step(s) run.
+//   - The predicate sees the producer Step's typed Output, so the branch
+//     can react to runtime data.
+//
+// **Mental model**
+//
+// `If(producer, predicate).Then(thenStep).Else(elseStep)` adds three Steps
+// to the Workflow: producer, thenStep, and elseStep — wired so that exactly
+// one of (thenStep, elseStep) runs and the other is Skipped.
+//
+//	  producer ── predicate(producer.Output) ──► true  ──► thenStep
+//	                                       └──► false ──► elseStep
+//
+// `Switch(producer)` is the multi-way version: each `.Case(step, predicate)`
+// declares one branch. Unlike a Go `switch`, cases are NOT exclusive:
+// every case whose predicate returns true runs. Make your predicates
+// mutually exclusive if you want only one branch to fire.
+
+// ExampleIf shows a typical "create or update" pattern: if the item doesn't
+// exist yet, create it; otherwise update the existing one.
+func ExampleIf() {
+	var item string
+
+	var (
+		// Producer: returns true if `item` already exists.
+		hasItem = flow.FuncO("HasItem", func(ctx context.Context) (bool, error) {
+			return item != "", nil
+		})
+		create = flow.Func("Create", func(ctx context.Context) error {
+			item = "new"
+			fmt.Println("created")
+			return nil
+		})
+		update = flow.Func("Update", func(ctx context.Context) error {
+			item += " (updated)"
+			fmt.Println("updated")
+			return nil
+		})
+	)
+
+	w := new(flow.Workflow).Add(
+		flow.If(hasItem, func(ctx context.Context, f *flow.Function[struct{}, bool]) (bool, error) {
+			return f.Output, nil
+		}).
+			Then(update). // run if hasItem.Output == true
+			Else(create), // run if hasItem.Output == false
+	)
+
+	_ = w.Do(context.Background()) // first run: item is empty → Create
+	_ = w.Do(context.Background()) // second run: item exists → Update
+	fmt.Println("final:", item)
+	// Output:
+	// created
+	// updated
+	// final: new (updated)
+}
+
+// ExampleSwitch shows a multi-way branch. Cases are NOT exclusive — every
+// predicate that returns true causes its Step to run. Use mutually
+// exclusive predicates (or a chain of `.Case(...)` with disjoint ranges)
+// when you want only one to fire.
+func ExampleSwitch() {
+	getAge := flow.FuncO("GetAge", func(ctx context.Context) (int, error) {
+		return 25, nil
+	})
+
+	var (
+		minor  = flow.Func("Minor", func(ctx context.Context) error { fmt.Println("minor"); return nil })
+		adult  = flow.Func("Adult", func(ctx context.Context) error { fmt.Println("adult"); return nil })
+		senior = flow.Func("Senior", func(ctx context.Context) error { fmt.Println("senior"); return nil })
+	)
+
+	// Disjoint, inclusive bands: 0–17, 18–64, 65–200. At most one matches any age.
+	band := func(lo, hi int) func(context.Context, *flow.Function[struct{}, int]) (bool, error) {
+		return func(ctx context.Context, f *flow.Function[struct{}, int]) (bool, error) {
+			return lo <= f.Output && f.Output <= hi, nil // lo/hi avoid shadowing the min/max builtins
+		}
+	}
+
+	w := new(flow.Workflow).Add(
+		flow.Switch(getAge).
+			Case(minor, band(0, 17)).
+			Case(adult, band(18, 64)).
+			Case(senior, band(65, 200)),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// adult
+}
diff --git a/example/06_retry_test.go b/example/06_retry_test.go
deleted file mode 100644
index 8934fcf..0000000
--- a/example/06_retry_test.go
+++ /dev/null
@@ -1,60 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Retry
-//
-// Workflow can retry a Step when it fails, and accept a RetryOption to customize the retry behavior.
-//
-//	// RetryOption customizes retry behavior of a Step in Workflow.
-//	type RetryOption struct {
-//		TimeoutPerTry time.Duration // 0 means no timeout
-//		Attempts      uint64        // 0 means no limit
-//		StopIf        func(ctx context.Context, attempt uint64, since time.Duration, err error) bool
-//		Backoff       backoff.BackOff
-//		Notify        backoff.Notify
-//		Timer         backoff.Timer
-//	}
-
-func ExampleAddSteps_Retry() {
-	var (
-		workflow   = new(flow.Workflow)
-		passAfter2 = &PassAfter{Attempt: 2}
-	)
-
-	workflow.Add(
-		flow.Step(passAfter2).
-			Retry(func(ro *flow.RetryOption) {
-				ro.Attempts = 5 // retry 5 times
-				ro.Timer = new(testTimer)
-			}),
-	)
-
-	_ = workflow.Do(context.TODO())
-	// Output:
-	// failed at attempt 0
-	// failed at attempt 1
-	// succeed at attempt 2
-}
-
-// PassAfter keeps failing until the attempt reaches the given number.
-type PassAfter struct {
-	Attempt int
-	count   int
-}
-
-func (p *PassAfter) Do(ctx context.Context) error {
-	defer func() { p.count++ }()
-	if p.count >= p.Attempt {
-		fmt.Printf("succeed at attempt %d\n", p.count)
-		return nil
-	}
-	err := fmt.Errorf("failed at attempt %d", p.count)
-	fmt.Println(err)
-	return err
-}
diff --git a/example/07_retry_and_timeout_test.go b/example/07_retry_and_timeout_test.go
new file mode 100644
index 0000000..b6c7e23
--- /dev/null
+++ b/example/07_retry_and_timeout_test.go
@@ -0,0 +1,126 @@
+package flow_test
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync"
+	"time"
+
+	flow "github.com/Azure/go-workflow"
+	"github.com/benbjohnson/clock"
+)
+
+// # Retry & Timeout: surviving flaky work
+//
+// **What you'll learn**
+//   - `Retry` re-runs a Step until it Succeeds or runs out of attempts.
+//   - `Timeout` (per-step) bounds the entire Step including all retries.
+//   - `Retry.TimeoutPerTry` bounds *each individual attempt* — much more
+//     useful than a global step timeout when retries are involved.
+//   - Both timeouts compose:
+//
+//	    ┌────────────── Step Timeout (Timeout) ──────────────┐
+//	    │  ┌── attempt ──┐  ┌── attempt ──┐  ┌── attempt ──┐ │
+//	    │  │ TimeoutPerTry│  │ TimeoutPerTry│  │ TimeoutPerTry│ │
+//	    │  └─────────────┘  └─────────────┘  └─────────────┘ │
+//	    └────────────────────────────────────────────────────┘
+//
+// **Defaults**
+//   - No retry. A failing Step's error is published as-is.
+//   - No timeout (per-step or per-try). The Step runs until ctx is done;
+//     use `context.WithTimeout` on the workflow ctx to bound the whole run.
+
+// ExampleAddStep_Retry shows the most common form: succeed eventually.
+//
+// `passAfter{n}` fails n times then succeeds. With Attempts = 5 the retry
+// loop has plenty of headroom.
+func ExampleAddStep_Retry() {
+	pa := &passAfter{n: 2}
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(pa).Retry(func(ro *flow.RetryOption) {
+			ro.Attempts = 5
+			ro.Timer = new(zeroTimer) // suppress real backoff sleep in the example
+		}),
+	)
+	_ = w.Do(context.Background())
+	// Output:
+	// fail #0
+	// fail #1
+	// pass #2
+}
+
+// ExampleAddStep_Retry_perTryTimeout shows TimeoutPerTry. The Step itself
+// runs forever (waits on ctx); each attempt is killed at the per-try
+// deadline; after `Attempts` killings, the Step settles as Canceled.
+//
+// We use a mock clock so the example is fast and deterministic.
+func ExampleAddStep_Retry_perTryTimeout() {
+	mock := clock.NewMock()
+	w := &flow.Workflow{Clock: mock}
+
+	startedAttempt := make(chan struct{}, 16)
+	hangForever := flow.Func("hang", func(ctx context.Context) error {
+		startedAttempt <- struct{}{} // signal "I'm running"
+		<-ctx.Done()                 // wait until killed by per-try timeout
+		return ctx.Err()
+	})
+
+	w.Add(
+		flow.Step(hangForever).Retry(func(ro *flow.RetryOption) {
+			ro.Attempts = 2
+			ro.TimeoutPerTry = 5 * time.Minute
+			ro.Timer = new(zeroTimer)
+		}),
+	)
+
+	// Clock driver: as each attempt starts, advance the mock clock past the
+	// per-try deadline so that attempt's ctx fires.
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for range startedAttempt {
+			mock.Add(6 * time.Minute) // tick past TimeoutPerTry
+		}
+	}()
+	workflowErr := w.Do(context.Background())
+	// Do has returned, so no attempt should still be sending; closing the
+	// channel ends the driver's range loop instead of leaking the goroutine.
+	close(startedAttempt)
+	wg.Wait()
+
+	var ew flow.ErrWorkflow
+	fmt.Println("ErrWorkflow?", errors.As(workflowErr, &ew))
+	fmt.Println("status:", w.StateOf(hangForever).GetStatus())
+	// Output:
+	// ErrWorkflow? true
+	// status: Canceled
+}
+
+// passAfter is a Step that fails n times then succeeds.
+type passAfter struct {
+	n      int
+	tryNum int
+}
+
+func (p *passAfter) String() string { return "passAfter" }
+func (p *passAfter) Do(ctx context.Context) error {
+	defer func() { p.tryNum++ }()
+	if p.tryNum < p.n {
+		fmt.Printf("fail #%d\n", p.tryNum)
+		return fmt.Errorf("transient")
+	}
+	fmt.Printf("pass #%d\n", p.tryNum)
+	return nil
+}
+
+// zeroTimer is a backoff Timer that fires immediately. Use in examples /
+// tests to skip real backoff sleeps.
+type zeroTimer struct{ t *time.Timer }
+
+func (z *zeroTimer) C() <-chan time.Time   { return z.t.C }
+func (z *zeroTimer) Start(d time.Duration) { z.t = time.NewTimer(0) }
+func (z *zeroTimer) Stop()                 { z.t.Stop() }
diff --git a/example/07_timeout_test.go b/example/07_timeout_test.go
deleted file mode 100644
index 2a1e74a..0000000
--- a/example/07_timeout_test.go
+++ /dev/null
@@ -1,103 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-	"sync"
-	"time"
-
-	flow "github.com/Azure/go-workflow"
-	"github.com/benbjohnson/clock"
-)
-
-// # Step Timeout and Per-Try Timeout
-//
-// Workflow can manages the timeout of each Step in different granularity.
-//
-//	       ┌────────────────Step Timeout──────────────┐
-//	       │                                          │
-//	       │ ┌─────────┐       ┌─────────┐            │
-//	START ─┴─► Step.Do ├─retry┌► Step.Do ├┐retry─►...─┴─► EXIT
-//	         └─────────┘      │┬─────────┼│
-//	                          └───────────┘
-//	                         Per-Try Timeout
-//
-//	workflow.Add(
-//		Step(a).
-//		Timeout(/* Step Timeout */).
-//		Retry(func(ro *flow.RetryOption) {
-//			ro.TimeoutPerTry = /* Per-Try Timeout */
-//		}),
-//	)
-func ExampleAddSteps_Timeout() {
-	var (
-		mock     = clock.NewMock() // use mock clock
-		workflow = &flow.Workflow{Clock: mock}
-		started  = make(chan struct{})
-		waitDone = &WaitDone{StartDo: started}
-	)
-
-	workflow.Add(
-		flow.Steps(waitDone).
-			Timeout(15 * time.Minute).
-			Retry(func(ro *flow.RetryOption) {
-				ro.TimeoutPerTry = 10 * time.Minute
-				ro.Attempts = 2
-				ro.Timer = new(testTimer)
-			}),
-	)
-
-	var err error
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		// you can, actually, pass a context with timeout to set a Workflow level timeout
-		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Hour)
-		defer cancel()
-		err = workflow.Do(ctx)
-	}()
-	go func() {
-		for range started {
-			mock.Add(10 * time.Minute) // tick forward 10 minute
-		}
-	}()
-	wg.Wait()
-
-	fmt.Println(err)
-	// Output:
-	// done
-	// done
-	// WaitDone: [Canceled]
-	//	context deadline exceeded
-}
-
-// WaitDone will be pending until the context is done.
-type WaitDone struct {
-	StartDo chan<- struct{} // signal it each time start Do()
-}
-
-func (p *WaitDone) String() string { return "WaitDone" }
-func (p *WaitDone) Do(ctx context.Context) error {
-	p.StartDo <- struct{}{}
-	<-ctx.Done()
-	fmt.Println("done")
-	return ctx.Err()
-}
-
-// testTimer is a Timer that all retry intervals are immediate (0).
-type testTimer struct {
-	timer *time.Timer
-}
-
-func (t *testTimer) C() <-chan time.Time {
-	return t.timer.C
-}
-
-func (t *testTimer) Start(duration time.Duration) {
-	t.timer = time.NewTimer(0)
-}
-
-func (t *testTimer) Stop() {
-	t.timer.Stop()
-}
diff --git a/example/08_workflow_in_workflow_test.go b/example/08_workflow_in_workflow_test.go
new file mode 100644
index 0000000..25ada98
--- /dev/null
+++ b/example/08_workflow_in_workflow_test.go
@@ -0,0 +1,95 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Workflow inside a Workflow: composing complex pipelines
+//
+// **What you'll learn**
+//   - `*Workflow` itself satisfies `Steper`, so a Workflow can be used as
+//     a Step inside another Workflow. This is the recommended way to
+//     compose multiple operations into a "compound" Step.
+//   - The naive alternative — a struct whose `Do` calls a fixed sequence
+//     of inner Steps directly — looks simpler but loses observability,
+//     per-inner-step retry, parallelism, and Mock-ability.
+//
+// **Rule of thumb**
+//
+//	Need 1 atomic action?              ─► implement Steper directly.
+//	Need a sequence of N actions?      ─► put them in a sub-Workflow.
+//	Don't roll your own composite by   ─► (see ExampleWorkflow_compositeAntipattern below).
+//	chaining Do() calls inside Do().
+
+// ExampleWorkflow_asStep shows the recommended pattern: build the inner
+// pipeline as its own Workflow, then plug it into the outer Workflow as
+// a Step. Every benefit of go-workflow (interceptors, retry, conditions,
+// MaxConcurrency) applies to the inner Steps too.
+func ExampleWorkflow_asStep() {
+	var (
+		clean   = flow.Func("Clean", func(ctx context.Context) error { fmt.Println("clean"); return nil })
+		compile = flow.Func("Compile", func(ctx context.Context) error { fmt.Println("compile"); return nil })
+		test    = flow.Func("Test", func(ctx context.Context) error { fmt.Println("test"); return nil })
+	)
+
+	// Inner workflow: the "build" sub-pipeline.
+	build := new(flow.Workflow).Add(
+		flow.Pipe(clean, compile, test),
+	)
+
+	var (
+		fetch   = flow.Func("Fetch", func(ctx context.Context) error { fmt.Println("fetch"); return nil })
+		publish = flow.Func("Publish", func(ctx context.Context) error { fmt.Println("publish"); return nil })
+	)
+
+	// Outer workflow: fetch ─► build ─► publish.
+	outer := new(flow.Workflow).Add(
+		flow.Pipe(fetch, build, publish),
+	)
+
+	_ = outer.Do(context.Background())
+	// Output:
+	// fetch
+	// clean
+	// compile
+	// test
+	// publish
+}
+
+// ExampleWorkflow_compositeAntipattern shows what NOT to do, and why.
+// `compositeStep` runs three inner Steps from inside its own Do(). It
+// works — but the Workflow has no idea those inner Steps exist:
+//   - Workflow.MaxConcurrency does NOT apply to inner Steps.
+//   - Per-inner Retry / Timeout / When are not configurable.
+//   - Interceptors only see one outer Step, not the three inner ones.
+//   - flow.Mock can't mock individual inner Steps for tests.
+//
+// Use a sub-Workflow (above) instead.
+func ExampleWorkflow_compositeAntipattern() {
+	w := new(flow.Workflow).Add(
+		flow.Step(&compositeStep{label: "build"}),
+	)
+	_ = w.Do(context.Background())
+	// Output:
+	// build: clean
+	// build: compile
+	// build: test
+}
+
+// compositeStep is the antipattern: a single Step that internally chains
+// several actions. Don't do this for production pipelines — see
+// ExampleWorkflow_asStep above for the right way.
+type compositeStep struct {
+	label string
+}
+
+func (c *compositeStep) String() string { return c.label }
+func (c *compositeStep) Do(ctx context.Context) error {
+	for _, action := range []string{"clean", "compile", "test"} {
+		fmt.Printf("%s: %s\n", c.label, action)
+	}
+	return nil
+}
diff --git a/example/08_workflow_option_test.go b/example/08_workflow_option_test.go
deleted file mode 100644
index 926a53b..0000000
--- a/example/08_workflow_option_test.go
+++ /dev/null
@@ -1,137 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-	"sync"
-	"sync/atomic"
-	"time"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Workflow Options
-//
-// Workflow provides options that configures its behavior.
-//
-//	type Workflow struct {
-//		MaxConcurrency int  // MaxConcurrency limits the max concurrency of running Steps
-//		DontPanic      bool // DontPanic suppress panics, instead return it as error
-//		OKToSkip       bool // OKToSkip returns nil if all Steps succeeded or skipped, otherwise only return nil if all Steps succeeded
-//	}
-
-func ExampleWorkflow_MaxConcurrency() {
-	var (
-		workflow = &flow.Workflow{
-			MaxConcurrency: 2,
-		}
-
-		counter = new(atomic.Int32)
-		start   = make(chan struct{})
-		done    = make(chan struct{})
-
-		countOneThenWaitDone = func(context.Context) error {
-			counter.Add(1)
-			start <- struct{}{} // signal start
-			<-done
-			return nil
-		}
-
-		a = flow.Func("a", countOneThenWaitDone)
-		b = flow.Func("b", countOneThenWaitDone)
-		c = flow.Func("c", countOneThenWaitDone)
-	)
-
-	workflow.Add(flow.Steps(a, b, c))
-
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		_ = workflow.Do(context.TODO())
-	}()
-
-	// should only two Steps are running concurrently
-	<-start
-	<-start
-	// <-start // this will block
-	fmt.Println(counter.Load()) // 2
-
-	// unblock one Step
-	done <- struct{}{}
-	<-start
-	fmt.Println(counter.Load()) // 3
-
-	// unblock all Step
-	close(done)
-
-	// wait the Workflow to finish
-	wg.Wait()
-
-	// Output:
-	// 2
-	// 3
-}
-
-func ExampleWorkflow_DontPanic() {
-	var (
-		workflow = &flow.Workflow{
-			DontPanic: true,
-		}
-
-		panicStep = flow.Func("panic", func(context.Context) error {
-			panic("I'm panicking")
-		})
-	)
-
-	workflow.Add(flow.Step(panicStep))
-
-	fmt.Println(workflow.Do(context.TODO()))
-	// Output:
-	// panic: [Failed]
-	//	I'm panicking
-}
-
-func ExampleWorkflow_SkipAsError() {
-	var (
-		workflow1 = &flow.Workflow{
-			SkipAsError: true,
-		}
-		workflow2 = &flow.Workflow{
-			SkipAsError: false,
-		}
-
-		skipped = flow.Func("skipped", func(context.Context) error {
-			return flow.Skip(fmt.Errorf("skip me"))
-		})
-	)
-
-	workflow1.Add(flow.Step(skipped))
-	workflow2.Add(flow.Step(skipped))
-
-	fmt.Println(workflow1.Do(context.TODO()))
-	fmt.Println(workflow2.Do(context.TODO()))
-	// Output:
-	// skipped: [Skipped]
-	//	skip me
-	//
-	// <nil>
-}
-
-func ExampleWorkflow_DefaultOption() {
-	var (
-		defaultTimeout = 10 * time.Minute
-		workflow       = &flow.Workflow{
-			DefaultOption: &flow.StepOption{
-				Timeout: &defaultTimeout,
-			},
-		}
-		step = flow.NoOp("step")
-	)
-
-	workflow.Add(flow.Step(step))
-	opt := workflow.StateOf(step).Option()
-	fmt.Println(*opt.Timeout)
-	// Output:
-	// 10m0s
-}
diff --git a/example/09_workflow_options_test.go b/example/09_workflow_options_test.go
new file mode 100644
index 0000000..9d3263b
--- /dev/null
+++ b/example/09_workflow_options_test.go
@@ -0,0 +1,85 @@
+package flow_test
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync/atomic"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Workflow options: tuning execution
+//
+// **What you'll learn**
+//   - `MaxConcurrency` caps how many Steps run at the same time.
+//   - `DontPanic` recovers panics inside Step bodies and converts them to
+//     errors instead of crashing the program.
+//
+// **Other options (see godoc on `Workflow`)**
+//   - `Clock`         — inject a deterministic clock for testing.
+//   - `DefaultOption` — apply a `StepOption` (timeout, retry, …) to every
+//                       Step. Per-Step options still win.
+//   - `SkipAsError`   — treat `Skipped` Steps as workflow errors.
+
+// ExampleWorkflow_MaxConcurrency caps parallelism. Steps that are eligible
+// to run beyond the cap wait for a slot to free up.
+func ExampleWorkflow_MaxConcurrency() {
+	const limit = 2 // named `limit` so the builtin cap() is not shadowed
+
+	var (
+		live        atomic.Int32 // Steps currently inside Do
+		maxObserved atomic.Int32 // high-water mark of `live`
+		released    atomic.Bool  // ensures gate is closed exactly once
+	)
+	gate := make(chan struct{}) // closed once `limit` Steps are running together
+
+	work := func(name string) *flow.Function[struct{}, struct{}] {
+		return flow.Func(name, func(ctx context.Context) error {
+			n := live.Add(1)
+			defer live.Add(-1)
+			for {
+				cur := maxObserved.Load()
+				if n <= cur || maxObserved.CompareAndSwap(cur, n) {
+					break
+				}
+			}
+			// The Step that brings the live count up to `limit` opens the
+			// gate; no helper goroutine has to spin on maxObserved.
+			if n >= limit && released.CompareAndSwap(false, true) {
+				close(gate)
+			}
+			// Everyone else parks here, forcing `limit` Steps to overlap.
+			if n <= limit {
+				<-gate
+			}
+			return nil
+		})
+	}
+
+	w := &flow.Workflow{MaxConcurrency: limit}
+	w.Add(flow.Steps(work("a"), work("b"), work("c"), work("d"), work("e")))
+
+	_ = w.Do(context.Background())
+	fmt.Println("max concurrent:", maxObserved.Load())
+	// Output:
+	// max concurrent: 2
+}
+
+// ExampleWorkflow_DontPanic enables panic recovery. A panicking Step is
+// reported as a normal `Failed` Step instead of crashing the program.
+func ExampleWorkflow_DontPanic() {
+	w := &flow.Workflow{DontPanic: true}
+	w.Add(
+		flow.Step(flow.Func("oops", func(context.Context) error {
+			panic("boom")
+		})),
+	)
+
+	err := w.Do(context.Background())
+
+	var ew flow.ErrWorkflow
+	fmt.Println("ErrWorkflow?", errors.As(err, &ew))
+	// Output:
+	// ErrWorkflow? true
+}
diff --git a/example/10_observability_test.go b/example/10_observability_test.go
new file mode 100644
index 0000000..3539e68
--- /dev/null
+++ b/example/10_observability_test.go
@@ -0,0 +1,174 @@
+package flow_test
+
+import (
+	"context"
+	"fmt"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Observability: Interceptors
+//
+// **What you'll learn**
+//   - Use `StepInterceptor` to observe every Step (full lifetime, including
+//     retries) — perfect for logs and tracing spans.
+//   - Use `AttemptInterceptor` to observe each individual attempt — perfect
+//     for per-try metrics or transforming an attempt's error.
+//   - A Workflow nested inside another inherits the parent's interceptors
+//     automatically; opt out with `IsolateInterceptors` on the inner one.
+//
+// **Two layers, deliberately separated**
+//
+//	StepInterceptor    wraps the FULL lifetime of a Step (across all retries).
+//	                   Sees one Begin / End per Step.
+//
+//	AttemptInterceptor wraps a SINGLE attempt (Before → Do → After) inside the
+//	                   retry loop. Sees one call per attempt.
+//
+// Both are middleware: each receives a `next` callback and is free to
+// short-circuit, wrap, or transform around it.
+//
+// **When to reach for which mechanism**
+//
+//	Need to log/trace every Step?               → Interceptor (this file).
+//	Need to react to upstream's terminal status → Condition (05_conditions).
+//	Need behaviour for one specific Step?       → BeforeStep / AfterStep
+//	                                              (04_callbacks).
+//
+// **Caveats**
+//   - Steps settled inline as Skipped/Canceled by their Condition bypass
+//     the interceptor chain. Inspect `StepResult` if you need those.
+
+// ExampleStepInterceptor shows the simplest, most common use: a logger that
+// prints when each Step starts and ends. The interceptor wraps the Step's
+// FULL lifetime, so retries / per-attempt detail are invisible at this layer.
+func ExampleStepInterceptor() {
+	logger := flow.StepInterceptorFunc(func(ctx context.Context, step flow.Steper, next func(context.Context) error) error {
+		fmt.Printf(">>> START %s\n", step)
+		err := next(ctx)
+		fmt.Printf("<<< END   %s (err=%v)\n", step, err)
+		return err
+	})
+
+	var (
+		foo = flow.Func("Foo", func(ctx context.Context) error { fmt.Println("Foo"); return nil })
+		bar = flow.Func("Bar", func(ctx context.Context) error { fmt.Println("Bar"); return nil })
+	)
+	workflow := &flow.Workflow{
+		StepInterceptors: []flow.StepInterceptor{logger},
+	}
+	workflow.Add(
+		flow.Step(foo).DependsOn(bar),
+	)
+
+	_ = workflow.Do(context.Background())
+	// Output:
+	// >>> START Bar
+	// Bar
+	// <<< END   Bar (err=<nil>)
+	// >>> START Foo
+	// Foo
+	// <<< END   Foo (err=<nil>)
+}
+
+// ExampleAttemptInterceptor shows the per-attempt layer. Combined with Retry,
+// the StepInterceptor sees a single Begin/End for the whole Step while the
+// AttemptInterceptor is invoked once per attempt — exactly what you want for
+// per-try metrics, tracing spans, or attempt-scoped error inspection.
+func ExampleAttemptInterceptor() {
+	stepLog := flow.StepInterceptorFunc(func(ctx context.Context, step flow.Steper, next func(context.Context) error) error {
+		fmt.Printf("[step ] begin %s\n", step)
+		err := next(ctx)
+		fmt.Printf("[step ] end   %s (err=%v)\n", step, err)
+		return err
+	})
+	attemptLog := flow.AttemptInterceptorFunc(func(ctx context.Context, step flow.Steper, attempt uint64, next func(context.Context) error) error {
+		err := next(ctx)
+		fmt.Printf("[try=%d] %s err=%v\n", attempt, step, err)
+		return err
+	})
+
+	passAfter2 := &flow.NamedStep{Name: "PassAfter2", Steper: &passAfter{n: 2}}
+	workflow := &flow.Workflow{
+		StepInterceptors:    []flow.StepInterceptor{stepLog},
+		AttemptInterceptors: []flow.AttemptInterceptor{attemptLog},
+	}
+	workflow.Add(
+		flow.Step(passAfter2).
+			Retry(func(ro *flow.RetryOption) {
+				ro.Attempts = 5
+				ro.Timer = new(zeroTimer)
+			}),
+	)
+
+	_ = workflow.Do(context.Background())
+	// Output:
+	// [step ] begin PassAfter2
+	// fail #0
+	// [try=0] PassAfter2 err=transient
+	// fail #1
+	// [try=1] PassAfter2 err=transient
+	// pass #2
+	// [try=2] PassAfter2 err=<nil>
+	// [step ] end   PassAfter2 (err=<nil>)
+}
+
+// ExampleInterceptorReceiver shows that when a Workflow is used as a Step
+// inside another Workflow, the outer Workflow's interceptors automatically
+// wrap every Step in the inner Workflow.
+//
+// The mechanism is the InterceptorReceiver interface: any Step that contains
+// a sub-Workflow (Workflow itself, SubWorkflow) implements
+// PrependInterceptors, and the parent walks the Step tree (via Unwrap) to
+// find a receiver. So you can wrap a sub-Workflow in flow.Name (or any other
+// Steper wrapper) without losing inheritance.
+//
+// To opt out of inheritance and run an inner Workflow with only its own
+// interceptors, set IsolateInterceptors: true on the inner.
+func ExampleInterceptorReceiver() {
+	outerLogger := flow.StepInterceptorFunc(func(ctx context.Context, step flow.Steper, next func(context.Context) error) error {
+		fmt.Printf("[outer] %s\n", step)
+		return next(ctx)
+	})
+
+	// inner has no interceptors of its own — it inherits from outer.
+	inner := new(flow.Workflow)
+	inner.Add(
+		flow.Pipe(
+			flow.Func("inner-B", func(ctx context.Context) error { fmt.Println("inner-B"); return nil }),
+			flow.Func("inner-A", func(ctx context.Context) error { fmt.Println("inner-A"); return nil }),
+		),
+	)
+
+	// isolated has the same shape but opts out of inheritance.
+	isolated := &flow.Workflow{IsolateInterceptors: true}
+	isolated.Add(
+		flow.Step(flow.Func("isolated-X", func(ctx context.Context) error { fmt.Println("isolated-X"); return nil })),
+	)
+
+	outer := &flow.Workflow{
+		StepInterceptors: []flow.StepInterceptor{outerLogger},
+	}
+	// Naming the sub-workflows is purely cosmetic — it just makes the log
+	// readable. flow.Name wraps each sub-workflow in a NamedStep, and the
+	// outer interceptor finds the InterceptorReceiver by walking through
+	// the wrapper via Unwrap.
+	outer.Add(
+		flow.Name(inner, "inner"),
+		flow.Name(isolated, "isolated"),
+		// Wire the dependency on the underlying sub-workflows. Add() merges
+		// configs by step identity, so the wrapped (named) versions and
+		// these dependency edges land on the same step.
+		flow.Step(isolated).DependsOn(inner),
+	)
+
+	_ = outer.Do(context.Background())
+	// Output:
+	// [outer] inner
+	// [outer] inner-B
+	// inner-B
+	// [outer] inner-A
+	// inner-A
+	// [outer] isolated
+	// isolated-X
+}
diff --git a/example/10_update_workflow_test.go b/example/10_update_workflow_test.go
deleted file mode 100644
index 62da55e..0000000
--- a/example/10_update_workflow_test.go
+++ /dev/null
@@ -1,207 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// [STOP HERE FOR BASIC USAGE]
-
-// # Adding a Step to Workflow is idempotent
-//
-// After a Workflow is constructed, you can still update the Steps in Workflow.
-//
-// Get the Steps in Workflow:
-//
-//	workflow.Steps()
-//
-// Adding a Step to Workflow is idempotent, so you can add the same Step multiple times,
-// the configurations of the Step will be **merged**.
-//
-// So it's up to you to choose the pattern of declaring Steps.
-//
-//	a. declare a step with all its configurations together.
-//
-//	workflow.Add(
-//		flow.Step(step).
-//			DependsOn(...).
-//			Input(...).
-//			Timeout(...).
-//			Retry(...),
-//	)
-//
-//	b. declare a step multiple times, and each time configure different things.
-//
-//	workflow.Add(
-//		// dependency
-//		flow.Step(step).
-//			DependsOn(...),
-//		// ...
-//		// input
-//		flow.Step(step).
-//			Input(...),
-//	)
-//	// or even in another Add()
-//	workflow.Add(
-//		flow.Step(step).
-//			Timeout(...),
-//	)
-//
-// So it's possible to update the Steps in Workflow, for example, to add a Retry to a Step,
-// get the Steps in Workflow, via `workflow.Steps()`, then `Add()` them back to update the Step.
-//
-//	for _, step := range workflow.Steps() {
-//		workflow.Add(
-//			flow.Steps(step).Retry(...), // update the Step
-//		)
-//	}
-func ExampleWorkflow_Add() {
-	workflow := &flow.Workflow{}
-	{ // scope foo and bar
-		var (
-			foo = &Foo{}
-			bar = &Bar{}
-		)
-		workflow.Add(
-			flow.Step(bar).DependsOn(foo),
-		)
-	}
-
-	// from now on, we've lose reference to foo, bar
-	// but still possible to update them (like add dependency)
-	helloWorld := &SayHello{Who: "World!"}
-	for _, step := range workflow.Steps() {
-		workflow.Add(flow.Step(step).DependsOn(helloWorld))
-	}
-
-	_ = workflow.Do(context.Background())
-	// Output:
-	// Hello World!
-	// Foo
-	// Bar
-}
-
-// # Decorate a Step via "Wrapping"
-//
-// Step implementations can be reusable and composable.
-//
-// For example, you may have a "DecorateStep" that wraps another Step to alter its behavior:
-//
-//	type DecorateStep struct {
-//		BaseStep flow.Steper
-//	}
-//
-//	func (d *DecorateStep) Do(ctx context.Context) error {
-//		// do something before
-//		err := d.BaseStep.Do(ctx)
-//		// do something after
-//	}
-//
-// Here, you may notice we're having following problem:
-//
-// What if add a "DecorateStep" and its "BaseStep" to a Workflow simultaneously?
-// Will `BaseStep.Do` being called twice?
-//
-//	base := &BaseStep{}
-//	decorate := &DecorateStep{BaseStep: base}
-//	workflow.Add(
-//		Steps(base, decorate),
-//	)
-//
-// The answer is NO, Workflow will only call `DecorateStep.Do`, so the `BaseStep.Do` will be called only once.
-// While requiring the step implementation to have `Unwrap() Steper` method.
-//
-//	func (d *DecorateStep) Unwrap() Steper { return d.BaseStep }
-//
-// Maybe this will remind you of Go builtin package `errors`,
-// for detail document, please check `Is` and `As` functions and `StepTree` type.
-//
-// Basically, the principal is
-//
-//	Workflow will only orchestrate the top-level Steps, and leave them to manage their inner Steps.
-//
-// Top-level Steps are the Steps that no other Steps wrap them.
-func ExampleWrapStep() {
-	var (
-		w    = new(flow.Workflow)
-		foo  = new(Foo)
-		bar  = new(Bar)
-		wbar = &WrapStep{bar}
-	)
-	w.Add(
-		flow.Step(bar).DependsOn(foo),
-		flow.Step(wbar), // wrap bar
-	)
-	_ = w.Do(context.Background())
-	// Output:
-	// Foo
-	// WRAP: BEFORE
-	// Bar
-	// WRAP: AFTER
-}
-
-type WrapStep struct{ flow.Steper }
-
-func (w *WrapStep) Unwrap() flow.Steper { return w.Steper }
-func (w *WrapStep) Do(ctx context.Context) error {
-	fmt.Println("WRAP: BEFORE")
-	err := w.Steper.Do(ctx)
-	fmt.Println("WRAP: AFTER")
-	return err
-}
-
-// Since Go1.21, errors package support unwraping multiple errors, `flow` also supports this feature.
-type MultiWrapStep struct{ Steps []flow.Steper }
-
-func (m *MultiWrapStep) Unwrap() []flow.Steper { return m.Steps }
-func (m *MultiWrapStep) Do(ctx context.Context) error {
-	fmt.Println("MULTI: BEFORE")
-	defer fmt.Println("MULTI: AFTER")
-	for i, step := range m.Steps {
-		fmt.Printf("MULTI: STEP %d\n", i)
-		if err := step.Do(ctx); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func ExampleMultiWrapStep() {
-	fooBar := &MultiWrapStep{
-		Steps: []flow.Steper{
-			new(Foo),
-			new(Bar),
-		},
-	}
-	fmt.Println(flow.Has[*Foo](fooBar)) // true
-	fmt.Println(flow.Has[*Bar](fooBar)) // true
-
-	// actually Workflow itself also implements `Unwrap() []Steper` method
-	workflow := new(flow.Workflow).
-		Add(
-			flow.Step(fooBar).
-				DependsOn(new(SayHello)),
-		)
-
-	// use As to unwrap specific type from Step
-	for _, sayHello := range flow.As[*SayHello](workflow) {
-		workflow.Add(flow.Step(sayHello).Input(func(ctx context.Context, sh *SayHello) error {
-			sh.Who = "you can unwrap step!"
-			return nil
-		}))
-	}
-
-	_ = workflow.Do(context.Background())
-	// Output:
-	// true
-	// true
-	// Hello you can unwrap step!
-	// MULTI: BEFORE
-	// MULTI: STEP 0
-	// Foo
-	// MULTI: STEP 1
-	// Bar
-	// MULTI: AFTER
-}
diff --git a/example/11_debugging_test.go b/example/11_debugging_test.go
new file mode 100644
index 0000000..155c01f
--- /dev/null
+++ b/example/11_debugging_test.go
@@ -0,0 +1,89 @@
+package flow_test
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sort"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Debugging: figuring out what failed and why
+//
+// **What you'll learn**
+//   - `Workflow.Do` returns `flow.ErrWorkflow` — a `map[Steper]StepResult`
+//     keyed by Step, successes included. Unpack with `errors.As`, then filter.
+//   - Use `Workflow.StateOf(step).GetStatus()` to inspect any Step's
+//     terminal status post-run.
+//   - For per-Step structured logging, prefer an interceptor (10_observability)
+//     over `AfterStep` so the same logger applies to every Step.
+//
+// **The two questions you'll typically ask**
+//
+//	1. Which Steps failed?  ─► iterate ErrWorkflow.
+//	2. What status did Step X end in?  ─► Workflow.StateOf(X).GetStatus().
+
+// ExampleErrWorkflow shows how to inspect the Workflow's error after Do.
+// Several Steps fail in different ways; iterating ErrWorkflow gives you
+// the per-Step error breakdown.
+//
+// Note: ErrWorkflow contains an entry for *every* Step in the Workflow,
+// including the ones that succeeded. Filter on `result.Err != nil` (or
+// `result.Status != flow.Succeeded`) to focus on the failures.
+func ExampleErrWorkflow() {
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(flow.Func("a", func(context.Context) error { return errors.New("disk full") })),
+		flow.Step(flow.Func("b", func(context.Context) error { return errors.New("403 forbidden") })),
+		flow.Step(flow.Func("c", func(context.Context) error { return nil })), // succeeds
+	)
+
+	err := w.Do(context.Background())
+
+	var ew flow.ErrWorkflow
+	if errors.As(err, &ew) {
+		// Sort keys so the godoc output is deterministic.
+		var names []string
+		byName := map[string]flow.StepResult{}
+		for step, result := range ew {
+			if result.Err == nil {
+				continue
+			}
+			name := fmt.Sprint(step)
+			names = append(names, name)
+			byName[name] = result
+		}
+		sort.Strings(names)
+		for _, name := range names {
+			fmt.Printf("%s: %s — %v\n", name, byName[name].Status, byName[name].Err)
+		}
+	}
+	// Output:
+	// a: Failed — disk full
+	// b: Failed — 403 forbidden
+}
+
+// ExampleWorkflow_StateOf shows how to inspect any individual Step's state
+// after the Workflow has run, without going through the error.
+func ExampleWorkflow_StateOf() {
+	var (
+		ok         = flow.Func("ok", func(context.Context) error { return nil })
+		boom       = flow.Func("boom", func(context.Context) error { return errors.New("boom") })
+		downstream = flow.Func("downstream", func(context.Context) error { return nil })
+	)
+
+	w := new(flow.Workflow)
+	w.Add(
+		flow.Step(downstream).DependsOn(ok, boom), // default condition: skipped because boom failed
+	)
+	_ = w.Do(context.Background())
+
+	for _, step := range []flow.Steper{ok, boom, downstream} {
+		fmt.Printf("%s: %s\n", step, w.StateOf(step).GetStatus())
+	}
+	// Output:
+	// ok: Succeeded
+	// boom: Failed
+	// downstream: Skipped
+}
diff --git a/example/11_workflow_in_workflow_test.go b/example/11_workflow_in_workflow_test.go
deleted file mode 100644
index 3f628b2..0000000
--- a/example/11_workflow_in_workflow_test.go
+++ /dev/null
@@ -1,47 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Workflow in Workflow
-//
-// Maybe you've already noticed that, Workflow also implements Steper interface.
-//
-//	func (w *Workflow) Do(ctx context.Context) error
-//
-// Which means, you can actually put a Workflow into another Workflow as a Step!
-//
-// We encourage you to use this feature to build complex workflows.
-func ExampleWorkflow_Do() {
-	var (
-		foo   = new(Foo)
-		bar   = new(Bar)
-		inner = new(flow.Workflow).Add(
-			flow.Step(bar).DependsOn(foo),
-		)
-
-		before = Print("Before")
-		after  = Print("After")
-		outer  = new(flow.Workflow).Add(
-			flow.Pipe(before, inner, after),
-		)
-	)
-
-	_ = outer.Do(context.Background())
-	// Output:
-	// Before
-	// Foo
-	// Bar
-	// After
-}
-
-type Print string
-
-func (p Print) Do(ctx context.Context) error {
-	fmt.Println(p)
-	return nil
-}
diff --git a/example/12_debug_step_test.go b/example/12_debug_step_test.go
deleted file mode 100644
index 0a804c4..0000000
--- a/example/12_debug_step_test.go
+++ /dev/null
@@ -1,53 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # How to debug a failed Step?
-//
-// A debug callback can be executed only when the target Steps are failed.
-//
-// If the debug step needs the result of the upstream steps, it can be achieved by hacking When.
-type DebugStep struct {
-	Upstreams map[flow.Steper]flow.StepResult
-}
-
-func (d *DebugStep) When(ctx context.Context, ups map[flow.Steper]flow.StepResult) flow.StepStatus {
-	// save the upstreams for debug
-	d.Upstreams = ups
-	return flow.AnyFailed(ctx, ups)
-}
-func (d *DebugStep) Do(ctx context.Context) error {
-	for up, statusErr := range d.Upstreams {
-		switch {
-		case flow.Has[*FailedStep](up):
-			// handle the error
-			fmt.Printf("[%s] %s", statusErr.Status, statusErr.Unwrap())
-		}
-	}
-	return nil
-}
-
-func ExampleDebugStep() {
-	var (
-		debug    = new(DebugStep)
-		failed   = new(FailedStep)
-		workflow = new(flow.Workflow).Add(
-			flow.Step(failed),
-		)
-	)
-	// register the debug step
-	workflow.Add(
-		flow.Step(debug).
-			DependsOn(failed).
-			When(debug.When),
-	)
-
-	_ = workflow.Do(context.Background())
-	// Output:
-	// [Failed] failed!
-}
diff --git a/example/12_testing_workflows_test.go b/example/12_testing_workflows_test.go
new file mode 100644
index 0000000..4084967
--- /dev/null
+++ b/example/12_testing_workflows_test.go
@@ -0,0 +1,86 @@
+package flow_test
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"testing"
+
+	flow "github.com/Azure/go-workflow"
+)
+
+// # Testing workflows
+//
+// **What you'll learn**
+//   - Use `flow.Mock(step, fn)` to swap a Step's `Do` for a test double.
+//     The original Step's identity, name, and config are preserved — only
+//     `Do` is replaced. Pointer-based lookups (`As[T]` / `HasStep`) still
+//     find it.
+//   - You can mock Steps in a Workflow that was built by production code:
+//     just hand the Workflow to your test, then call `Add(flow.Mock(...))`
+//     to swap behaviours before running.
+//
+// **When to reach for what**
+//
+//	You wrote the Workflow in your test           ─► substitute Steps directly,
+//	                                                 no need for Mock.
+//	Production code built the Workflow            ─► flow.Mock to swap one Step.
+//	You want to assert on per-Step error/status   ─► see 11_debugging.
+//	You want to assert on Begin/End ordering      ─► add a StepInterceptor in
+//	                                                 the test (10_observability).
+
+// ExampleMock shows the typical use: a production workflow assembled
+// elsewhere, with one Step substituted in the test.
+func ExampleMock() {
+	// Pretend this Workflow comes from production code we don't control.
+	w := buildPipeline()
+
+	// In our test we don't actually want to call the real `publish`. Swap it.
+	w.Add(
+		flow.Mock(publishStep, func(ctx context.Context) error {
+			fmt.Println("(mocked publish)")
+			return nil
+		}),
+	)
+
+	_ = w.Do(context.Background())
+	// Output:
+	// build
+	// (mocked publish)
+}
+
+// publishStep is the production Step we'll mock in the example.
+var publishStep = flow.Func("publish", func(ctx context.Context) error {
+	// Pretend this hits a real registry — we don't want it to run in tests.
+	return errors.New("real publish hit; should have been mocked")
+})
+
+func buildPipeline() *flow.Workflow {
+	build := flow.Func("build", func(ctx context.Context) error {
+		fmt.Println("build")
+		return nil
+	})
+	w := new(flow.Workflow)
+	w.Add(flow.Pipe(build, publishStep))
+	return w
+}
+
+// TestMyPipeline_unitTest demonstrates the same idea inside a real `go test`
+// function (rather than as a godoc Example). This is what your CI test
+// would look like; the godoc Example above just exists to show the pattern.
+func TestMyPipeline_unitTest(t *testing.T) {
+	called := false
+	w := buildPipeline()
+	w.Add(
+		flow.Mock(publishStep, func(ctx context.Context) error {
+			called = true
+			return nil
+		}),
+	)
+	if err := w.Do(context.Background()); err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !called {
+		t.Fatal("publish mock was not invoked")
+	}
+}
diff --git a/example/13_composite_step_test.go b/example/13_composite_step_test.go
deleted file mode 100644
index 823e490..0000000
--- a/example/13_composite_step_test.go
+++ /dev/null
@@ -1,109 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Composite Step
-//
-// Writing a Step with only a few operations is easy,
-// but writing a Step that contains multiple and complex operations is challenging.
-//
-// We can reuse and compose simple Steps to form a composite Step.
-//
-// However, composite step still has few drawbacks:
-//	- it's not unit-test-able
-//	- the inner steps are invisible to the workflow if composite step not implement Unwrap() method
-//	- only one error returned from Do(), lose detailed inner step error
-//	- when add input callbacks to the inner steps, the callbacks will be called before the composite step's Do()
-//
-// Thus, we recommend to use Workflow-in-Workflow to build a composite step.
-
-type Bootstrap struct{}
-type Cleanup struct{}
-type SimpleStep struct{ Value string }
-type CompositeStep struct {
-	Bootstrap
-	SimpleStep
-	Cleanup
-}
-
-func (b *Bootstrap) Do(ctx context.Context) error {
-	fmt.Println("Bootstrap")
-	return nil
-}
-func (c *Cleanup) Do(ctx context.Context) error {
-	fmt.Println("Cleanup")
-	return nil
-}
-func (s *SimpleStep) Do(ctx context.Context) error {
-	fmt.Printf("SimpleStep: %s\n", s.Value)
-	return fmt.Errorf("SimpleStep Failed!")
-}
-func (c *CompositeStep) String() string { return "CompositeStep" }
-func (c *CompositeStep) Unwrap() []flow.Steper {
-	return []flow.Steper{&c.Bootstrap, &c.SimpleStep, &c.Cleanup}
-}
-func (c *CompositeStep) Do(ctx context.Context) error {
-	if err := c.Bootstrap.Do(ctx); err != nil {
-		return err
-	}
-	defer c.Cleanup.Do(ctx)
-	return c.SimpleStep.Do(ctx)
-}
-
-func ExampleCompositeStep() {
-	workflow := new(flow.Workflow)
-	workflow.Add(
-		flow.Step(new(CompositeStep)).
-			Input(func(ctx context.Context, cs *CompositeStep) error {
-				cs.SimpleStep.Value = "Action!"
-				return nil
-			}),
-	)
-	err := workflow.Do(context.Background())
-	fmt.Println(err)
-	// Output:
-	// Bootstrap
-	// SimpleStep: Action!
-	// Cleanup
-	// CompositeStep: [Failed]
-	// 	SimpleStep Failed!
-}
-
-func ExampleCompositeViaWorkflow() {
-	var (
-		composite = &CompositeViaWorkflow{SimpleStep: SimpleStep{
-			Value: "Action!",
-		}}
-		w = new(flow.Workflow).Add(
-			flow.Step(composite),
-		)
-	)
-	_ = w.Do(context.Background())
-	// Output:
-	// Bootstrap
-	// SimpleStep: Action!
-}
-
-type CompositeViaWorkflow struct {
-	SimpleStep
-	w *flow.Workflow
-}
-
-func (c *CompositeViaWorkflow) Unwrap() flow.Steper          { return c.w }
-func (c *CompositeViaWorkflow) Do(ctx context.Context) error { return c.w.Do(ctx) }
-func (c *CompositeViaWorkflow) BuildStep() {
-	c.w = &flow.Workflow{}
-	var (
-		bootstrap = new(Bootstrap)
-		cleanup   = new(Cleanup)
-		simple    = &c.SimpleStep
-	)
-	c.w.Add(
-		flow.Pipe(bootstrap, simple, cleanup),
-	)
-}
diff --git a/example/14_mock_step_test.go b/example/14_mock_step_test.go
deleted file mode 100644
index 363f677..0000000
--- a/example/14_mock_step_test.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package flow_test
-
-import (
-	"context"
-	"fmt"
-
-	flow "github.com/Azure/go-workflow"
-)
-
-// # Mock Step in Workflow for unit-test
-//
-// When writing unit tests for a composite Step or a Workflow, it's often necessary to mock the behavior of the inner Steps.
-//
-// We can MockStep by wrapping the original Step and override the Do() method.
-func ExampleMockStep() {
-	var (
-		foo = new(Foo)
-		bar = new(Bar)
-		w   = new(flow.Workflow)
-	)
-	w.Add(
-		flow.Step(bar).DependsOn(foo),
-		flow.Mock(foo, func(ctx context.Context) error {
-			fmt.Println("MockFoo")
-			return nil
-		}),
-	)
-	_ = w.Do(context.Background())
-	// Output:
-	// MockFoo
-	// Bar
-}
diff --git a/example/README.md b/example/README.md
new file mode 100644
index 0000000..a8f7516
--- /dev/null
+++ b/example/README.md
@@ -0,0 +1,70 @@
+# Examples
+
+This directory is the **`go-workflow` learning path** in code form. Each
+file is a runnable [Go example test](https://pkg.go.dev/testing#hdr-Examples)
+focused on one question. Read top to bottom on a first pass; jump around
+once you know what you need.
+
+`go test ./example/...` runs everything and verifies the output blocks
+stay in sync with the library.
+
+## Path
+
+### Get the mental model (read first)
+
+| File                            | What you'll learn |
+|---------------------------------|---|
+| [01_quickstart](01_quickstart_test.go)             | Any struct with a `Do` method is a Step. End-to-end 3-minute tour: parallel fetch + merge into a profile, with data flowing through `Input` callbacks. |
+| [02_steps_and_deps](02_steps_and_deps_test.go)     | `Step` / `Steps` / `DependsOn` / `Pipe` / `BatchPipe` / `Name`. |
+
+### Move data through the graph
+
+| File                            | What you'll learn |
+|---------------------------------|---|
+| [03_data_flow](03_data_flow_test.go)               | The standard `Input` callback pattern (with your structs and with `Func`/`FuncIO`/`FuncI`/`FuncO`). |
+| [04_callbacks](04_callbacks_test.go)               | `BeforeStep` / `AfterStep` and how they relate to `Input`. |
+
+### Decide what runs (and what doesn't)
+
+| File                            | What you'll learn |
+|---------------------------------|---|
+| [05_conditions](05_conditions_test.go)             | `Condition`, `When`, `flow.Skip` / `flow.Cancel` from inside `Do`. |
+| [06_branching](06_branching_test.go)               | `If` / `Switch` for runtime-data-driven branches. |
+| [07_retry_and_timeout](07_retry_and_timeout_test.go) | `Retry`, per-attempt timeout, step timeout, deterministic-clock testing. |
+
+### Build bigger workflows
+
+| File                            | What you'll learn |
+|---------------------------------|---|
+| [08_workflow_in_workflow](08_workflow_in_workflow_test.go) | Use a `*Workflow` as a Step. Why a "composite Step" struct is an antipattern. |
+| [09_workflow_options](09_workflow_options_test.go)         | `MaxConcurrency`, `DontPanic`. |
+
+### Operate, debug, test
+
+| File                            | What you'll learn |
+|---------------------------------|---|
+| [10_observability](10_observability_test.go)       | `StepInterceptor` / `AttemptInterceptor` for cross-cutting logging, tracing, metrics. |
+| [11_debugging](11_debugging_test.go)               | `ErrWorkflow` and `Workflow.StateOf` for post-run inspection. |
+| [12_testing_workflows](12_testing_workflows_test.go) | `flow.Mock` to substitute Step behaviour in tests. |
+
+## Conventions used in these examples
+
+- **Production Steps are your own structs.** Anything with a
+  `Do(context.Context) error` method satisfies `flow.Steper`. The first
+  four files (01–04) all use plain structs to make this concrete.
+- **`flow.Func` / `FuncIO` / `FuncI` / `FuncO`** show up from 05 onward
+  for inline scaffolding when the focus of an example is something other
+  than the Step body itself (a wiring shape, a Condition, a retry policy,
+  etc.). They are convenience helpers — not the primary way to define a Step.
+- **Sorted output** wherever an example iterates over a map (iteration order
+  is random), so the `// Output:` block stays stable.
+- **`zeroTimer` / `clock.Mock`** in `07_retry_and_timeout` so retry / timeout
+  examples don't actually sleep.
+
+## Where to look beyond this directory
+
+- The `Workflow`, `Step`, `Steps`, `Pipe`, `Retry`, `If`, `Switch` etc.
+  godoc on [pkg.go.dev](https://pkg.go.dev/github.com/Azure/go-workflow)
+  has the full API surface and many small inline examples.
+- `openspec/specs/` contains the formal behaviour specs that these
+  examples exercise.
diff --git a/func.go b/func.go
index 9a95691..4c6597d 100644
--- a/func.go
+++ b/func.go
@@ -4,28 +4,48 @@ import (
 	"context"
 )
 
-// Func constructs a Step from an arbitrary function
+// Func adapts a `func(ctx) error` into a Step (no input, no output) named
+// `name`. Convenient for one-off inline steps that don't deserve their own
+// type.
+//
+//	w.Add(flow.Step(flow.Func("greet", func(ctx context.Context) error {
+//	    fmt.Println("hello")
+//	    return nil
+//	})))
 func Func(name string, do func(context.Context) error) *Function[struct{}, struct{}] {
 	return FuncIO(name, func(ctx context.Context, _ struct{}) (struct{}, error) {
 		return struct{}{}, do(ctx)
 	})
 }
+
+// FuncIO adapts a `func(ctx, In) (Out, error)` into a Step. The Input field
+// is fed in (so you can populate it via Step(...).Input(...)) and the
+// returned Out is stored on the Output field (so you can read it via
+// Step(...).Output(...)).
 func FuncIO[I, O any](name string, do func(context.Context, I) (O, error)) *Function[I, O] {
 	f := &Function[I, O]{Name: name, DoFunc: do}
 	return f
 }
+
+// FuncI is FuncIO for an input-only function (no output).
 func FuncI[I any](name string, do func(context.Context, I) error) *Function[I, struct{}] {
 	return FuncIO(name, func(ctx context.Context, i I) (struct{}, error) {
 		return struct{}{}, do(ctx, i)
 	})
 }
+
+// FuncO is FuncIO for an output-only function (no input).
 func FuncO[O any](name string, do func(context.Context) (O, error)) *Function[struct{}, O] {
 	return FuncIO(name, func(ctx context.Context, _ struct{}) (O, error) {
 		return do(ctx)
 	})
 }
 
-// Function wraps an arbitrary function as a Step.
+// Function is the Step implementation produced by Func / FuncIO / FuncI /
+// FuncO. Input is supplied by the caller (typically via Step(f).Input(...)),
+// passed into DoFunc on each attempt, and the return value is stashed in
+// Output (so Step(f).Output(...) can pick it up). String() returns Name, so
+// Function shows up nicely in logs and ErrWorkflow messages.
 type Function[I, O any] struct {
 	Name   string
 	Input  I
diff --git a/interceptor.go b/interceptor.go
index 5be4dce..9e2e6b1 100644
--- a/interceptor.go
+++ b/interceptor.go
@@ -2,36 +2,77 @@ package flow
 
 import "context"
 
-// StepInterceptor intercepts the full lifecycle of a step (all retry attempts).
-// Skipped and Canceled steps do not enter the interceptor chain.
+// StepInterceptor wraps the FULL lifetime of a step (from the first attempt
+// up to and including the last retry). The chain is built once per step run
+// in stepExecution.run, with the lowest-index interceptor on the outside.
+// `next` invokes the next interceptor in the chain — eventually calling into
+// executeWithRetry, which itself loops over attempts and per-attempt
+// interceptors.
+//
+// Important: steps that are settled inline (Skipped or Canceled by their
+// Condition) bypass the interceptor chain entirely. If you need observability
+// for those terminal states, watch the StepResult instead.
 type StepInterceptor interface {
 	InterceptStep(ctx context.Context, step Steper, next func(context.Context) error) error
 }
 
-// AttemptInterceptor intercepts each individual attempt (Before → Do → After).
-// The error returned by next (if any) is the attempt's failure — it is available
-// for inspection before being returned.
+// AttemptInterceptor wraps a SINGLE attempt (Before → Do → After) inside the
+// retry loop. It receives the 0-based attempt index, and the error returned
+// by `next` is the error of THAT attempt — so you can inspect, transform or
+// suppress it before it propagates up to the retry policy.
 type AttemptInterceptor interface {
 	InterceptAttempt(ctx context.Context, step Steper, attempt uint64, next func(context.Context) error) error
 }
 
-// StepInterceptorFunc is a function adapter for StepInterceptor.
+// StepInterceptorFunc adapts a plain function to the StepInterceptor
+// interface — same shape as http.HandlerFunc.
 type StepInterceptorFunc func(ctx context.Context, step Steper, next func(context.Context) error) error
 
 func (f StepInterceptorFunc) InterceptStep(ctx context.Context, step Steper, next func(context.Context) error) error {
 	return f(ctx, step, next)
 }
 
-// AttemptInterceptorFunc is a function adapter for AttemptInterceptor.
+// AttemptInterceptorFunc adapts a plain function to the AttemptInterceptor interface.
 type AttemptInterceptorFunc func(ctx context.Context, step Steper, attempt uint64, next func(context.Context) error) error
 
 func (f AttemptInterceptorFunc) InterceptAttempt(ctx context.Context, step Steper, attempt uint64, next func(context.Context) error) error {
 	return f(ctx, step, attempt, next)
 }
 
-// InterceptorReceiver is implemented by steps that contain a sub-workflow.
-// stepExecution calls PrependInterceptors once (in executeWithRetry, before the retry loop)
-// so that parent interceptors wrap child interceptors for the entire step lifetime.
+// InterceptorReceiver is implemented by any Step that contains a sub-workflow
+// (notably *Workflow itself and SubWorkflow). The parent's stepExecution
+// calls PrependInterceptors ONCE — in executeWithRetry, just before the
+// retry loop — so the parent's interceptor chain wraps the child's
+// interceptor chain for the duration of the step.
+//
+// Implementations should be careful not to mutate the user-supplied base
+// chain or accumulate inherited entries across runs (see Workflow's
+// `inheritedStep` / `inheritedAttempt` design).
+//
+// The parent looks for an InterceptorReceiver by walking the Step tree via
+// Unwrap (using the same protocol as As[T] / Has[T]). This means inheritance
+// keeps working when the sub-workflow is wrapped in a Name / NamedStep / any
+// other Steper wrapper that exposes Unwrap. The first receiver found in
+// pre-order is used.
 type InterceptorReceiver interface {
 	PrependInterceptors(step []StepInterceptor, attempt []AttemptInterceptor)
 }
+
+// findInterceptorReceiver returns the first InterceptorReceiver in the Step
+// tree rooted at s, walking via Unwrap in pre-order. Returns nil if none of
+// the unwrapped Steps satisfies InterceptorReceiver.
+//
+// This lets a sub-workflow be wrapped in a Steper-only wrapper (e.g.
+// NamedStep, which embeds the Steper interface and therefore does not
+// promote PrependInterceptors) without losing parent-interceptor inheritance.
+func findInterceptorReceiver(s Steper) InterceptorReceiver {
+	var found InterceptorReceiver
+	Traverse(s, func(s Steper, _ []Steper) TraverseDecision {
+		if r, ok := s.(InterceptorReceiver); ok {
+			found = r
+			return TraverseStop
+		}
+		return TraverseContinue
+	})
+	return found
+}
diff --git a/mock.go b/mock.go
index cbe204b..855d48e 100644
--- a/mock.go
+++ b/mock.go
@@ -2,17 +2,27 @@ package flow
 
 import "context"
 
-// Mock helps to mock a step in Workflow.
+// Mock returns a Builder that swaps in a fake Do for an existing step. The
+// original step value is kept (so identity-based lookups via As[T] / HasStep
+// still find it), but its Do is replaced by your function.
 //
 //	w.Add(
-//		flow.Mock(step, func(ctx context.Context) error {}),
+//	    flow.Mock(realStep, func(ctx context.Context) error {
+//	        // pretend behaviour for tests
+//	        return nil
+//	    }),
 //	)
+//
+// Mock is most useful in tests after the workflow is already wired (possibly
+// by production code), when you want to substitute behaviour without
+// rebuilding the graph.
 func Mock[T Steper](step T, do func(context.Context) error) Builder {
 	return Step(&MockStep{Step: step, MockDo: do})
 }
 
-// MockStep helps to mock a step.
-// After building a workflow, you can mock the original step with a mock step.
+// MockStep is the wrapper produced by Mock. It exposes the original Step via
+// Unwrap (so utilities like As[T] / HasStep / String still see through it)
+// while delegating Do to MockDo.
 type MockStep struct {
 	Step   Steper
 	MockDo func(context.Context) error
diff --git a/name.go b/name.go
index 57066b0..9bec81e 100644
--- a/name.go
+++ b/name.go
@@ -2,25 +2,28 @@ package flow
 
 import "fmt"
 
-// Name can rename a Step.
+// Name attaches a human-readable display name to a Step by wrapping it in a
+// NamedStep. The returned Builder can be passed to Workflow.Add directly:
 //
 //	workflow.Add(
-//		Step(a),
-//		Name(a, "StepA"),
+//	    Step(a),
+//	    Name(a, "StepA"), // a will now log/print as "StepA"
 //	)
 //
-// Attention: Name will wrap the original Step
+// Note: Name produces a wrapper Step. The original Step is reachable via
+// Unwrap() (so As[T] / HasStep / interceptor inheritance still see through
+// the wrapper), but the wrapper itself becomes the value the Workflow tracks.
 func Name(step Steper, name string) Builder {
 	return Step(&NamedStep{Name: name, Steper: step})
 }
 
-// Names can rename multiple Steps.
+// Names attaches display names to many steps at once.
 //
 //	workflow.Add(
-//		Names(map[Steper]string{
-//			stepA: "A",
-//			stepB: "B",
-//		},
+//	    Names(map[Steper]string{
+//	        stepA: "A",
+//	        stepB: "B",
+//	    }),
 //	)
 func Names(m map[Steper]string) Builder {
 	as := AddSteps{}
@@ -30,18 +33,19 @@ func Names(m map[Steper]string) Builder {
 	return as
 }
 
-// NameFunc can rename a Step with a runtime function.
+// NameFunc is like Name but the display name is computed every time it is
+// requested — useful when the name depends on runtime data.
 func NameFunc(step Steper, fn func() string) Builder {
 	return NameStringer(step, stringer(fn))
 }
 
-// NameStringer can rename a Step with a fmt.Stringer,
-// which allows String() method to be called at runtime.
+// NameStringer is like NameFunc but takes any fmt.Stringer as the name source.
 func NameStringer(step Steper, name fmt.Stringer) Builder {
 	return Step(&StringerNamedStep{Name: name, Steper: step})
 }
 
-// NamedStep is a wrapper of Steper, it gives your step a name by overriding String() method.
+// NamedStep wraps a Steper and overrides its String() method with a fixed
+// name. It preserves Steper identity through Unwrap.
 type NamedStep struct {
 	Name string
 	Steper
@@ -50,10 +54,13 @@ type NamedStep struct {
 func (ns *NamedStep) String() string { return ns.Name }
 func (ns *NamedStep) Unwrap() Steper { return ns.Steper }
 
+// stringer adapts a `func() string` to fmt.Stringer for NameFunc.
 type stringer func() string
 
 func (s stringer) String() string { return s() }
 
+// StringerNamedStep wraps a Steper and overrides its String() with a
+// dynamic, runtime-evaluated name supplied by a fmt.Stringer.
 type StringerNamedStep struct {
 	Name fmt.Stringer
 	Steper
diff --git a/noop.go b/noop.go
index 43d956c..ffb07b1 100644
--- a/noop.go
+++ b/noop.go
@@ -4,9 +4,16 @@ import (
 	"context"
 )
 
+// NoOpStep is a Step whose Do is a deliberate no-op. Useful as a synthetic
+// "join point" — give multiple branches a single downstream to depend on
+// without doing any real work.
 type NoOpStep struct{ Name string }
 
-// NoOp constructs a step doing nothing.
-func NoOp(name string) *NoOpStep           { return &NoOpStep{Name: name} }
+// NoOp builds a NoOpStep with the given display name.
+//
+//	join := flow.NoOp("merge")
+//	w.Add(flow.Steps(join).DependsOn(branchA, branchB, branchC))
+func NoOp(name string) *NoOpStep { return &NoOpStep{Name: name} }
+
 func (n *NoOpStep) String() string         { return n.Name }
 func (*NoOpStep) Do(context.Context) error { return nil }
diff --git a/openspec/specs/step-interceptor/spec.md b/openspec/specs/step-interceptor/spec.md
index e061ef2..5227148 100644
--- a/openspec/specs/step-interceptor/spec.md
+++ b/openspec/specs/step-interceptor/spec.md
@@ -135,6 +135,12 @@ type InterceptorReceiver interface {
 }
 ```
 
+`stepExecution` locates the `InterceptorReceiver` for a step by walking the Step tree via
+`Unwrap` (the same protocol used by `As[T]` / `Has[T]`) and selecting the first receiver
+found in pre-order. This means a sub-workflow MAY be wrapped in any Steper-only wrapper
+(notably `flow.Name` / `NamedStep`, whose embedded `Steper` interface does not promote
+`PrependInterceptors`) without losing inheritance.
+
 `stepExecution` calls `PrependInterceptors` exactly once per step, in `executeWithRetry`
 before the retry loop begins. Inheritance is **per-run scoped**:
 
@@ -161,6 +167,16 @@ before the retry loop begins. Inheritance is **per-run scoped**:
 - **WHEN** the parent runs
 - **THEN** X is invoked for both the outer step and the inner step S
 
+#### Scenario: Inheritance survives Steper-only wrappers (NamedStep / flow.Name)
+- **GIVEN** a parent Workflow with a `StepInterceptor` X, and a child `*Workflow`
+  containing step `S` that is added to the parent via `flow.Name(child, "name")` (which
+  wraps the child in a `NamedStep` whose embedded `Steper` interface does not promote
+  `PrependInterceptors`)
+- **WHEN** the parent runs
+- **THEN** X is invoked for both the wrapping `NamedStep` and the inner step S
+- **AND** inheritance works because `stepExecution` looks up `InterceptorReceiver` via
+  `Unwrap`, not via a direct type assertion on the registered Step
+
 #### Scenario: PrependInterceptors does not duplicate across retries
 - **WHEN** a sub-workflow step is retried N times
 - **THEN** parent interceptors are prepended exactly once, not N times
diff --git a/retry.go b/retry.go
index ab32779..2ab8b13 100644
--- a/retry.go
+++ b/retry.go
@@ -7,21 +7,45 @@ import (
 	"github.com/cenkalti/backoff/v4"
 )
 
+// DefaultRetryOption is the policy used as the seed when a step calls Retry()
+// with no overriding mutator. It is also the policy used when Retry(nil) is
+// called explicitly.
+//
+// Note: each step takes its OWN copy of this option, with Backoff cleared to
+// nil so that the per-run retry() allocates a fresh backoff.BackOff. This
+// avoids a data race on the shared NewExponentialBackOff() instance when
+// multiple steps retry concurrently.
 var DefaultRetryOption = RetryOption{
 	Backoff:  backoff.NewExponentialBackOff(),
 	Attempts: 3,
 }
 
-// RetryOption customizes retry behavior of a Step in Workflow.
+// RetryOption controls how a step is retried.
+//
+// The semantics map onto cenkalti/backoff/v4 internals:
+//
+//   - Attempts: total number of attempts including the first try (so 3 means
+//     "try, retry, retry"). 0 means "no attempt cap"; the run is then bounded
+//     only by Backoff/ctx/notAfter.
+//   - TimeoutPerTry: deadline applied to each individual attempt's context.
+//     0 means "no per-try deadline"; the attempt is bounded by the step-level
+//     Timeout (if any) and by ctx.
+//   - Backoff: the backoff strategy. If left nil at retry time, retry()
+//     allocates a fresh ExponentialBackOff for this run (see DefaultRetryOption).
+//   - Notify / Timer: passed straight through to backoff.RetryNotifyWithTimer.
 type RetryOption struct {
-	TimeoutPerTry time.Duration // 0 means no timeout
-	Attempts      uint64        // 0 means no limit
-	// NextBackOff is called after each retry to determine the next backoff duration.
-	// Notice if attempts limits are reach, or context timeout, or BackOff fires backoff.Stop,
-	// this function will not be called.
+	TimeoutPerTry time.Duration
+	Attempts      uint64
+	// NextBackOff is invoked AFTER each failed attempt to (optionally) override
+	// the next backoff duration computed by Backoff. It is NOT called when:
+	//   - Attempts cap has been reached, or
+	//   - ctx has fired (cancel / deadline), or
+	//   - the inner Backoff returned backoff.Stop, or
+	//   - the step-level Timeout (notAfter) has elapsed.
 	//
-	// RetryEvent: the event records attempt, duration since the start, and the error of the last try.
-	// nextBackOff: the next backoff duration calculated by the inner BackOff
+	// Arguments:
+	//   re          — what just happened (attempt number, total elapsed, last error).
+	//   nextBackOff — the duration the inner Backoff suggests next.
 	NextBackOff func(ctx context.Context, re RetryEvent, nextBackOff time.Duration) time.Duration
 
 	Backoff backoff.BackOff
@@ -29,18 +53,23 @@ type RetryOption struct {
 	Timer   backoff.Timer
 }
 
-// RetryEvent is the event fired when a retry happens
+// RetryEvent is a snapshot of a single failed attempt, fed to NextBackOff.
 type RetryEvent struct {
-	Attempt uint64
-	Since   time.Duration
-	Error   error
+	Attempt uint64        // 0-based index of the attempt that just failed.
+	Since   time.Duration // time elapsed since the first attempt started.
+	Error   error         // error returned by the failed attempt.
 }
 
-// retry constructs a do function with retry enabled according to the option.
+// retry returns a wrapper that will run `do` with retry semantics derived from
+// `opt`. If opt is nil the wrapper runs `do` exactly once.
+//
+// The wrapper threads through the step-level deadline (`notAfter`) so the
+// retry loop can stop early if the deadline is about to elapse, and applies
+// `TimeoutPerTry` (if set) by deriving a per-attempt context.
 func (w *Workflow) retry(opt *RetryOption) func(
 	ctx context.Context,
 	do func(context.Context) error,
-	notAfter time.Time, // the Step level timeout ddl
+	notAfter time.Time, // step-level Timeout deadline; zero means "none".
 ) error {
 	if opt == nil {
 		return func(ctx context.Context, do func(context.Context) error, notAfter time.Time) error { return do(ctx) }
@@ -48,8 +77,9 @@ func (w *Workflow) retry(opt *RetryOption) func(
 	return func(ctx context.Context, do func(context.Context) error, notAfter time.Time) error {
 		backOff := opt.Backoff
 		if backOff == nil {
-			// Backoff was not set (or was cleared to avoid sharing DefaultRetryOption's
-			// mutable Backoff instance). Allocate a fresh one for this retry run.
+			// Backoff was not set (or was cleared to avoid sharing
+			// DefaultRetryOption's mutable Backoff instance). Allocate a
+			// fresh one so concurrent retries don't race on shared state.
 			backOff = backoff.NewExponentialBackOff()
 		}
 		backOff = backoff.WithContext(backOff, ctx)
@@ -57,6 +87,8 @@ func (w *Workflow) retry(opt *RetryOption) func(
 			backOff = &backOffStopIfTimeout{BackOff: backOff, NotAfter: notAfter, Now: w.Clock.Now}
 		}
 		if opt.Attempts > 0 {
+			// WithMaxRetries counts RETRIES, not total attempts — Attempts=N
+			// means "1 initial + (N-1) retries".
 			backOff = backoff.WithMaxRetries(backOff, opt.Attempts-1)
 		}
 		retried := func(ctx context.Context, e RetryEvent) {}
@@ -91,6 +123,10 @@ func (w *Workflow) retry(opt *RetryOption) func(
 	}
 }
 
+// backOffWithEvent is a thin BackOff decorator that lets the user-supplied
+// NextBackOff observe each retry event and override the next backoff.
+// retried() is called from inside the retry function (not from NextBackOff
+// itself) so the event reflects the attempt that just finished.
 type backOffWithEvent struct {
 	backoff.BackOff
 	nextBackOff func(context.Context, RetryEvent, time.Duration) time.Duration
@@ -99,6 +135,8 @@ type backOffWithEvent struct {
 	e   RetryEvent
 }
 
+// NextBackOff consults the inner Backoff first. The user override is skipped
+// when none is set or when the inner Backoff returned backoff.Stop.
 func (b *backOffWithEvent) NextBackOff() time.Duration {
 	bkof := b.BackOff.NextBackOff()
 	if b.nextBackOff == nil || bkof == backoff.Stop {
@@ -106,17 +144,25 @@ func (b *backOffWithEvent) NextBackOff() time.Duration {
 	}
 	return b.nextBackOff(b.ctx, b.e, bkof)
 }
+
+// retried is called by the retry loop after each failed attempt to publish
+// the event for the next NextBackOff() invocation.
 func (b *backOffWithEvent) retried(ctx context.Context, e RetryEvent) {
 	b.ctx = ctx
 	b.e = e
 }
 
+// backOffStopIfTimeout fires backoff.Stop once the step-level deadline
+// (NotAfter) has passed, so the retry loop stops scheduling further attempts
+// instead of sleeping and retrying beyond the deadline.
 type backOffStopIfTimeout struct {
 	backoff.BackOff
 	NotAfter time.Time
 	Now      func() time.Time
 }
 
+// NextBackOff returns backoff.Stop if the step deadline has passed (or any
+// supporting field is missing); otherwise the inner backoff value is used.
 func (b *backOffStopIfTimeout) NextBackOff() time.Duration {
 	bkof := b.BackOff.NextBackOff()
 	if b.NotAfter.IsZero() || b.Now == nil || bkof == backoff.Stop || b.Now().After(b.NotAfter) {
diff --git a/state.go b/state.go
index b313864..9aa0a38 100644
--- a/state.go
+++ b/state.go
@@ -5,51 +5,75 @@ import (
 	"sync"
 )
 
-// State is the internal state of a Step in a Workflow.
+// State is the per-step bookkeeping that a Workflow keeps for every Step it
+// orchestrates. It carries two things:
 //
-// It has the status and the config (dependency, input, retry option, condition, timeout, etc.) of the step.
-// The status could be read / write from different goroutines, so use RWMutex to protect it.
+//   - StepResult — the runtime status, the error from the last attempt, and
+//     the time the step finished (zero if it never ran).
+//   - Config    — the dependency edges, before/after callbacks and step
+//     options (retry, condition, timeout) declared via Step()/Steps()/Pipe().
+//
+// State is read and written from multiple goroutines (the tick loop and the
+// per-step worker), so the GetXxx / SetXxx helpers take the embedded RWMutex;
+// use them rather than touching StepResult directly. Upstreams does not lock.
 type State struct {
 	StepResult
 	Config *StepConfig
 	sync.RWMutex
 }
 
+// GetStatus returns the current StepStatus under read lock.
 func (s *State) GetStatus() StepStatus {
 	s.RLock()
 	defer s.RUnlock()
 	return s.Status
 }
+
+// SetStatus replaces the StepStatus under write lock. Other StepResult fields
+// (Err, FinishedAt) are left as-is — use SetStepResult to write them together.
 func (s *State) SetStatus(ss StepStatus) {
 	s.Lock()
 	defer s.Unlock()
 	s.Status = ss
 }
+
+// GetStepResult returns a snapshot of the full StepResult under read lock.
 func (s *State) GetStepResult() StepResult {
 	s.RLock()
 	defer s.RUnlock()
 	return s.StepResult
 }
+
+// SetStepResult atomically replaces the entire StepResult (status, error and
+// finish time). Used at terminal transitions where all three change together.
 func (s *State) SetStepResult(r StepResult) {
 	s.Lock()
 	defer s.Unlock()
 	s.StepResult = r
 }
 
+// GetError is a convenience over GetStepResult().Err.
 func (s *State) GetError() error { return s.GetStepResult().Err }
 
+// SetError replaces only the Err field under write lock.
 func (s *State) SetError(err error) {
 	s.Lock()
 	defer s.Unlock()
 	s.Err = err
 }
 
+// Upstreams returns the set of steps this step depends on (Config.Upstreams),
+// or nil if the step has no Config yet.
 func (s *State) Upstreams() Set[Steper] {
 	if s.Config == nil {
 		return nil
 	}
 	return s.Config.Upstreams
 }
+
+// Option folds every registered option function into a fresh StepOption and
+// returns it. The result is always non-nil; absent options leave their fields
+// at their zero value (no retry, no timeout, default Condition).
 func (s *State) Option() *StepOption {
 	opt := &StepOption{}
 	if s.Config != nil && s.Config.Option != nil {
@@ -59,6 +83,16 @@ func (s *State) Option() *StepOption {
 	}
 	return opt
 }
+
+// Before runs every BeforeStep callback registered via Input() / BeforeStep()
+// in declaration order. Each callback can swap the context.Context that is
+// threaded into the next callback (and ultimately into Step.Do). The first
+// callback to return an error short-circuits the chain and that error is
+// returned alongside the most-recent context.
+//
+// The do parameter is the panic-catching wrapper supplied by the caller —
+// passing each callback through `do` lets a panicking callback be turned into
+// an error when Workflow.DontPanic is true.
 func (s *State) Before(root context.Context, step Steper, do func(func() error) error) (context.Context, error) {
 	if s.Config == nil || len(s.Config.Before) == 0 {
 		return root, nil
@@ -75,6 +109,12 @@ func (s *State) Before(root context.Context, step Steper, do func(func() error)
 	}
 	return ctx, nil
 }
+
+// After runs every AfterStep callback registered via Output() / AfterStep()
+// in declaration order, threading the error from each callback into the next.
+// Unlike Before, callbacks are NOT short-circuited by an error — every After
+// runs and the final error is whatever the last callback returns. This lets
+// AfterStep callbacks observe, transform, or even swallow the error.
 func (s *State) After(ctx context.Context, step Steper, err error) error {
 	if s.Config == nil || len(s.Config.After) == 0 {
 		return err
@@ -84,6 +124,9 @@ func (s *State) After(ctx context.Context, step Steper, err error) error {
 	}
 	return err
 }
+
+// AddUpstream declares that this step depends on `up`. Lazily allocates
+// Config / Config.Upstreams. Nil upstreams are silently ignored.
 func (s *State) AddUpstream(up Steper) {
 	if s.Config == nil {
 		s.Config = &StepConfig{}
@@ -95,6 +138,11 @@ func (s *State) AddUpstream(up Steper) {
 		s.Config.Upstreams.Add(up)
 	}
 }
+
+// MergeConfig folds another StepConfig into this state's config (union of
+// upstreams, concatenation of Before / After / Option). Used when the same
+// step is referenced by multiple Add() calls or when a composite step is
+// replaced by a new root that absorbs prior configuration.
 func (s *State) MergeConfig(sc *StepConfig) {
 	if s.Config == nil {
 		s.Config = &StepConfig{}
diff --git a/step.go b/step.go
index 2d0e41b..e5d66bf 100644
--- a/step.go
+++ b/step.go
@@ -8,52 +8,70 @@ import (
 	"time"
 )
 
-// Steper describes the requirement for a step, which is basic unit of a Workflow.
-//
-// Implement this interface to allow Workflow orchestrating your steps.
-//
-// Notice Steper will be saved in Workflow as map key, so it's supposed to be 'comparable' type like struct pointer.
-//
-// Do not use empty struct{} as Steper implementation, because all empty struct{} are equal in Go,
-// which makes it impossible to distinguish different steps in Workflow.
+// Steper is the contract every Step must satisfy: a single Do method.
+// Implement it on any comparable type (typically a *struct) and the Workflow
+// can orchestrate it.
+//
+// Why "comparable"? The Workflow stores steps as map keys to track their
+// state, so two distinct *Foo instances must be distinguishable. In
+// particular, do NOT use the empty `struct{}` as a Step type — every
+// `struct{}` value compares equal in Go, and the Workflow would treat them
+// all as the same step.
 type Steper interface {
 	Do(context.Context) error
 }
 
-// Builder is an interface to add steps into Workflow
+// Builder is the glue between user-facing helpers (Step, Steps, Pipe, If, …)
+// and Workflow.Add: anything that can produce a {step → config} map can be
+// passed to Add().
 type Builder interface {
 	AddToWorkflow() map[Steper]*StepConfig
 }
 
-// BeforeStep defines callback being called BEFORE step being executed.
+// BeforeStep is the signature for a hook that runs before a step's Do, on
+// every retry attempt. It may swap the context for the rest of the chain.
+// Returning a non-nil error short-circuits the remaining BeforeStep callbacks
+// and is reported as ErrBeforeStep to the After hooks.
 type BeforeStep func(context.Context, Steper) (context.Context, error)
 
-// AfterStep defines callback being called AFTER step being executed.
+// AfterStep is the signature for a hook that runs after a step's Do, on every
+// retry attempt. It receives — and can transform or swallow — the error
+// returned by Do (or by a previous AfterStep). Unlike BeforeStep, AfterStep
+// callbacks always all run; an error doesn't short-circuit them.
 type AfterStep func(context.Context, Steper, error) error
 
-// StepConfig is the configuration of a step in a Workflow.
+// StepConfig collects everything Workflow needs to know about a single step
+// besides the step itself: who it depends on, what hooks to run around it,
+// and how to configure retry/timeout/condition.
 type StepConfig struct {
-	Upstreams Set[Steper]         // Upstreams of the step, means these steps should happen-before this step
-	Before    []BeforeStep        // Before callbacks of the step, will be called before Do
-	After     []AfterStep         // After callbacks of the step, will be called before Do
-	Option    []func(*StepOption) // Option customize the step settings
+	Upstreams Set[Steper]         // steps that must be terminated before this step is considered.
+	Before    []BeforeStep        // hooks run before Do, in declaration order, per attempt.
+	After     []AfterStep         // hooks run after  Do, in declaration order, per attempt.
+	Option    []func(*StepOption) // option mutators folded together to compute the effective StepOption.
 }
 
-// StepOption customizes the behavior of how Workflow orchestrates the step.
+// StepOption is the resolved per-step configuration the scheduler consults at
+// runtime. Built by folding StepConfig.Option in declaration order — later
+// mutators win for fields they touch (so Timeout/When/Retry follow
+// "last-one-wins").
 type StepOption struct {
-	RetryOption *RetryOption   // RetryOption customize how the step should be retried, default (nil) means no retry.
-	Condition   Condition      // Condition decides whether Workflow should execute the step, default to DefaultCondition.
-	Timeout     *time.Duration // Timeout sets the step level timeout, default (nil) means no timeout.
+	RetryOption *RetryOption   // nil means: no retry, run once.
+	Condition   Condition      // nil means: use the package-level DefaultCondition (AllSucceeded).
+	Timeout     *time.Duration // nil means: no step-level deadline (the step runs until ctx is done).
 }
 
-// Steps bakes Steper(s) ready to be added into Workflow.
+// Steps registers one or more independent Steps to be added into the Workflow.
 //
-// The Steper(s) declared are mutually independent, meaning they will be executed in parallel.
+// Steps in the same Steps(...) call are NOT linked to each other: by default
+// they may run concurrently. Use DependsOn / When / Retry / Timeout etc. on
+// the returned AddSteps to attach common configuration.
 //
 //	workflow.Add(
-//		Steps(a, b, c),					// a, b, c will be executed in parallel
-//		Steps(a, b, c).DependsOn(d, e), // d, e will be executed in parallel, then a, b, c in parallel
+//	    Steps(a, b, c),                 // a, b and c are independent (run in parallel).
+//	    Steps(a, b, c).DependsOn(d, e), // d and e first; then a, b, c become eligible.
 //	)
+//
+// Use Step (singular, generic) instead when you need typed Input/Output hooks.
 func Steps(steps ...Steper) AddSteps {
 	rv := make(AddSteps)
 	for _, step := range steps {
@@ -62,14 +80,17 @@ func Steps(steps ...Steper) AddSteps {
 	return rv
 }
 
-// Step bakes typed Steper(s) ready to be added into Workflow.
-//
-// The main difference between Step() and Steps() is that,
-// Step() allows to add Input callbacks for the step (since this is a generic function).
+// Step is the typed counterpart of Steps. Because it is generic over the
+// concrete step type S, it can offer Input / Output callbacks that receive
+// the step as its real type — no type assertion needed.
 //
 //	Step(a).Input(func(ctx context.Context, a *A) error {
-//		// fill a
-//	}))
+//	    a.Field = "filled at runtime"
+//	    return nil
+//	})
+//
+// All Steps(...) helpers (DependsOn, When, Retry, …) are also available on
+// the returned AddStep[S].
 func Step[S Steper](steps ...S) AddStep[S] {
 	return AddStep[S]{
 		AddSteps: Steps(ToSteps(steps)...),
@@ -77,17 +98,17 @@ func Step[S Steper](steps ...S) AddStep[S] {
 	}
 }
 
-// Pipe creates a pipeline in Workflow.
+// Pipe wires the given Steps into a strict linear pipeline.
 //
 //	workflow.Add(
-//		Pipe(a, b, c), // a -> b -> c
+//	    Pipe(a, b, c), // a -> b -> c
 //	)
 //
-// The above code is equivalent to:
+// Equivalent to:
 //
 //	workflow.Add(
-//		Step(b).DependsOn(a),
-//		Step(c).DependsOn(b),
+//	    Step(b).DependsOn(a),
+//	    Step(c).DependsOn(b),
 //	)
 func Pipe(steps ...Steper) AddSteps {
 	as := Steps(steps...)
@@ -97,21 +118,23 @@ func Pipe(steps ...Steper) AddSteps {
 	return as
 }
 
-// BatchPipe creates a batched pipeline in Workflow.
+// BatchPipe wires batches of Steps into a "fully-connected" pipeline: every
+// step in batch i+1 depends on every step in batch i. Steps within the same
+// batch remain independent of each other.
 //
 //	workflow.Add(
-//		BatchPipe(
-//			Steps(a, b),
-//			Steps(c, d, e),
-//			Steps(f),
-//		),
+//	    BatchPipe(
+//	        Steps(a, b),
+//	        Steps(c, d, e),
+//	        Steps(f),
+//	    ),
 //	)
 //
-// The above code is equivalent to:
+// Equivalent to:
 //
 //	workflow.Add(
-//		Steps(c, d, e).DependsOn(a, b),
-//		Steps(f).DependsOn(c, d, e),
+//	    Steps(c, d, e).DependsOn(a, b),
+//	    Steps(f).DependsOn(c, d, e),
 //	)
 func BatchPipe(batch ...AddSteps) AddSteps {
 	as := Steps()
@@ -124,11 +147,12 @@ func BatchPipe(batch ...AddSteps) AddSteps {
 	return as
 }
 
-// DependsOn declares dependency on the given Steps.
+// DependsOn declares that the configured step(s) must run AFTER all of the
+// given upstream steps have terminated.
 //
-//	Step(a).DependsOn(b, c)
+//	Step(a).DependsOn(b, c) // b and c happen-before a.
 //
-// Then b, c should happen-before a.
+// Calling DependsOn multiple times is additive (the upstream sets are unioned).
 func (as AddSteps) DependsOn(ups ...Steper) AddSteps {
 	for down := range as {
 		as[down].Upstreams.Add(ups...)
@@ -136,17 +160,20 @@ func (as AddSteps) DependsOn(ups ...Steper) AddSteps {
 	return as
 }
 
-// Input adds BeforeStep callback for the Step(s).
+// Input registers a typed BeforeStep callback. It runs at runtime, BEFORE Do,
+// on EVERY retry attempt. Use it to populate fields on the step from data
+// that's only available once upstreams have finished.
 //
-// Input callbacks will be called before Do,
-// and the order will respect the order of declarations.
+// Input callbacks fire in declaration order — both within a single Input(...)
+// call and across multiple Input(...) calls on the same step:
 //
 //	Step(a).
-//		Input(/* 1. this Input will be called first */).
-//		Input(/* 2. this Input will be called after 1. */)
-//	Step(a).Input(/* 3. this Input is after all */)
+//	    Input(/* 1. fires first  */).
+//	    Input(/* 2. fires second */)
+//	Step(a).Input(/* 3. fires last */)
 //
-// The Input callbacks are executed at runtime and per-try.
+// Returning a non-nil error short-circuits the remaining Before chain and is
+// surfaced as ErrBeforeStep.
 func (as AddStep[S]) Input(fns ...func(context.Context, S) error) AddStep[S] {
 	for _, step := range as.Steps {
 		step := step // capture range variable
@@ -162,12 +189,12 @@ func (as AddStep[S]) Input(fns ...func(context.Context, S) error) AddStep[S] {
 	return as
 }
 
-// Output can pass the results of the Step to outer scope.
-// Output is only triggered when the Step is successful (returns nil error).
+// Output registers a typed AfterStep callback. It is the symmetric companion
+// of Input: it runs at runtime, AFTER Do, on every retry attempt — but ONLY
+// when Do (and the prior After chain) returned nil. Use it to extract fields
+// off a successful step into outer scope.
 //
-// Output actually adds AfterStep callback for the Step(s).
-//
-// The Output callbacks are executed at runtime and per-try.
+// If you need to observe failures too, use AfterStep instead.
 func (as AddStep[S]) Output(fns ...func(context.Context, S) error) AddStep[S] {
 	for _, step := range as.Steps {
 		step := step // capture range variable
@@ -186,12 +213,12 @@ func (as AddStep[S]) Output(fns ...func(context.Context, S) error) AddStep[S] {
 	return as
 }
 
-// BeforeStep adds BeforeStep callback for the Step(s).
+// BeforeStep registers an untyped BeforeStep callback.
 //
-// The BeforeStep callback will be called before Do, and return when first error occurs.
-// The order of execution will respect the order of declarations.
-// The BeforeStep callbacks are able to change the context.Context feed into Do.
-// The BeforeStep callbacks are executed at runtime and per-try.
+// BeforeStep callbacks run before Do, in declaration order, on every retry
+// attempt. They may swap the context.Context that flows into Do (and into
+// subsequent BeforeStep callbacks). The first non-nil error short-circuits
+// the chain.
 func (as AddSteps) BeforeStep(befores ...BeforeStep) AddSteps {
 	for step := range as {
 		as[step].Before = append(as[step].Before, befores...)
@@ -199,23 +226,22 @@ func (as AddSteps) BeforeStep(befores ...BeforeStep) AddSteps {
 	return as
 }
 
-// AfterStep adds AfterStep callback for the Step(s).
-//
-// The AfterStep callback will be called after Do, and pass the error to next AfterStep callback.
-// The order of execution will respect the order of declarations.
-// The AfterStep callbacks are able to change the error returned by Do.
-// The AfterStep callbacks are executed at runtime and per-try.
+// AfterStep registers an untyped AfterStep callback.
 //
-// Tip:
+// AfterStep callbacks run after Do, in declaration order, on every retry
+// attempt. The error from Do (or from a previous AfterStep) is threaded
+// through, so each callback can observe, transform or swallow it. ALL
+// callbacks always run — an error never short-circuits the After chain.
 //
-// Remember to check error when overriding your own AfterStep function.
+// Tip: when you only care about the success path, remember to forward errors:
 //
 //	Steps(a).AfterStep(func(ctx context.Context, step Steper, err error) error {
-//		if err != nil {
-//			// do something when error happens
-//		}
-//		// do something when success
-//		return err // you can decide whether pass through the error or not
+//	    if err != nil {
+//	        // handle / log the failure
+//	        return err          // typically: forward unchanged
+//	    }
+//	    // success-only post-processing
+//	    return nil
 //	})
 func (as AddSteps) AfterStep(afters ...AfterStep) AddSteps {
 	for step := range as {
@@ -224,8 +250,10 @@ func (as AddSteps) AfterStep(afters ...AfterStep) AddSteps {
 	return as
 }
 
-// Timeout sets the Step level timeout.
-// Last one wins.
+// Timeout sets a step-level deadline that bounds the entire step lifetime
+// (all retry attempts together). Last call wins.
+//
+// For a per-attempt deadline, set RetryOption.TimeoutPerTry inside Retry().
 func (as AddSteps) Timeout(timeout time.Duration) AddSteps {
 	for step := range as {
 		as[step].Option = append(as[step].Option, func(so *StepOption) {
@@ -235,24 +263,20 @@ func (as AddSteps) Timeout(timeout time.Duration) AddSteps {
 	return as
 }
 
-// When set the Condition for the Step.
-// Last one wins.
-//
-// Tip:
+// When sets the Condition that decides whether the step actually runs. Last
+// call wins.
 //
-// Remember to check upstreams when overriding your own Condition function.
+// Tip: when composing with built-in conditions, return early so you don't
+// accidentally promote a Skipped/Canceled decision to Running:
 //
 //	Steps(a).When(func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
-//		// check upstreams leveraging built-in Conditions
-//		status := flow.AllSucceeded(ctx, ups)
-//		if status != flow.Running {
-//			return status // return fast if the Condition not satisfied
-//		}
-//		// your custom logic
-//		if ... {
-//			return flow.Running
-//		}
-//		return flow.Skipped
+//	    if status := flow.AllSucceeded(ctx, ups); status != flow.Running {
+//	        return status // upstreams aren't all green — bail with their decision
+//	    }
+//	    if myExtraCheck() {
+//	        return flow.Running
+//	    }
+//	    return flow.Skipped
 //	})
 func (as AddSteps) When(cond Condition) AddSteps {
 	for step := range as {
@@ -263,16 +287,19 @@ func (as AddSteps) When(cond Condition) AddSteps {
 	return as
 }
 
-// Retry customize how the Step should be retried.
+// Retry configures retry behavior for the step. The mutator(s) are applied to
+// a fresh RetryOption seeded from DefaultRetryOption (so calling Retry with
+// a nil mutator — e.g. Retry(nil) — opts in to the default retry policy).
 //
-// Step will use DefaultRetryOption when this option is configured with nil.
+// Note: the field is named Attempts (total attempts including the first try),
+// not MaxAttempts.
 //
 //	w.Add(
-//		Step(a), // not retry
-//		Step(b).Retry(func(opt *RetryOption) { // will retry 3 times
-//			opt.MaxAttempts = 3
-//		}),
-//		Step(c).Retry(nil), // will use DefaultRetryOption!
+//	    Step(a),                                      // no retry, run once.
+//	    Step(b).Retry(func(opt *RetryOption) {        // up to 3 attempts.
+//	        opt.Attempts = 3
+//	    }),
+//	    Step(c).Retry(nil),                           // use DefaultRetryOption as-is.
 //	)
 func (as AddSteps) Retry(opts ...func(*RetryOption)) AddSteps {
 	for step := range as {
@@ -296,10 +323,12 @@ func (as AddSteps) Retry(opts ...func(*RetryOption)) AddSteps {
 	return as
 }
 
-// AddToWorkflow implements Builder
+// AddToWorkflow makes AddSteps satisfy Builder so it can be passed to
+// Workflow.Add directly.
 func (as AddSteps) AddToWorkflow() map[Steper]*StepConfig { return as }
 
-// Merge another AddSteps into one.
+// Merge folds other AddSteps maps into this one, unioning per-step
+// configuration (upstreams unioned, callbacks/options concatenated).
 func (as AddSteps) Merge(others ...AddSteps) AddSteps {
 	for _, other := range others {
 		for k, v := range other {
@@ -312,111 +341,61 @@ func (as AddSteps) Merge(others ...AddSteps) AddSteps {
 	return as
 }
 
-// DependsOn declares dependency on the given Steps.
-//
-//	Step(a).DependsOn(b, c)
-//
-// Then b, c should happen-before a.
+// DependsOn — typed shim that forwards to the AddSteps method so chaining on
+// Step() returns the typed AddStep[S] (preserving Input/Output access).
 func (as AddStep[S]) DependsOn(ups ...Steper) AddStep[S] {
 	as.AddSteps = as.AddSteps.DependsOn(ups...)
 	return as
 }
 
-// BeforeStep adds BeforeStep callback for the Step(s).
-//
-// The BeforeStep callback will be called before Do, and return when first error occurs.
-// The order of execution will respect the order of declarations.
-// The BeforeStep callbacks are able to change the context.Context feed into Do.
-// The BeforeStep callbacks are executed at runtime and per-try.
+// BeforeStep — typed shim; see AddSteps.BeforeStep.
 func (as AddStep[S]) BeforeStep(befores ...BeforeStep) AddStep[S] {
 	as.AddSteps = as.AddSteps.BeforeStep(befores...)
 	return as
 }
 
-// AfterStep adds AfterStep callback for the Step(s).
-//
-// The AfterStep callback will be called after Do, and pass the error to next AfterStep callback.
-// The order of execution will respect the order of declarations.
-// The AfterStep callbacks are able to change the error returned by Do.
-// The AfterStep callbacks are executed at runtime and per-try.
-//
-// Tip:
-//
-// Remember to check error when overriding your own AfterStep function.
-//
-//	Step(a).AfterStep(func(ctx context.Context, step Steper, err error) error {
-//		if err != nil {
-//			// do something when error happens
-//		}
-//		// do something when success
-//		return err // you can decide whether pass through the error or not
-//	})
+// AfterStep — typed shim; see AddSteps.AfterStep.
 func (as AddStep[S]) AfterStep(afters ...AfterStep) AddStep[S] {
 	as.AddSteps = as.AddSteps.AfterStep(afters...)
 	return as
 }
 
-// Timeout sets the Step level timeout.
-// Last one wins.
+// Timeout — typed shim; see AddSteps.Timeout.
 func (as AddStep[S]) Timeout(timeout time.Duration) AddStep[S] {
 	as.AddSteps = as.AddSteps.Timeout(timeout)
 	return as
 }
 
-// When set the Condition for the Step.
-// Last one wins.
-//
-// Tip:
-//
-// Remember to check upstreams when overriding your own Condition function.
-//
-//	Steps(a).When(func(ctx context.Context, ups map[Steper]StepResult) StepStatus {
-//		// check upstreams leveraging built-in Conditions
-//		status := flow.AllSucceeded(ctx, ups)
-//		if status != flow.Running {
-//			return status // return fast if the Condition not satisfied
-//		}
-//		// your custom logic
-//		if ... {
-//			return flow.Running
-//		}
-//		return flow.Skipped
-//	})
+// When — typed shim; see AddSteps.When.
 func (as AddStep[S]) When(when Condition) AddStep[S] {
 	as.AddSteps = as.AddSteps.When(when)
 	return as
 }
 
-// Retry customize how the Step should be retried.
-//
-// Step will use DefaultRetryOption when this option is configured with nil.
-//
-//	w.Add(
-//		Step(a), // not retry
-//		Step(b).Retry(func(opt *RetryOption) { // will retry 3 times
-//			opt.MaxAttempts = 3
-//		}),
-//		Step(c).Retry(nil), // will use DefaultRetryOption!
-//	)
+// Retry — typed shim; see AddSteps.Retry.
 func (as AddStep[S]) Retry(fns ...func(*RetryOption)) AddStep[S] {
 	as.AddSteps = as.AddSteps.Retry(fns...)
 	return as
 }
 
-// AddStep is a typed wrapper of AddSteps.
+// AddStep is the typed view returned by Step[S]. It embeds AddSteps so every
+// untyped helper (DependsOn, When, Retry, …) is available, and adds the
+// typed Input/Output helpers on top.
 type AddStep[S Steper] struct {
 	AddSteps
 	Steps []S
 }
 
-// AddSteps helps to add Steper(s) into Workflow.
+// AddSteps is the {step → config} map produced by Steps(...) / Pipe(...) /
+// BatchPipe(...). It satisfies Builder via AddToWorkflow.
 type AddSteps map[Steper]*StepConfig
 
-// ToSteps converts []<Steper implementation> to []Steper.
+// ToSteps widens a typed slice []S to []Steper. Useful when you want to add
+// a homogeneous slice of steps without an explicit per-element conversion.
 //
-//	steps := []someStepImpl{ ... }
+//	steps := []*MyStep{ ... }
 //	flow.Add(
-//		Steps(ToSteps(steps)...),
+//	    Steps(ToSteps(steps)...),
 //	)
 func ToSteps[S Steper](steps []S) []Steper {
 	rv := []Steper{}
@@ -426,7 +405,8 @@ func ToSteps[S Steper](steps []S) []Steper {
 	return rv
 }
 
-// Merge merges another StepConfig into this one.
+// Merge folds another StepConfig into this one in place: upstream sets are
+// unioned, callbacks and options are concatenated. A nil other is a no-op.
 func (sc *StepConfig) Merge(other *StepConfig) {
 	if other == nil {
 		return
@@ -440,10 +420,11 @@ func (sc *StepConfig) Merge(other *StepConfig) {
 	sc.Option = append(sc.Option, other.Option...)
 }
 
-// Set is a simple generic set implementation based on map.
+// Set is a tiny generic set built on map. Used internally for upstream sets,
+// the BuildStep memo, etc.
 type Set[T comparable] map[T]struct{}
 
-// Has checks whether the set contains the given value.
+// Has reports whether v is in the set. Nil-safe.
 func (s Set[T]) Has(v T) bool {
 	if s == nil {
 		return false
@@ -452,7 +433,8 @@ func (s Set[T]) Has(v T) bool {
 	return ok
 }
 
-// Add adds the given values into the set.
+// Add inserts the given values into the set, lazily allocating the backing
+// map if needed.
 func (s *Set[T]) Add(vs ...T) {
 	if *s == nil {
 		*s = make(Set[T])
@@ -462,14 +444,14 @@ func (s *Set[T]) Add(vs ...T) {
 	}
 }
 
-// Union adds all values from the given sets into the set.
+// Union folds all elements from the given sets into this one.
 func (s *Set[T]) Union(sets ...Set[T]) {
 	for _, set := range sets {
 		s.Add(set.Flatten()...)
 	}
 }
 
-// Flatten converts the set into a slice of values.
+// Flatten returns the set's elements as a slice in unspecified order.
 func (s Set[T]) Flatten() []T {
 	r := make([]T, 0, len(s))
 	for v := range s {
@@ -478,8 +460,7 @@ func (s Set[T]) Flatten() []T {
 	return r
 }
 
-// Seq returns an iterator over the set.
-// The order of values is indeterminate.
+// Seq returns an iter.Seq over the set's elements. Order is unspecified.
 func (s Set[T]) Seq() iter.Seq[T] {
 	return func(yield func(T) bool) {
 		for v := range s {
@@ -490,18 +471,16 @@ func (s Set[T]) Seq() iter.Seq[T] {
 	}
 }
 
-// Keys returns the keys of the map m.
-// The keys will be in an indeterminate order.
+// Keys returns the keys of m in unspecified order.
 //
-// Deprecated: use maps.Keys() from standard library instead.
+// Deprecated: prefer slices.Collect(maps.Keys(m)) from the standard library.
 func Keys[M ~map[K]V, K comparable, V any](m M) []K {
 	return slices.Collect(maps.Keys(m))
 }
 
-// Values returns the values of the map m.
-// The values will be in an indeterminate order.
+// Values returns the values of m in unspecified order.
 //
-// Deprecated: use maps.Values() from standard library instead.
+// Deprecated: prefer slices.Collect(maps.Values(m)) from the standard library.
 func Values[M ~map[K]V, K comparable, V any](m M) []V {
 	return slices.Collect(maps.Values(m))
 }
diff --git a/workflow.go b/workflow.go
index 9f9f487..fe55204 100644
--- a/workflow.go
+++ b/workflow.go
@@ -13,69 +13,87 @@ import (
 	"github.com/benbjohnson/clock"
 )
 
-// Workflow represents a collection of connected Steps that form a directed acyclic graph (DAG).
+// Workflow orchestrates a collection of Steps connected by dependency edges
+// into a Directed Acyclic Graph (DAG).
 //
-// The Steps are connected via dependency, use Step(), Steps() or Pipe(), BatchPipe() to add Steps into Workflow.
+// You declare the graph with the helpers in step.go: Step / Steps / Pipe /
+// BatchPipe (and branching helpers If / Switch from branch.go), then hand
+// them to Workflow.Add:
 //
 //	workflow.Add(
-//		Step(a),
-//		Steps(b, c).DependsOn(a),	// a -> b, c
-//		Pipe(d, e, f),              // d -> e -> f
-//		BatchPipe(
-//			Steps(g, h),
-//			Steps(i, j),
-//		),                          // g, h -> i, j
+//	    Step(a),
+//	    Steps(b, c).DependsOn(a),    // a runs first, then b and c in parallel.
+//	    Pipe(d, e, f),               // d -> e -> f.
+//	    BatchPipe(
+//	        Steps(g, h),
+//	        Steps(i, j),
+//	    ),                           // g, h finish, then i, j run in parallel.
 //	)
 //
-// Workflow will execute Steps in a topological order, each Step will be executed in a separate goroutine.
+// Workflow.Do executes the graph in topological order. Each step that becomes
+// runnable runs in its own goroutine, with the following guarantee:
 //
-// Workflow guarantees that
+//	When a step's worker goroutine starts, every upstream step is already
+//	in a terminal status (Succeeded / Failed / Canceled / Skipped). The
+//	step's Condition then decides whether it actually runs (Running) or is
+//	settled inline as Skipped / Canceled.
 //
-//	Before a Step goroutine starts, all its Upstream Steps are `terminated`.
+// See StepStatus / Condition for the status state machine.
 //
-// Check `StepStatus` and `Condition` for details.
-//
-// Workflow supports Step-level configuration,       check Step(), Steps() and Pipe() for details.
-// Workflow supports Composite Steps,				 check Has(), As() and HasStep() for details.
+// Per-step configuration: use Step / Steps / Pipe (see step.go).
+// Composite steps:        use Has / As / HasStep (see wrap.go).
 type Workflow struct {
-	MaxConcurrency int         // MaxConcurrency limits the max concurrency of running Steps
-	DontPanic      bool        // DontPanic suppress panics, instead return it as error
-	SkipAsError    bool        // SkipAsError marks skipped Steps as an error if true, otherwise ignore them
-	Clock          clock.Clock // Clock for retry and unit test
-	DefaultOption  *StepOption // DefaultOption is the default option for all Steps
-
-	StepInterceptors    []StepInterceptor    // per-step global interceptors (immutable base)
-	AttemptInterceptors []AttemptInterceptor // per-attempt global interceptors (immutable base)
-	IsolateInterceptors bool                 // if true, do not inherit interceptors from a parent workflow
-
-	StepBuilder // StepBuilder to call BuildStep() for Steps
-
-	steps map[Steper]*State // the internal states of Steps
-
-	// inheritedStep / inheritedAttempt are populated by PrependInterceptors when
-	// this workflow runs as a child step under a parent. The lifecycle is:
-	//   1. Parent writes them BEFORE calling child.Do() (in executeWithRetry).
-	//   2. child.Do() reads them while building the effective interceptor chain.
-	//   3. child.Do()'s defer clears them after waitGroup.Wait() (covers all
-	//      exit paths: success, preflight error, panic).
+	// Workflow-level scheduling and panic policy.
+
+	MaxConcurrency int         // 0 means unlimited; otherwise caps simultaneously-running steps.
+	DontPanic      bool        // if true, panics are recovered and surfaced as ErrPanic.
+	SkipAsError    bool        // if true, Skipped terminal status counts as a workflow failure.
+	Clock          clock.Clock // injected clock for retries / timeouts (deterministic in tests).
+	DefaultOption  *StepOption // applied as the FIRST option for every Step (per-step Option calls override it).
+
+	// Workflow-level interceptors. The base slices are never mutated by
+	// inheritance (see PrependInterceptors / effective*Interceptors below),
+	// so multiple Do() runs stay deterministic.
+	StepInterceptors    []StepInterceptor    // wrap each step's full lifetime (across retries).
+	AttemptInterceptors []AttemptInterceptor // wrap each individual attempt (Before → Do → After).
+	IsolateInterceptors bool                 // when true and this Workflow runs as a child step, do NOT inherit parent interceptors.
+
+	StepBuilder // embeds the BuildStep memo so Workflow.Add can call BuildStep on new steps once.
+
+	steps map[Steper]*State // root step → its State (status + StepConfig).
+
+	// inheritedStep / inheritedAttempt hold the interceptors a parent workflow
+	// (or a SubWorkflow-bearing parent step) has prepended for the current run.
+	// Lifecycle:
+	//
+	//   1. Parent writes them BEFORE invoking child.Do() (in executeWithRetry).
+	//   2. child.Do() reads them while building its effective interceptor chain.
+	//   3. child.Do() clears them in a defer covering ALL exit paths
+	//      (success, preflight error, panic) so the next run starts fresh.
 	//
-	// They are NOT cleared by the internal reset() — reset() runs at the start
-	// of Do(), which would wipe out what the parent just wrote and break
-	// inheritance. The public Reset() method does clear them, since users call
-	// Reset() between runs and expect a fully-fresh state.
+	// They are intentionally NOT cleared by the internal reset() (reset() runs at
+	// the very top of Do() and would wipe the parent's just-written prefix). The
+	// public Reset() does clear them, since users call Reset() between runs and
+	// expect a fully-fresh state.
 	//
-	// They are never merged into StepInterceptors / AttemptInterceptors so the
+	// They are never folded into StepInterceptors / AttemptInterceptors so the
 	// user-supplied base stays untouched and repeated runs do not accumulate.
 	inheritedStep    []StepInterceptor
 	inheritedAttempt []AttemptInterceptor
 
-	statusChange *sync.Cond     // a condition to signal the status change to proceed tick
-	leaseBucket  chan struct{}  // constraint max concurrency of running Steps, nil means no limit
-	waitGroup    sync.WaitGroup // to prevent goroutine leak
-	isRunning    sync.Mutex     // indicate whether the Workflow is running
+	statusChange *sync.Cond     // signals to the tick loop when a worker terminates.
+	leaseBucket  chan struct{}  // bounded-channel "permit pool" enforcing MaxConcurrency; nil means unlimited.
+	waitGroup    sync.WaitGroup // tracks worker goroutines so Do() can wait for them on exit.
+	isRunning    sync.Mutex     // single-runner guard: TryLock fails fast if Do/Reset is re-entered.
 }
 
-// Add Steps into Workflow in phase Main.
+// Add wires Builders (Step / Steps / Pipe / BatchPipe / If / Switch / …) into
+// this Workflow. Repeated calls are additive: a step that appears in multiple
+// Add() calls has its config merged (upstreams unioned, callbacks/options
+// concatenated). Returns the Workflow for chaining.
+//
+// If DefaultOption is set, it is prepended to every step's Option list as a
+// SEED — so per-step Option calls (Retry, Timeout, When, …) still win.
 func (w *Workflow) Add(was ...Builder) *Workflow {
 	if w.steps == nil {
 		w.steps = make(map[Steper]*State)
@@ -95,23 +113,29 @@ func (w *Workflow) Add(was ...Builder) *Workflow {
 	return w
 }
 
-// AddStep adds a Step into Workflow with the given phase and config.
+// addStep registers `step` as a root in this workflow if it isn't already
+// reachable from one. If the new step embeds previously-registered roots, those
+// roots are demoted (their state is folded into the new root's state) so the
+// scheduler always operates on a single ROOT per composite tree.
+//
+// Then, if config is non-nil, declared upstreams are wired and the rest of the
+// config is merged into the resolved State (typically the lowest-level
+// containing workflow's State).
 func (w *Workflow) addStep(step Steper, config *StepConfig) {
 	if step == nil {
 		return
 	}
 	w.BuildStep(step)
 	if !HasStep(w, step) {
-		// the step is new, it becomes a new root.
-		// add the new root to the Workflow.
-		// if the step embeds a previous root step,
-		// we need to replace them with the new root.
-		// workflow will only orchestrate the root Steps,
-		// and leave the nested Steps being managed by the root Steps.
+		// New root: scan its tree for any previously-registered roots that are
+		// now nested inside it, and absorb their config so the scheduler sees a
+		// single root per composite. Panic if the new step would clash with a
+		// step that already belongs to a *different* root tree (that would be
+		// double-ownership and we have no way to resolve it).
 		var oldRoots Set[Steper]
 		Traverse(step, func(s Steper, walked []Steper) TraverseDecision {
 			if r := w.RootOf(s); r != nil {
-				if r != s { // s has another root
+				if r != s { // s already belongs to another root in this workflow.
 					panic(fmt.Errorf("add step %p(%s) failed, another step %p(%s) already has %p(%s)",
 						step, step, r, r, s, s))
 				}
@@ -137,7 +161,10 @@ func (w *Workflow) addStep(step Steper, config *StepConfig) {
 	}
 }
 
-// setUpstream will put the upstream into proper state.
+// setUpstream records `up` as an upstream of `step`, ensuring `up` itself is
+// registered as a step first. The dependency edge is added at every workflow
+// level whose tree contains both `step` and `up` — this keeps nested
+// SubWorkflows in sync so their tick loops see the same dependency.
 func (w *Workflow) setUpstream(step, up Steper) {
 	if step == nil || up == nil {
 		return
@@ -173,10 +200,15 @@ func (w *Workflow) setUpstream(step, up Steper) {
 	}
 }
 
-// Empty returns true if the Workflow don't have any Step.
+// Empty reports whether the Workflow has no steps. Nil-safe.
 func (w *Workflow) Empty() bool { return w == nil || len(w.steps) == 0 }
 
-// Steps returns all root Steps in the Workflow.
+// Steps returns the workflow's root steps. (Composite-step internals are not
+// exposed — only the values that are tracked by the scheduler.)
+//
+// Steps just calls Unwrap (order of the result is unspecified); Unwrap also
+// makes Workflow participate in the Steper unwrapping protocol (see wrap.go),
+// so utilities like As[T] / HasStep / String can walk into nested workflows.
 func (w *Workflow) Steps() []Steper { return w.Unwrap() }
 func (w *Workflow) Unwrap() []Steper {
 	if w.Empty() {
@@ -185,7 +217,9 @@ func (w *Workflow) Unwrap() []Steper {
 	return Keys(w.steps)
 }
 
-// RootOf returns the root Step of the given Step.
+// RootOf returns the root step (the value the scheduler tracks) that contains
+// `step`, or nil if no root contains it. A step is its own root when it was
+// added directly.
 func (w *Workflow) RootOf(step Steper) Steper {
 	if w.Empty() {
 		return nil
@@ -198,8 +232,10 @@ func (w *Workflow) RootOf(step Steper) Steper {
 	return nil
 }
 
-// StateOf returns the internal state of the Step.
-// State includes Step's status, error, input, dependency and config.
+// StateOf returns the State for `step` — the per-step bookkeeping (status +
+// config). For composite steps, StateOf returns the State of the OWNING
+// workflow level (root or sub-workflow), not necessarily this top-level
+// workflow's State.
 func (w *Workflow) StateOf(step Steper) *State {
 	if w.Empty() || step == nil {
 		return nil
@@ -227,7 +263,10 @@ func (w *Workflow) StateOf(step Steper) *State {
 	return nil
 }
 
-// UpstreamOf returns all upstream Steps and their status and error.
+// UpstreamOf returns each direct upstream of `step` mapped to that upstream's
+// current StepResult. Upstream identities are normalised to their root step
+// (i.e. the value the scheduler tracks), so callers see exactly what the
+// scheduler sees.
 func (w *Workflow) UpstreamOf(step Steper) map[Steper]StepResult {
 	if w.Empty() {
 		return nil
@@ -240,7 +279,8 @@ func (w *Workflow) UpstreamOf(step Steper) map[Steper]StepResult {
 	return rv
 }
 
-// IsTerminated returns true if all Steps terminated.
+// IsTerminated reports whether every step in the workflow has reached a
+// terminal status. The tick loop polls this to decide when to exit.
 func (w *Workflow) IsTerminated() bool {
 	if w.Empty() {
 		return true
@@ -253,23 +293,31 @@ func (w *Workflow) IsTerminated() bool {
 	return true
 }
 
-// Reset resets the Workflow to ready for a new run.
+// Reset prepares the Workflow for a fresh run from outside (the user's POV).
+// It rejects with ErrWorkflowIsRunning if a Do call is currently in flight.
+//
+// Difference vs the internal reset(): Reset() ALSO clears the inherited
+// interceptor slices set by a parent during a previous run. The internal
+// reset() must NOT clear them — see the inheritedStep / inheritedAttempt
+// lifecycle docs above.
 func (w *Workflow) Reset() error {
 	if !w.isRunning.TryLock() {
 		return ErrWorkflowIsRunning
 	}
 	defer w.isRunning.Unlock()
 	w.reset()
-	// Unlike the internal reset() (which Do() calls at its own start), Reset() also
-	// clears interceptors inherited from a parent during a previous run. The internal
-	// reset() must not clear them, because the parent writes them just before calling
-	// child.Do(), and child.Do() then calls reset() — clearing there would wipe the
-	// just-written prefix and break inheritance.
 	w.inheritedStep = nil
 	w.inheritedAttempt = nil
 	return nil
 }
 
+// reset is the per-Do internal reset: clear all step results back to Pending,
+// install a fresh statusChange Cond, ensure Clock is set, and re-allocate the
+// concurrency lease bucket sized for MaxConcurrency.
+//
+// Crucially, this does NOT touch inheritedStep / inheritedAttempt — those were
+// just written by the parent before invoking Do() and must survive into the
+// run.
 func (w *Workflow) reset() {
 	for _, state := range w.steps {
 		state.SetStepResult(StepResult{Status: Pending})
@@ -279,21 +327,22 @@ func (w *Workflow) reset() {
 	}
 	w.statusChange = sync.NewCond(new(sync.Mutex))
 	if w.MaxConcurrency > 0 {
-		// use buffered channel as a sized bucket
-		// a Step needs to create a lease in the bucket to run,
-		// and remove the lease from the bucket when it's done.
+		// Buffered channel as a sized permit pool: a Step takes a slot via
+		// `w.leaseBucket <- struct{}{}` to begin running, and frees it via
+		// `<-w.leaseBucket` when it terminates.
 		w.leaseBucket = make(chan struct{}, w.MaxConcurrency)
 	}
 }
 
-// PrependInterceptors implements InterceptorReceiver on Workflow itself,
-// so a Workflow used directly as a step (or embedded via SubWorkflow) can
-// inherit interceptors from its parent. If IsolateInterceptors is true,
-// the call is a no-op and the workflow uses only its own interceptors.
+// PrependInterceptors implements InterceptorReceiver on Workflow itself, so a
+// Workflow used directly as a step (or via SubWorkflow) can inherit
+// interceptors from its parent for the duration of one run. With
+// IsolateInterceptors == true the call is a no-op (the workflow uses only
+// its own configured interceptors).
 //
 // The inherited slices are stored separately from StepInterceptors /
-// AttemptInterceptors so the user-supplied base is never mutated and
-// repeated runs do not accumulate.
+// AttemptInterceptors so the user-supplied base is never mutated and repeated
+// runs do not accumulate inherited entries.
 func (w *Workflow) PrependInterceptors(step []StepInterceptor, attempt []AttemptInterceptor) {
 	if w.IsolateInterceptors {
 		return
@@ -312,9 +361,10 @@ func (w *Workflow) PrependInterceptors(step []StepInterceptor, attempt []Attempt
 	}
 }
 
-// effectiveStepInterceptors returns the chain to invoke for this run:
-// inherited (from parent, if any) prepended to the user-configured base.
-// The result is never written back to either field.
+// effectiveStepInterceptors returns the chain to invoke for THIS run: the
+// inherited prefix (from a parent, if any) followed by this workflow's own
+// configured base. The result is computed each call and is never written
+// back to either field.
 func (w *Workflow) effectiveStepInterceptors() []StepInterceptor {
 	if len(w.inheritedStep) == 0 {
 		return w.StepInterceptors
@@ -336,34 +386,49 @@ func (w *Workflow) effectiveAttemptInterceptors() []AttemptInterceptor {
 	return out
 }
 
-// Do starts the Step execution in topological order,
-// and waits until all Steps terminated.
+// Do runs the Workflow synchronously: it spawns a goroutine for every
+// runnable step, blocks the calling goroutine on a tick loop until every
+// step has reached a terminal status, then returns.
+//
+// Concurrency: only one Do (or Reset) may be in flight at a time per
+// Workflow instance — overlapping calls return ErrWorkflowIsRunning.
 //
-// Do will block the current goroutine.
+// Return value:
+//   - nil if every step finished as Succeeded (and, if SkipAsError == false,
+//     Skipped also counts as success).
+//   - ErrCycleDependency, from preflight, if the graph isn't a DAG.
+//   - ErrWorkflow (a map of step → StepResult) otherwise.
 func (w *Workflow) Do(ctx context.Context) error {
-	// assert the Workflow is not running
+	// Single-runner guard.
 	if !w.isRunning.TryLock() {
 		return ErrWorkflowIsRunning
 	}
 	defer w.isRunning.Unlock()
-	// Clear inherited interceptors set by a parent during this run on every exit
-	// path, so the next time this workflow runs (under any parent, or standalone)
-	// it starts fresh and PrependInterceptors does not accumulate. Using defer
-	// ensures even early exits (Empty, preflight failure, panic) reset state.
+
+	// Clear inherited interceptors set by a parent during this run on EVERY
+	// exit path, so a subsequent run (under any parent, or standalone) starts
+	// fresh and PrependInterceptors does not accumulate. Defer ensures even
+	// early exits (Empty, preflight failure, panic) reset state.
 	defer func() {
 		w.inheritedStep = nil
 		w.inheritedAttempt = nil
 	}()
-	// if no steps to run
+
+	// Nothing to do.
 	if w.Empty() {
 		return nil
 	}
+
 	w.reset()
-	// preflight check
+
+	// Reject cycles before launching any work.
 	if err := w.preflight(); err != nil {
 		return err
 	}
-	// each time one Step terminated, tick forward
+
+	// Tick loop: each time a step terminates it Signal()s the cond, we wake
+	// up and tick() again. Inline-settled steps may unblock more steps within
+	// the same tick (no signal needed for those — see tick()).
 	w.statusChange.L.Lock()
 	for {
 		if done := w.tick(ctx); done {
@@ -372,9 +437,11 @@ func (w *Workflow) Do(ctx context.Context) error {
 		w.statusChange.Wait()
 	}
 	w.statusChange.L.Unlock()
-	// ensure all goroutines are exited
+
+	// Drain worker goroutines so we don't return while children are still alive.
 	w.waitGroup.Wait()
-	// return the error
+
+	// Build the per-step error map and decide the overall outcome.
 	err := make(ErrWorkflow)
 	for step, state := range w.steps {
 		err[step] = state.GetStepResult()
@@ -388,8 +455,14 @@ func (w *Workflow) Do(ctx context.Context) error {
 	return err
 }
 
-const scanned StepStatus = "scanned" // a private status for preflight
+// scanned is a private status used only by preflight() to mark steps it has
+// proven to be reachable in topological order. It is replaced by Pending
+// before Do() starts dispatching.
+const scanned StepStatus = "scanned"
 
+// stepExecution is the per-step worker context handed to the goroutine that
+// runs a single step. attempt is bumped after each completed attempt by the
+// wrapper that buildAttemptChain returns.
 type stepExecution struct {
 	w       *Workflow
 	step    Steper
@@ -397,6 +470,8 @@ type stepExecution struct {
 	attempt uint64
 }
 
+// isAllUpstreamScanned reports whether every upstream of a step has been
+// proved reachable by preflight (has the private "scanned" status).
 func isAllUpstreamScanned(ups map[Steper]StepResult) bool {
 	for _, up := range ups {
 		if up.Status != scanned {
@@ -405,6 +480,10 @@ func isAllUpstreamScanned(ups map[Steper]StepResult) bool {
 	}
 	return true
 }
+
+// isAnyUpstreamNotTerminated reports whether at least one upstream is still
+// running / pending. The tick loop uses this to skip steps whose upstreams
+// haven't all settled yet.
 func isAnyUpstreamNotTerminated(ups map[Steper]StepResult) bool {
 	for _, up := range ups {
 		if !up.Status.IsTerminated() {
@@ -413,11 +492,18 @@ func isAnyUpstreamNotTerminated(ups map[Steper]StepResult) bool {
 	}
 	return false
 }
+
+// preflight verifies the dependency graph is a DAG. It iteratively marks
+// every step whose upstreams are all already marked, until no further
+// progress is possible. Anything left unmarked is in a cycle (or depends on
+// one) and is reported via ErrCycleDependency.
+//
+// On success, all step statuses are reset to Pending so the tick loop can
+// dispatch them.
 func (w *Workflow) preflight() error {
-	// assert all dependency would not form a cycle
-	// start scanning, mark Step as Scanned only when its all dependencies are Scanned
+	// Topo-scan: mark Steps whose upstreams are all marked, repeat until fixed point.
 	for {
-		hasNewScanned := false // whether a new Step being marked as Scanned this turn
+		hasNewScanned := false
 		for step, state := range w.steps {
 			if state.GetStatus() == scanned {
 				continue
@@ -427,12 +513,12 @@ func (w *Workflow) preflight() error {
 				state.SetStatus(scanned)
 			}
 		}
-		if !hasNewScanned { // break when no new Step being Scanned
+		if !hasNewScanned {
 			break
 		}
 	}
-	// check whether still have Steps not Scanned,
-	// not Scanned Steps are in a cycle.
+
+	// Anything still unscanned is in a cycle or depends on one.
 	stepsInCycle := make(ErrCycleDependency)
 	for step, state := range w.steps {
 		if state.GetStatus() == scanned {
@@ -447,29 +533,32 @@ func (w *Workflow) preflight() error {
 	if len(stepsInCycle) > 0 {
 		return stepsInCycle
 	}
-	// reset all Steps' status to Pending
+
+	// Reset everyone to Pending for the real run.
 	for _, step := range w.steps {
 		step.SetStepResult(StepResult{Status: Pending})
 	}
 	return nil
 }
 
-// tick will not block, it starts a goroutine for each runnable Step.
-// tick returns true if all steps in all phases are terminated.
+// tick is one round of the scheduler. It is non-blocking — it spawns
+// goroutines for every Pending step that is now eligible. Returns true iff
+// every step has reached a terminal status.
+//
+// Why Condition is evaluated HERE (under statusChange.L) rather than inside
+// the worker goroutine:
 //
-// The Step's Condition is evaluated here (in the tick goroutine, holding
-// statusChange.L) so that:
-//   - Steps whose Condition resolves to a terminal status (Skipped/Canceled)
-//     are settled inline without spawning a goroutine or consuming a
-//     concurrency lease.
-//   - Steps that will execute have their status set to Running before the
-//     worker goroutine is spawned, so a subsequent tick cannot double-spawn
-//     them.
+//   - Steps whose Condition resolves to a TERMINAL status (Skipped/Canceled)
+//     are settled INLINE — no goroutine, no concurrency lease, no
+//     interceptor chain. This keeps zero-cost branches truly cheap.
+//   - Steps that WILL execute have their status set to Running before the
+//     worker is spawned, so a subsequent tick cannot double-spawn them.
 //
 // Inline-settled steps may unblock downstream steps in the same tick. Because
-// no goroutine is spawned for them, no signalStatusChange is fired — so we
-// loop until a single pass produces no inline progress, otherwise the main
-// loop in Do() would Wait() forever for a signal that never comes.
+// no goroutine is spawned for them, no signalStatusChange will fire — so we
+// loop within tick() until a single pass produces no inline progress;
+// otherwise the main Do() loop would Wait() forever for a signal that never
+// comes.
 func (w *Workflow) tick(ctx context.Context) bool {
 	for {
 		if w.IsTerminated() {
@@ -521,28 +610,35 @@ func (w *Workflow) tick(ctx context.Context) bool {
 	}
 }
 
+// signalStatusChange wakes the tick loop. Called from a worker goroutine
+// after the worker has updated its step's status to terminal.
 func (w *Workflow) signalStatusChange() {
 	w.statusChange.L.Lock()
 	defer w.statusChange.L.Unlock()
 	w.statusChange.Signal()
 }
 
+// run executes one step from start to terminal status: it builds the
+// StepInterceptor chain (innermost call is executeWithRetry, which loops over
+// attempts), runs it, classifies the result into a StepStatus, records the
+// final StepResult, releases the concurrency lease, and signals the scheduler.
 func (ex *stepExecution) run(ctx context.Context) {
 	defer ex.w.waitGroup.Done()
 
-	// By the time we get here, tick() has already evaluated the Condition
-	// (terminal results are settled inline) and set the status to Running.
-	// Build the StepInterceptor chain; innermost next is executeWithRetry.
-	// When DontPanic is true, each interceptor invocation is wrapped in
+	// Build the StepInterceptor chain. tick() has already evaluated the
+	// Condition (terminal results were settled inline) and set the status to
+	// Running, so we can dive straight in.
+	//
+	// When DontPanic is true, EVERY interceptor invocation is wrapped in
 	// catchPanicAsError so a panicking user interceptor cannot crash the
 	// process or leave the lease unreleased / status unsignalled.
 	stepNext := func(ctx context.Context) error { return ex.executeWithRetry(ctx) }
 	stepICs := ex.w.effectiveStepInterceptors()
 	for i := len(stepICs) - 1; i >= 0; i-- {
-		// ic and nextLocal are declared inside the loop body with :=, so they
-		// are fresh variables on every iteration and the closure below captures
-		// each iteration's instance independently. The explicit naming is to
-		// make the per-iteration scoping obvious to readers.
+		// `ic` and `nextLocal` are declared inside the loop body with `:=`,
+		// so they are fresh on every iteration and the closure below captures
+		// each iteration's instance independently. The explicit naming makes
+		// the per-iteration scoping obvious.
 		ic := stepICs[i]
 		nextLocal := stepNext
 		stepNext = func(ctx context.Context) error {
@@ -556,6 +652,10 @@ func (ex *stepExecution) run(ctx context.Context) {
 	}
 
 	err := stepNext(ctx)
+
+	// Classify the error into a terminal StepStatus. Cancellation errors
+	// (context.Canceled / DeadlineExceeded / DefaultIsCanceled-recognised)
+	// are reported as Canceled rather than Failed.
 	status := StatusFromError(err)
 	if status == Failed {
 		switch {
@@ -571,19 +671,22 @@ func (ex *stepExecution) run(ctx context.Context) {
 		Err:        err,
 		FinishedAt: ex.w.Clock.Now(),
 	})
-	// Release the lease BEFORE signalling, so that when the main loop wakes up
-	// in tick() it can immediately acquire a new lease.
+
+	// Release the lease BEFORE signalling, so when the tick loop wakes up it
+	// can immediately acquire a fresh lease for the next runnable step.
 	ex.w.unlease()
 	ex.w.signalStatusChange()
 }
 
+// executeWithRetry runs a single step's full attempt sequence under the
+// configured retry policy and step-level Timeout. Before running, it
+// propagates the effective interceptor chain into nested workflows so
+// multi-level nesting (grandparent → parent → child) accumulates correctly
+// for THIS run, while the user-supplied bases stay untouched.
 func (ex *stepExecution) executeWithRetry(ctx context.Context) error {
 	option := ex.state.Option()
 
-	// Propagate the effective chain (inherited prefix + this workflow's own base)
-	// so multi-level nesting (grandparent → parent → child) accumulates correctly
-	// within one run, while the user-supplied base on each workflow stays untouched.
-	if recv, ok := ex.step.(InterceptorReceiver); ok {
+	if recv := findInterceptorReceiver(ex.step); recv != nil {
 		recv.PrependInterceptors(ex.w.effectiveStepInterceptors(), ex.w.effectiveAttemptInterceptors())
 	}
 
@@ -600,15 +703,18 @@ func (ex *stepExecution) executeWithRetry(ctx context.Context) error {
 	return ex.w.retry(option.RetryOption)(ctx, attemptChain, notAfter)
 }
 
+// buildAttemptChain wraps a single attempt (Before → Do → After) with the
+// per-attempt interceptors, returning a function suitable for the retry loop.
+// The chain is wrapped one final time in a function that always increments
+// ex.attempt after each completed attempt — even when an interceptor
+// short-circuits — so the attempt counter remains accurate.
 func (ex *stepExecution) buildAttemptChain() func(context.Context) error {
 	chain := func(ctx context.Context) error {
 		return ex.runAttempt(ctx)
 	}
 	attemptICs := ex.w.effectiveAttemptInterceptors()
 	for i := len(attemptICs) - 1; i >= 0; i-- {
-		// ic and nextLocal are declared inside the loop body with :=, so they
-		// are fresh variables on every iteration and the closure below captures
-		// each iteration's instance independently.
+		// Same per-iteration capture pattern as run(); see comment there.
 		ic := attemptICs[i]
 		nextLocal := chain
 		chain = func(ctx context.Context) error {
@@ -620,8 +726,6 @@ func (ex *stepExecution) buildAttemptChain() func(context.Context) error {
 			return ic.InterceptAttempt(ctx, ex.step, ex.attempt, nextLocal)
 		}
 	}
-	// Wrap the full attempt chain (including interceptors) so ex.attempt is always
-	// incremented after each attempt regardless of whether interceptors short-circuit.
 	inner := chain
 	return func(ctx context.Context) error {
 		defer func() { ex.attempt++ }()
@@ -629,6 +733,13 @@ func (ex *stepExecution) buildAttemptChain() func(context.Context) error {
 	}
 }
 
+// runAttempt executes one attempt: Before callbacks → Do → After callbacks.
+//
+// The `do` wrapper is either a direct invocation, or — when DontPanic is true
+// — catchPanicAsError, which converts a panic to an ErrPanic-tagged error.
+// The Before chain may swap the context that is threaded into Do (and the
+// After chain). After callbacks always run, even if Before or Do failed; they
+// receive the latest error and can transform it.
 func (ex *stepExecution) runAttempt(ctx context.Context) error {
 	do := func(fn func() error) error { return fn() }
 	if ex.w.DontPanic {
@@ -649,6 +760,10 @@ func (ex *stepExecution) runAttempt(ctx context.Context) error {
 	return do(func() error { return ex.state.After(ctxStep, ex.step, err) })
 }
 
+// lease takes one slot from the concurrency permit pool. Returns true if the
+// caller may now run, or false if the pool is full (the tick loop will retry
+// on the next signal). When MaxConcurrency is unset (leaseBucket == nil), the
+// answer is always true.
 func (w *Workflow) lease() bool {
 	if w.leaseBucket == nil {
 		return true
@@ -660,13 +775,18 @@ func (w *Workflow) lease() bool {
 		return false
 	}
 }
+
+// unlease returns one slot to the concurrency permit pool, or is a no-op if
+// MaxConcurrency is unset.
 func (w *Workflow) unlease() {
 	if w.leaseBucket != nil {
 		<-w.leaseBucket
 	}
 }
 
-// catchPanicAsError catches panic from f and return it as error.
+// catchPanicAsError invokes f, recovers any panic, and returns it as an
+// ErrPanic carrying a filtered stack trace (only frames inside this module
+// are kept, to keep the trace readable).
 func catchPanicAsError(f func() error) error {
 	var returnErr error
 	func(err *error) {
@@ -689,28 +809,18 @@ func catchPanicAsError(f func() error) error {
 	return returnErr
 }
 
-// SubWorkflow is a helper struct to let you create a step with a sub-workflow.
-// Embed this struct to your struct definition.
-//
-// Usage:
+// SubWorkflow makes any user struct behave as a Step that contains a
+// Workflow. Embed it in your own struct to get Add/Do/Reset and the
+// InterceptorReceiver delegation for free:
 //
 //	type MyStep struct {
-//		flow.SubWorkflow
+//	    flow.SubWorkflow
 //	}
 //
 //	func (s *MyStep) BuildStep() {
-//		s.Reset() // reset the workflow
-//		s.Add(
-//			flow.Step(/* stepX */),
-//		)
-//	}
-//
-//	func main() {
-//		w := &flow.Workflow{}
-//		myStep := &MyStep{}
-//		w.Add(flow.Step(myStep)) // BuildStep() will be called when adding the step
-//		...
-//		stepX := flow.As[*StepX](w) // we can get the inner stepX from the workflow
+//	    s.Add(
+//	        flow.Step(/* stepX */),
+//	    )
 //	}
 type SubWorkflow struct{ w Workflow }
 
@@ -718,11 +828,13 @@ func (s *SubWorkflow) Unwrap() Steper                    { return &s.w }
 func (s *SubWorkflow) Add(builders ...Builder) *Workflow { return s.w.Add(builders...) }
 func (s *SubWorkflow) Do(ctx context.Context) error      { return s.w.Do(ctx) }
 
-// Reset resets the sub-workflow to ready for BuildStep()
+// Reset clears the inner workflow so a subsequent BuildStep() can rebuild
+// from scratch.
 func (s *SubWorkflow) Reset() { s.w = Workflow{} }
 
-// PrependInterceptors implements InterceptorReceiver by delegating to the
-// embedded Workflow.
+// PrependInterceptors satisfies InterceptorReceiver by delegating to the
+// embedded Workflow — so a parent workflow's interceptors flow into the
+// SubWorkflow's inner Workflow exactly as if it were used directly.
 func (s *SubWorkflow) PrependInterceptors(step []StepInterceptor, attempt []AttemptInterceptor) {
 	s.w.PrependInterceptors(step, attempt)
 }
diff --git a/wrap.go b/wrap.go
index 0ed8693..3a53527 100644
--- a/wrap.go
+++ b/wrap.go
@@ -6,89 +6,79 @@ import (
 	"strings"
 )
 
-// # What is a Composite Step?
+// # Composite Steps
 //
-// Consider this case, Alice writes a Step implementation,
+// A "composite step" is a Step that is implemented by combining (embedding,
+// wrapping or aggregating) one or more other Steps. The Workflow only
+// schedules its top-level (root) steps; the inner Steps remain the composite
+// step's own concern. To let the Workflow see through the composition (so
+// utilities like Has[T], As[T], HasStep, dependency wiring and BuildStep can
+// reach the inner steps), the composite exposes them via Unwrap().
 //
-//	type DoSomeThing struct{}
-//	func (d *DoSomeThing) Do(context.Context) error { /* do fancy things */ }
+// ## Example: aggregating two steps
 //
-// After that, Bob finds the above implementation is useful, but still not enough.
-// So Bob combines the above Steps into a new Step,
+//	type DoSomeThing struct{}
+//	func (d *DoSomeThing) Do(context.Context) error { /* ... */ }
 //
 //	type DoManyThings struct {
-//		DoSomeThing
-//		DoOtherThing
+//	    DoSomeThing
+//	    DoOtherThing
 //	}
-//	func (d *DoManyThings) Do(context.Context) error { /* do fancy things then other thing */ }
+//	func (d *DoManyThings) Do(context.Context) error { /* fan out ... */ }
 //
-// Let's call the above DoManyThings a Composite Step, the below Decorator is another example.
+// ## Example: wrapping (decorator)
 //
-//	type Decorator struct { Steper }
+//	type Decorator struct{ Steper }
 //	func (d *Decorator) Do(ctx context.Context) error {
-//		/* do something before */
-//		err := d.Steper.Do(ctx)
-//		/* do something after */
-//		return err
-//	}
-//
-// Since Workflow only requires a Step to satisfy the below interface:
-//
-//	type Steper interface {
-//		Do(context.Context) error
+//	    /* before */
+//	    err := d.Steper.Do(ctx)
+//	    /* after */
+//	    return err
 //	}
 //
-// It's easy, intuitive, flexible and yet powerful to use Composite Steps.
-//
-// Actually, Workflow itself also implements Steper interface,
-// meaning you can use Workflow as a Step in another Workflow!
-
-// # How to audit / retrieve / update all steps from the Workflow?
-//
-//	workflow := func() *Workflow {
-//		...
-//		workflow.Add(Step(doSomeThing))
-//		return workflow
-//	}
-//
-//	from now on, we don't have reference to the internal steps in Workflow directly, like doSomeThing
-//	however, it's totally possible have necessary to update doSomeThing,
-//	like modify its input, configuration, or even its behavior (by decorator).
-//
-// # Introduce Unwrap()
+// Workflow itself implements Steper, so any Workflow can be embedded as a
+// step inside another Workflow (see SubWorkflow in workflow.go).
 //
-// Kindly remind that, this nesting problem is not a new issue in Go.
-// In Go, we have a very common error pattern:
+// ## Reaching into composites
 //
-//	type MyError struct { Err error }
-//	func (e *MyError) Error() string { return fmt.Sprintf("MyError(%v)", e.Err) }
+// If you no longer hold a direct reference to an inner step but still need
+// to inspect or modify it (e.g. to mock it for tests, or attach an
+// Input/Output), expose it with one of the two Unwrap shapes recognised by
+// Traverse():
 //
-// The solution is using Unwrap() method:
-//
-//	func (e *MyError) Unwrap() error { return e.Err }
-//
-// Then standard package errors provides Is() and As() functions to help us deal with warped errors.
-// We also provides a similar Has() and As() functions for Steper.
-//
-// Users only need to implement the below methods for your Step implementations:
-//
-//	type WrapStep struct { Steper }
+//	type WrapStep struct{ Steper }
 //	func (w *WrapStep) Unwrap() Steper { return w.Steper }
-//	// or
-//	type WrapSteps struct { Steps []Steper }
+//
+//	type WrapSteps struct{ Steps []Steper }
 //	func (w *WrapSteps) Unwrap() []Steper { return w.Steps }
 //
-// to expose your inner Steps.
+// Then Has[T], As[T], HasStep and Traverse will all walk through the
+// composite to find what you're after — mirroring the standard library's
+// errors.Is / errors.As pattern for wrapped errors.
 
+// TraverseDecision is the value a Traverse visitor returns to direct the walk.
 type TraverseDecision int
 
 const (
-	TraverseContinue  = iota // TraverseContinue continue the traversal
-	TraverseStop             // TraverseStop stop and exit the traversal immediately
-	TraverseEndBranch        // TraverseEndBranch end the current branch, but continue sibling branches
+	TraverseContinue  = iota // keep walking into this node's children.
+	TraverseStop             // stop the entire traversal immediately.
+	TraverseEndBranch        // skip this node's children, continue with siblings.
 )
 
-// Traverse performs a pre-order traversal of the tree of step.
+// Traverse performs a pre-order depth-first walk of the Step tree rooted at s.
+//
+// For each node visited, the callback receives:
+//   - the Step itself,
+//   - the path of Steps walked to reach it (excluding the current node).
+//
+// The callback's TraverseDecision controls whether to descend, prune, or
+// stop. A nil callback is treated as TraverseStop.
+//
+// The walk understands two Unwrap shapes:
+//   - `Unwrap() Steper`     — single child, descend into it.
+//   - `Unwrap() []Steper`   — multiple children, descend into each in order.
+//
+// Anything else is a leaf.
 func Traverse(s Steper, f func(Steper, []Steper) TraverseDecision, walked ...Steper) TraverseDecision {
 	if f == nil {
 		return TraverseStop
@@ -117,7 +107,8 @@ func Traverse(s Steper, f func(Steper, []Steper) TraverseDecision, walked ...Ste
 	}
 }
 
-// Has reports whether there is any step inside matches target type.
+// Has reports whether the Step tree rooted at s contains any node assignable
+// to T, the Step counterpart of asking errors.As for a wrapped error type.
 func Has[T Steper](s Steper) bool {
 	find := false
 	Traverse(s, func(s Steper, walked []Steper) TraverseDecision {
@@ -130,8 +121,8 @@ func Has[T Steper](s Steper) bool {
 	return find
 }
 
-// As finds all steps in the tree of step that matches target type, and returns them.
-// The sequence of the returned steps is pre-order traversal.
+// As collects every node in the Step tree assignable to T, in pre-order.
+// Returns nil (zero-length) if there are no matches.
 func As[T Steper](s Steper) []T {
 	var rv []T
 	Traverse(s, func(s Steper, walked []Steper) TraverseDecision {
@@ -143,7 +134,8 @@ func As[T Steper](s Steper) []T {
 	return rv
 }
 
-// HasStep reports whether there is any step matches target step.
+// HasStep reports whether the Step tree rooted at step contains the exact
+// instance target (pointer-equality). Returns false if target is nil.
 func HasStep(step, target Steper) bool {
 	if target == nil {
 		return false
@@ -159,7 +151,16 @@ func HasStep(step, target Steper) bool {
 	return find
 }
 
-// String unwraps step and returns a proper string representation.
+// String renders a Step (and any composite contents) as a debug-friendly
+// multi-line string. The format prefers, in order:
+//
+//   - the Step's own String() method, if any;
+//   - "<Type>(<addr>) { ... }" for single-child wrappers;
+//   - "<Type>(<addr>) { each child on its own line }" for multi-child wrappers;
+//   - "<Type>(<addr>)" for leaves.
+//
+// This is also what LogValue uses, so it's the canonical text form of a Step
+// across logs, errors and panics.
 func String(step Steper) string {
 	if step == nil {
 		return "<nil>"
@@ -180,15 +181,19 @@ func String(step Steper) string {
 	}
 }
 
-// LogValue is used with log/slog, you can use it like:
+// LogValue produces a slog-friendly handle for a Step that defers the
+// (potentially expensive) String() call until the slog backend actually
+// renders the field:
 //
 //	logger.With("step", LogValue(step))
 //
-// To prevent expensive String() calls,
+// If you don't care about laziness, the equivalent eager form is:
 //
 //	logger.With("step", String(step))
 func LogValue(step Steper) logValue { return logValue{Steper: step} }
 
+// logValue carries a Step around with custom String / LogValue / MarshalJSON
+// implementations so it serializes via String() for any sink.
 type logValue struct{ Steper }
 
 func (lv logValue) String() string       { return String(lv.Steper) }
diff --git a/wrap_test.go b/wrap_test.go
index 6d8bd0d..dcf2abd 100644
--- a/wrap_test.go
+++ b/wrap_test.go
@@ -349,3 +349,47 @@ func TestSubWorkflow_IsolateInterceptors(t *testing.T) {
 	// child only sees the inner step
 	assert.Equal(t, int32(1), childCount.Load())
 }
+
+// TestWorkflow_AsStep_InheritsThroughNamedStep ensures that wrapping a child
+// *Workflow in a Steper-only wrapper (NamedStep embeds the Steper interface,
+// so PrependInterceptors is NOT promoted) does not break parent-interceptor
+// inheritance: the parent walks the Step tree via Unwrap to find the
+// InterceptorReceiver, so wrappers are transparent.
+func TestWorkflow_AsStep_InheritsThroughNamedStep(t *testing.T) {
+	t.Parallel()
+
+	var stepped []Steper
+	mu := sync.Mutex{}
+	ic := StepInterceptorFunc(func(ctx context.Context, s Steper, next func(context.Context) error) error {
+		mu.Lock()
+		stepped = append(stepped, s)
+		mu.Unlock()
+		return next(ctx)
+	})
+
+	innerStep := NoOp("inner")
+	child := &Workflow{}
+	child.Add(Step(innerStep))
+
+	// Wrap child in NamedStep — this is the wrapper produced by flow.Name().
+	// NamedStep embeds the Steper interface, so it does NOT promote
+	// PrependInterceptors. Inheritance must therefore go through Unwrap.
+	named := &NamedStep{Name: "child", Steper: child}
+
+	parent := &Workflow{StepInterceptors: []StepInterceptor{ic}}
+	parent.Add(Step(named))
+	assert.NoError(t, parent.Do(context.Background()))
+
+	// Parent's interceptor must see both the wrapped child step and the inner step.
+	var sawNamed, sawInner bool
+	for _, s := range stepped {
+		if s == named {
+			sawNamed = true
+		}
+		if s == innerStep {
+			sawInner = true
+		}
+	}
+	assert.True(t, sawNamed, "parent interceptor should fire for the NamedStep itself")
+	assert.True(t, sawInner, "parent interceptor should fire for the inner step (inheritance through Unwrap)")
+}