diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py
index 705f50aff..7113e6508 100644
--- a/hindsight-api-slim/hindsight_api/api/http.py
+++ b/hindsight-api-slim/hindsight_api/api/http.py
@@ -270,6 +270,17 @@ class RecallRequest(BaseModel):
         default=None,
         description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified.",
     )
+    prefer_observations: bool = Field(
+        default=True,
+        description=(
+            "When recalling raw facts ('world'/'experience') together with 'observation', drop any raw "
+            "fact that an observation in the results was consolidated from, so the observation supersedes "
+            "it and you don't get duplicate content. The freed slots are backfilled with the next results, "
+            "keeping the result count at the requested budget. Enabled by default; set to false to return "
+            "raw facts even when an observation already covers them. No effect unless 'observation' and at "
+            "least one raw type are both requested."
+        ),
+    )
     budget: Budget = Budget.MID
     max_tokens: int = 4096
     trace: bool = False
@@ -3826,6 +3837,7 @@ async def api_recall(
                         max_tokens=request.max_tokens,
                         enable_trace=request.trace,
                         fact_type=fact_types,
+                        prefer_observations=request.prefer_observations,
                         question_date=question_date,
                         include_entities=include_entities,
                         max_entity_tokens=max_entity_tokens,
diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
index 6212877f3..04a1db943 100644
--- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py
+++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -3843,6 +3843,11 @@ async def recall_async(
         max_tokens: int = 4096,
         enable_trace: bool = False,
         fact_type: list[str] | None = None,
+        # Defaults to False at the engine layer on purpose: the user-facing default is True
+        # (set on the HTTP RecallRequest / MCP recall tool), but internal callers that
+        # deliberately recall raw facts — notably consolidation, which needs the raw facts
+        # it folds into observations — must NOT have them silently deduped away.
+        prefer_observations: bool = False,
         question_date: datetime | None = None,
         include_entities: bool = False,
         max_entity_tokens: int = 500,
@@ -3875,6 +3880,10 @@ async def recall_async(
             bank_id: bank ID to recall for
             query: Recall query
             fact_type: List of fact types to recall (e.g., ['world', 'experience'])
+            prefer_observations: When True and both 'observation' and a raw type ('world'/'experience')
+                       are requested, drop raw facts that a returned observation was consolidated from
+                       (deduplication by provenance). Freed slots backfill, keeping the result count at
+                       the budget. No-op unless both observation and raw types are requested.
             budget: Budget level for graph traversal (low=100, mid=300, high=600 units)
             max_tokens: Maximum tokens to return (counts only 'text' field, default 4096)
                        Results are returned until token budget is reached, stopping before
@@ -4012,6 +4021,7 @@ async def recall_async(
                             max_chunk_tokens,
                             request_context,
                             semaphore_wait=semaphore_wait,
+                            prefer_observations=prefer_observations,
                             tags=tags,
                             tags_match=tags_match,
                             tag_groups=tag_groups,
@@ -4148,6 +4158,7 @@ async def _search_with_retries(
         max_chunk_tokens: int = 8192,
         request_context: "RequestContext" = None,
         semaphore_wait: float = 0.0,
+        prefer_observations: bool = False,
         tags: list[str] | None = None,
         tags_match: TagsMatch = "any",
         tag_groups: list[TagGroup] | None = None,
@@ -4660,6 +4671,48 @@ def to_tuple_format(results):
             if request_context is not None:
                 request_context.raise_if_cancelled()
 
+            # Step 4.8: prefer-observations dedup. When the caller asked for observations
+            # alongside raw facts, an observation supersedes the raw facts it was
+            # consolidated from: drop those raw facts so the same content isn't returned
+            # twice. Runs BEFORE the Step 5 truncation so the freed slots backfill with
+            # the next-best results, keeping the result count at the budget. No-op unless
+            # 'observation' and at least one raw type were both requested.
+            raw_types_requested = {"world", "experience"} & set(fact_type)
+            if prefer_observations and "observation" in fact_type and raw_types_requested:
+                # Only observations inside the pre-dedup return window (top thinking_budget * 2)
+                # can supersede a raw fact; a far-down observation should not suppress a
+                # top raw fact it merely happens to reference.
+                observation_ids = [
+                    uuid.UUID(sr.id)
+                    for sr in scored_results[: thinking_budget * 2]
+                    if sr.retrieval.fact_type == "observation"
+                ]
+                if observation_ids:
+                    superseded_ids: set[str] = set()
+                    async with acquire_with_retry(backend) as dedup_conn:
+                        obs_rows = await dedup_conn.fetch(
+                            f"""
+                            SELECT source_memory_ids
+                            FROM {fq_table("memory_units")}
+                            WHERE id = ANY($1::uuid[]) AND fact_type = 'observation'
+                            """,
+                            observation_ids,
+                        )
+                    for obs_row in obs_rows:
+                        for sid in obs_row["source_memory_ids"] or []:
+                            superseded_ids.add(str(sid))
+                    if superseded_ids:
+                        before_count = len(scored_results)
+                        scored_results = [
+                            sr
+                            for sr in scored_results
+                            if not (sr.retrieval.fact_type in ("world", "experience") and sr.id in superseded_ids)
+                        ]
+                        log_buffer.append(
+                            f"  [4.8] prefer_observations: dropped {before_count - len(scored_results)} "
+                            f"raw fact(s) superseded by {len(observation_ids)} observation(s)"
+                        )
+
             # Step 5: Truncate to thinking_budget * 2 for token filtering
             rerank_limit = thinking_budget * 2
             top_scored = scored_results[:rerank_limit]
diff --git a/hindsight-api-slim/hindsight_api/mcp_tools.py b/hindsight-api-slim/hindsight_api/mcp_tools.py
index 0f26b53d2..8c74174f8 100644
--- a/hindsight-api-slim/hindsight_api/mcp_tools.py
+++ b/hindsight-api-slim/hindsight_api/mcp_tools.py
@@ -833,6 +833,7 @@ async def recall(
             max_tokens: int = 4096,
             budget: str = "high",
             types: list[str] | None = None,
+            prefer_observations: bool = True,
             tags: list[str] | None = None,
             tags_match: str = "any",
             tag_groups: list[dict] | None = None,
@@ -845,6 +846,10 @@ async def recall(
                 max_tokens: Maximum tokens to return in results (default: 4096)
                 budget: Search budget - 'low', 'mid', or 'high' (default: 'high'). Higher budgets search more thoroughly.
                 types: Fact types to include (e.g., ['world', 'experience']). Default: all types.
+                prefer_observations: When recalling raw facts together with 'observation', drop any raw fact
+                    that a returned observation was consolidated from, so the observation supersedes it (no
+                    duplicate content). Enabled by default; set false to keep raw facts an observation already
+                    covers. No effect unless 'observation' and a raw type are both in types. Default: True.
                 tags: Optional tags to filter results by (e.g., ['project:alpha']). Mutually exclusive with tag_groups.
                 tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
                 tag_groups: Compound tag filter using boolean groups (AND-ed together). Each group is a leaf
@@ -873,6 +878,7 @@ async def recall(
                     "bank_id": target_bank,
                     "query": query,
                     "fact_type": fact_types,
+                    "prefer_observations": prefer_observations,
                     "budget": budget_enum,
                     "max_tokens": max_tokens,
                     "request_context": _get_request_context(config),
@@ -905,6 +911,7 @@ async def recall(
             max_tokens: int = 4096,
             budget: str = "high",
             types: list[str] | None = None,
+            prefer_observations: bool = True,
             tags: list[str] | None = None,
             tags_match: str = "any",
             tag_groups: list[dict] | None = None,
@@ -916,6 +923,10 @@ async def recall(
                 max_tokens: Maximum tokens to return in results (default: 4096)
                 budget: Search budget - 'low', 'mid', or 'high' (default: 'high'). Higher budgets search more thoroughly.
                 types: Fact types to include (e.g., ['world', 'experience']). Default: all types.
+                prefer_observations: When recalling raw facts together with 'observation', drop any raw fact
+                    that a returned observation was consolidated from, so the observation supersedes it (no
+                    duplicate content). Enabled by default; set false to keep raw facts an observation already
+                    covers. No effect unless 'observation' and a raw type are both in types. Default: True.
                 tags: Optional tags to filter results by (e.g., ['project:alpha']). Mutually exclusive with tag_groups.
                 tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
                 tag_groups: Compound tag filter using boolean groups (AND-ed together). Each group is a leaf
@@ -943,6 +954,7 @@ async def recall(
                     "bank_id": target_bank,
                     "query": query,
                     "fact_type": fact_types,
+                    "prefer_observations": prefer_observations,
                     "budget": budget_enum,
                     "max_tokens": max_tokens,
                     "request_context": _get_request_context(config),
diff --git a/hindsight-api-slim/tests/test_recall_prefer_observations.py b/hindsight-api-slim/tests/test_recall_prefer_observations.py
new file mode 100644
index 000000000..3c5054f75
--- /dev/null
+++ b/hindsight-api-slim/tests/test_recall_prefer_observations.py
@@ -0,0 +1,185 @@
+"""Tests for the recall `prefer_observations` deduplication flag.
+
+When the caller recalls raw facts ('world'/'experience') together with
+'observation' and sets prefer_observations=True, any raw fact that a returned
+observation was consolidated from (tracked via memory_units.source_memory_ids)
+is dropped so the observation supersedes it — no duplicate content.
+
+Dedup is provenance-based, not semantic: a raw fact that is semantically
+similar to an observation but NOT listed in its source_memory_ids must survive.
+
+No LLM required — inserts memory_units directly via SQL with real embeddings.
+"""
+
+import uuid
+
+import pytest
+import pytest_asyncio
+
+from hindsight_api import MemoryEngine, RequestContext
+from hindsight_api.engine.retain import embedding_utils
+
+RC = RequestContext(tenant_id="default")
+
+QUERY = "Alice mountain hiking"
+
+# Two raw facts the observation is consolidated from (must be dropped when the
+# flag is on), one raw fact that is semantically similar but NOT a source (must
+# survive), and the observation itself.
+SRC1_TEXT = "Alice loves hiking in the mountains"
+SRC2_TEXT = "Alice hikes the Alps every summer"
+NON_SRC_TEXT = "Alice enjoys exploring mountain hiking trails"
+OBS_TEXT = "Alice is an avid mountain hiker"
+
+
+async def _insert_unit(
+    conn,
+    *,
+    unit_id: str,
+    text: str,
+    bank_id: str,
+    embedding_str: str,
+    fact_type: str = "world",
+    source_memory_ids: list[uuid.UUID] | None = None,
+) -> None:
+    await conn.execute(
+        """
+        INSERT INTO memory_units (id, bank_id, text, fact_type, embedding, source_memory_ids)
+        VALUES ($1, $2, $3, $4, $5::vector, $6::uuid[])
+        """,
+        unit_id,
+        bank_id,
+        text,
+        fact_type,
+        embedding_str,
+        source_memory_ids,
+    )
+
+
+def _to_str(emb: list[float]) -> str:
+    return "[" + ",".join(str(v) for v in emb) + "]"
+
+
+def _result_ids(result) -> set[str]:
+    return {str(r.id) for r in result.results}
+
+
+@pytest_asyncio.fixture
+async def seeded_obs_memory(memory_no_llm_verify: MemoryEngine):
+    """Seed two source facts, one non-source fact, and an observation over the two sources."""
+    engine = memory_no_llm_verify
+    bank_id = f"test-prefer-obs-{uuid.uuid4().hex[:8]}"
+    await engine.get_bank_profile(bank_id, request_context=RC)
+
+    src1_id = str(uuid.uuid4())
+    src2_id = str(uuid.uuid4())
+    non_src_id = str(uuid.uuid4())
+    obs_id = str(uuid.uuid4())
+
+    embeddings = await embedding_utils.generate_embeddings_batch(
+        engine.embeddings,
+        [SRC1_TEXT, SRC2_TEXT, NON_SRC_TEXT, OBS_TEXT],
+    )
+
+    pool = await engine._get_pool()
+    async with pool.acquire() as conn:
+        await _insert_unit(conn, unit_id=src1_id, text=SRC1_TEXT, bank_id=bank_id, embedding_str=_to_str(embeddings[0]))
+        await _insert_unit(conn, unit_id=src2_id, text=SRC2_TEXT, bank_id=bank_id, embedding_str=_to_str(embeddings[1]))
+        await _insert_unit(
+            conn, unit_id=non_src_id, text=NON_SRC_TEXT, bank_id=bank_id, embedding_str=_to_str(embeddings[2])
+        )
+        await _insert_unit(
+            conn,
+            unit_id=obs_id,
+            text=OBS_TEXT,
+            bank_id=bank_id,
+            embedding_str=_to_str(embeddings[3]),
+            fact_type="observation",
+            source_memory_ids=[uuid.UUID(src1_id), uuid.UUID(src2_id)],
+        )
+
+    ids = {"src1": src1_id, "src2": src2_id, "non_src": non_src_id, "obs": obs_id}
+    yield engine, bank_id, ids
+
+    await engine.delete_bank(bank_id, request_context=RC)
+
+
+class TestPreferObservations:
+    async def test_disabled_returns_sources_and_observation(self, seeded_obs_memory):
+        """Without the flag, the source facts AND the observation are all returned."""
+        engine, bank_id, ids = seeded_obs_memory
+        result = await engine.recall_async(
+            bank_id=bank_id,
+            query=QUERY,
+            request_context=RC,
+            fact_type=["world", "experience", "observation"],
+            prefer_observations=False,
+            max_tokens=10000,
+        )
+        found = _result_ids(result)
+        assert ids["src1"] in found
+        assert ids["src2"] in found
+        assert ids["obs"] in found
+
+    async def test_enabled_drops_source_facts_keeps_observation(self, seeded_obs_memory):
+        """With the flag, the observation supersedes the facts it was consolidated from."""
+        engine, bank_id, ids = seeded_obs_memory
+        result = await engine.recall_async(
+            bank_id=bank_id,
+            query=QUERY,
+            request_context=RC,
+            fact_type=["world", "experience", "observation"],
+            prefer_observations=True,
+            max_tokens=10000,
+        )
+        found = _result_ids(result)
+        assert ids["obs"] in found, "the observation must remain"
+        assert ids["src1"] not in found, "source fact 1 is superseded by the observation"
+        assert ids["src2"] not in found, "source fact 2 is superseded by the observation"
+
+    async def test_enabled_keeps_non_source_fact(self, seeded_obs_memory):
+        """Dedup is provenance-based: a similar fact NOT in source_memory_ids survives."""
+        engine, bank_id, ids = seeded_obs_memory
+        result = await engine.recall_async(
+            bank_id=bank_id,
+            query=QUERY,
+            request_context=RC,
+            fact_type=["world", "experience", "observation"],
+            prefer_observations=True,
+            max_tokens=10000,
+        )
+        found = _result_ids(result)
+        assert ids["non_src"] in found, "a non-source fact must not be dropped, even if semantically similar"
+
+    async def test_noop_without_observation_type(self, seeded_obs_memory):
+        """The flag is a no-op when 'observation' is not among the requested types."""
+        engine, bank_id, ids = seeded_obs_memory
+        result = await engine.recall_async(
+            bank_id=bank_id,
+            query=QUERY,
+            request_context=RC,
+            fact_type=["world", "experience"],
+            prefer_observations=True,
+            max_tokens=10000,
+        )
+        found = _result_ids(result)
+        assert ids["src1"] in found
+        assert ids["src2"] in found
+
+
+def test_default_split_user_facing_on_engine_off():
+    """The user-facing default is on; the engine default is off.
+
+    The HTTP RecallRequest (and MCP recall tool) default prefer_observations to
+    True so callers get dedup automatically. The engine method defaults it to
+    False on purpose so internal callers — notably consolidation, which needs the
+    raw facts it folds into observations — are never silently deduped.
+    """
+    import inspect
+
+    from hindsight_api.api.http import RecallRequest
+    from hindsight_api.engine.memory_engine import MemoryEngine
+
+    assert RecallRequest(query="anything").prefer_observations is True
+    engine_default = inspect.signature(MemoryEngine.recall_async).parameters["prefer_observations"].default
+    assert engine_default is False
diff --git a/hindsight-clients/go/api/openapi.yaml b/hindsight-clients/go/api/openapi.yaml
index 2e8b9d5f2..37cc5159d 100644
--- a/hindsight-clients/go/api/openapi.yaml
+++ b/hindsight-clients/go/api/openapi.yaml
@@ -7113,6 +7113,18 @@ components:
             type: string
           nullable: true
           type: array
+        prefer_observations:
+          default: true
+          description: "When recalling raw facts ('world'/'experience') together with\
+            \ 'observation', drop any raw fact that an observation in the results\
+            \ was consolidated from, so the observation supersedes it and you don't\
+            \ get duplicate content. The freed slots are backfilled with the next\
+            \ results, keeping the result count at the requested budget. Enabled by\
+            \ default; set to false to return raw facts even when an observation already\
+            \ covers them. No effect unless 'observation' and at least one raw type\
+            \ are both requested."
+          title: Prefer Observations
+          type: boolean
         budget:
           $ref: '#/components/schemas/Budget'
         max_tokens:
diff --git a/hindsight-clients/go/model_recall_request.go b/hindsight-clients/go/model_recall_request.go
index e8ee279de..2c560833f 100644
--- a/hindsight-clients/go/model_recall_request.go
+++ b/hindsight-clients/go/model_recall_request.go
@@ -23,6 +23,8 @@ var _ MappedNullable = &RecallRequest{}
 type RecallRequest struct {
 	Query string `json:"query"`
 	Types []string `json:"types,omitempty"`
+	// When recalling raw facts ('world'/'experience') together with 'observation', drop any raw fact that an observation in the results was consolidated from, so the observation supersedes it and you don't get duplicate content. The freed slots are backfilled with the next results, keeping the result count at the requested budget. Enabled by default; set to false to return raw facts even when an observation already covers them. No effect unless 'observation' and at least one raw type are both requested.
+	PreferObservations *bool `json:"prefer_observations,omitempty"`
 	Budget *Budget `json:"budget,omitempty"`
 	MaxTokens *int32 `json:"max_tokens,omitempty"`
 	Trace *bool `json:"trace,omitempty"`
@@ -44,6 +46,8 @@ type _RecallRequest RecallRequest
 func NewRecallRequest(query string) *RecallRequest {
 	this := RecallRequest{}
 	this.Query = query
+	var preferObservations bool = true
+	this.PreferObservations = &preferObservations
 	var maxTokens int32 = 4096
 	this.MaxTokens = &maxTokens
 	var trace bool = false
@@ -58,6 +62,8 @@ func NewRecallRequest(query string) *RecallRequest {
 // but it doesn't guarantee that properties required by API are set
 func NewRecallRequestWithDefaults() *RecallRequest {
 	this := RecallRequest{}
+	var preferObservations bool = true
+	this.PreferObservations = &preferObservations
 	var maxTokens int32 = 4096
 	this.MaxTokens = &maxTokens
 	var trace bool = false
@@ -124,6 +130,38 @@ func (o *RecallRequest) SetTypes(v []string) {
 	o.Types = v
 }
 
+// GetPreferObservations returns the PreferObservations field value if set, zero value otherwise.
+func (o *RecallRequest) GetPreferObservations() bool {
+	if o == nil || IsNil(o.PreferObservations) {
+		var ret bool
+		return ret
+	}
+	return *o.PreferObservations
+}
+
+// GetPreferObservationsOk returns a tuple with the PreferObservations field value if set, nil otherwise
+// and a boolean to check if the value has been set.
+func (o *RecallRequest) GetPreferObservationsOk() (*bool, bool) {
+	if o == nil || IsNil(o.PreferObservations) {
+		return nil, false
+	}
+	return o.PreferObservations, true
+}
+
+// HasPreferObservations returns a boolean if a field has been set.
+func (o *RecallRequest) HasPreferObservations() bool {
+	if o != nil && !IsNil(o.PreferObservations) {
+		return true
+	}
+
+	return false
+}
+
+// SetPreferObservations gets a reference to the given bool and assigns it to the PreferObservations field.
+func (o *RecallRequest) SetPreferObservations(v bool) {
+	o.PreferObservations = &v
+}
+
 // GetBudget returns the Budget field value if set, zero value otherwise.
 func (o *RecallRequest) GetBudget() Budget {
 	if o == nil || IsNil(o.Budget) {
@@ -406,6 +444,9 @@ func (o RecallRequest) ToMap() (map[string]interface{}, error) {
 	if o.Types != nil {
 		toSerialize["types"] = o.Types
 	}
+	if !IsNil(o.PreferObservations) {
+		toSerialize["prefer_observations"] = o.PreferObservations
+	}
 	if !IsNil(o.Budget) {
 		toSerialize["budget"] = o.Budget
 	}
diff --git a/hindsight-clients/python/hindsight_client_api/models/recall_request.py b/hindsight-clients/python/hindsight_client_api/models/recall_request.py
index 1035ca7aa..605add5c1 100644
--- a/hindsight-clients/python/hindsight_client_api/models/recall_request.py
+++ b/hindsight-clients/python/hindsight_client_api/models/recall_request.py
@@ -31,6 +31,7 @@ class RecallRequest(BaseModel):
     """ # noqa: E501
     query: StrictStr
     types: Optional[List[StrictStr]] = None
+    prefer_observations: Optional[StrictBool] = Field(default=True, description="When recalling raw facts ('world'/'experience') together with 'observation', drop any raw fact that an observation in the results was consolidated from, so the observation supersedes it and you don't get duplicate content. The freed slots are backfilled with the next results, keeping the result count at the requested budget. Enabled by default; set to false to return raw facts even when an observation already covers them. No effect unless 'observation' and at least one raw type are both requested.")
     budget: Optional[Budget] = None
     max_tokens: Optional[StrictInt] = 4096
     trace: Optional[StrictBool] = False
@@ -39,7 +40,7 @@ class RecallRequest(BaseModel):
     tags: Optional[List[StrictStr]] = None
     tags_match: Optional[StrictStr] = Field(default='any', description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).")
     tag_groups: Optional[List[MentalModelTriggerInputTagGroupsInner]] = None
-    __properties: ClassVar[List[str]] = ["query", "types", "budget", "max_tokens", "trace", "query_timestamp", "include", "tags", "tags_match", "tag_groups"]
+    __properties: ClassVar[List[str]] = ["query", "types", "prefer_observations", "budget", "max_tokens", "trace", "query_timestamp", "include", "tags", "tags_match", "tag_groups"]
 
     @field_validator('tags_match')
     def tags_match_validate_enum(cls, value):
@@ -134,6 +135,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
         _obj = cls.model_validate({
             "query": obj.get("query"),
             "types": obj.get("types"),
+            "prefer_observations": obj.get("prefer_observations") if obj.get("prefer_observations") is not None else True,
             "budget": obj.get("budget"),
             "max_tokens": obj.get("max_tokens") if obj.get("max_tokens") is not None else 4096,
             "trace": obj.get("trace") if obj.get("trace") is not None else False,
diff --git a/hindsight-clients/typescript/generated/types.gen.ts b/hindsight-clients/typescript/generated/types.gen.ts
index 4f20fb290..a0a52d27c 100644
--- a/hindsight-clients/typescript/generated/types.gen.ts
+++ b/hindsight-clients/typescript/generated/types.gen.ts
@@ -2897,6 +2897,12 @@ export type RecallRequest = {
    * List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified.
    */
   types?: Array<string> | null;
+  /**
+   * Prefer Observations
+   *
+   * When recalling raw facts ('world'/'experience') together with 'observation', drop any raw fact that an observation in the results was consolidated from, so the observation supersedes it and you don't get duplicate content. The freed slots are backfilled with the next results, keeping the result count at the requested budget. Enabled by default; set to false to return raw facts even when an observation already covers them. No effect unless 'observation' and at least one raw type are both requested.
+   */
+  prefer_observations?: boolean;
   budget?: Budget;
   /**
    * Max Tokens
diff --git a/hindsight-control-plane/src/app/api/recall/route.ts b/hindsight-control-plane/src/app/api/recall/route.ts
index 43126dd74..9c5a5e283 100644
--- a/hindsight-control-plane/src/app/api/recall/route.ts
+++ b/hindsight-control-plane/src/app/api/recall/route.ts
@@ -10,6 +10,7 @@ export async function POST(request: NextRequest) {
       query,
       types,
       fact_type,
+      prefer_observations,
       max_tokens,
       trace,
       budget,
@@ -25,6 +26,7 @@ export async function POST(request: NextRequest) {
       body: {
         query,
         types: types || fact_type,
+        prefer_observations,
         max_tokens,
         trace,
         budget: budget || "mid",
diff --git a/hindsight-control-plane/src/lib/api.ts b/hindsight-control-plane/src/lib/api.ts
index bf7497227..4e4a3193b 100644
--- a/hindsight-control-plane/src/lib/api.ts
+++ b/hindsight-control-plane/src/lib/api.ts
@@ -343,6 +343,7 @@ export class ControlPlaneClient {
   async recall(params: {
     query: string;
     types?: string[];
+    prefer_observations?: boolean;
     bank_id: string;
     budget?: string;
     max_tokens?: number;
diff --git a/hindsight-docs/docs/developer/api/recall.mdx b/hindsight-docs/docs/developer/api/recall.mdx
index 5bff8e43d..686104541 100644
--- a/hindsight-docs/docs/developer/api/recall.mdx
+++ b/hindsight-docs/docs/developer/api/recall.mdx
@@ -82,6 +82,12 @@ Each type runs the full four-strategy retrieval pipeline independently, so narro
 Observations are deduplicated, evidence-grounded beliefs consolidated from multiple facts — preferences, recurring patterns, and durable learnings the memory bank has built up. Each observation references its supporting memories (with exact quotes), and is refined rather than overwritten when new evidence arrives. They are created and maintained automatically in the background after retain operations.
 :::
 
+### prefer_observations
+
+Because observations are consolidated from raw facts, recalling `observation` alongside `world` and `experience` can return the same information twice — once as the raw fact and once folded into an observation. With `prefer_observations` you get the best of both: you still recall every type, but whenever an observation in the results was built from a raw fact, that raw fact is dropped so the observation supersedes it. The freed slots are backfilled with the next-best results, so you don't lose coverage.
+
+This lets you ask for everything without choosing between "raw facts only" (no consolidation) and "observations only" (which may lag behind the latest retains while consolidation catches up). **Enabled by default** — set it to `false` to return raw facts even when an observation already covers them. It has no effect unless both `observation` and at least one of `world`/`experience` are included in `types`.
+
 ### budget
 
 Controls retrieval depth and breadth. Accepted values are `low`, `mid` (default), and `high`. Use `low` for fast simple lookups, `mid` for balanced everyday queries, and `high` when you need to find indirect connections or exhaustive coverage.
diff --git a/hindsight-docs/static/openapi.json b/hindsight-docs/static/openapi.json
index 4b06c5c53..13641402e 100644
--- a/hindsight-docs/static/openapi.json
+++ b/hindsight-docs/static/openapi.json
@@ -10795,6 +10795,12 @@
             "title": "Types",
             "description": "List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified."
           },
+          "prefer_observations": {
+            "type": "boolean",
+            "title": "Prefer Observations",
+            "description": "When recalling raw facts ('world'/'experience') together with 'observation', drop any raw fact that an observation in the results was consolidated from, so the observation supersedes it and you don't get duplicate content. The freed slots are backfilled with the next results, keeping the result count at the requested budget. Enabled by default; set to false to return raw facts even when an observation already covers them. No effect unless 'observation' and at least one raw type are both requested.",
+            "default": true
+          },
           "budget": {
             "$ref": "#/components/schemas/Budget",
             "default": "mid"