Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,16 @@ class RecallRequest(BaseModel):
)
tags: list[str] | None = Field(
default=None,
description="Filter memories by tags. If not specified, all memories are returned.",
description="Filter memories by tags. If not specified, all memories are returned. "
"Omitting tags (or passing []) together with tags_match='exact' filters to "
"untagged/global observations only (the scope written by observation_scopes='shared').",
)
tags_match: TagsMatch = Field(
default="any",
description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), "
"'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
"'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged), "
"'exact' (set-equality on the full scope, excludes untagged). With 'exact' and no tags "
"(or []), the empty global scope is selected and only untagged memories match.",
)
tag_groups: list[TagGroup] | None = Field(
default=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,10 @@ async def retrieve(
result.activation = row["score"]
results.append(result)

if tags:
results = filter_results_by_tags(results, tags, match=tags_match)
# filter_results_by_tags is a no-op when no filter applies (tags falsy and not
# the exact-empty/global scope), so call it unconditionally — gating on `if tags:`
# would skip the untagged-only filter for tags=[] + tags_match="exact".
results = filter_results_by_tags(results, tags, match=tags_match)

if tag_groups:
results = filter_results_by_tag_groups(results, tag_groups)
Expand Down
34 changes: 30 additions & 4 deletions hindsight-api-slim/hindsight_api/engine/search/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
EXACT matching: Memory matches only if its tag set EQUALS the request tag set (order-
independent). Used for observation "scope" filtering, where each observation lives
under exactly one scope (its full tag set) and "scope [a]" must not match "[a, b]".
An EMPTY request scope (no tags — ``[]`` or ``None``) is the global/untagged scope and
matches only untagged memories — the scope that ``observation_scopes="shared"``
consolidation writes to. This is the one mode where absent tags filter rather than
meaning "no filter"; all other modes treat empty/absent tags as "no filtering". This
mirrors the ``GET .../graph`` endpoint, where ``tags_match="exact"`` with no tags also
selects the global scope.
"""

from __future__ import annotations
Expand Down Expand Up @@ -82,11 +88,16 @@ def build_tags_where_clause(
>>> clause, params, next_offset = build_tags_where_clause(['user_a'], 3, 'mu.', 'any_strict')
>>> print(clause) # "AND mu.tags IS NOT NULL AND mu.tags != '{}' AND mu.tags && $3"
"""
column = f"{table_alias}tags" if table_alias else "tags"

if match == "exact" and not tags:
# Empty/absent scope = global/untagged: match only untagged rows. No bind param
# needed (callers gate the param on truthy `tags`, so none is appended).
return f"AND ({column} IS NULL OR {column} = '{{}}')", [], param_offset

if not tags:
return "", [], param_offset

column = f"{table_alias}tags" if table_alias else "tags"

if match == "exact":
# Set equality (order-independent): superset AND subset. Untagged rows
# (empty array) never satisfy `@>` of a non-empty scope, so they're excluded.
Expand Down Expand Up @@ -126,11 +137,16 @@ def build_tags_where_clause_simple(
Returns:
SQL clause string or empty string.
"""
column = f"{table_alias}tags" if table_alias else "tags"

if match == "exact" and not tags:
# Empty/absent scope = global/untagged: match only untagged rows. No bind param
# needed (callers gate the param on truthy `tags`, so none is appended).
return f"AND ({column} IS NULL OR {column} = '{{}}')"

if not tags:
return ""

column = f"{table_alias}tags" if table_alias else "tags"

if match == "exact":
# Set equality (order-independent): superset AND subset. Untagged rows
# (empty array) never satisfy `@>` of a non-empty scope, so they're excluded.
Expand Down Expand Up @@ -164,6 +180,10 @@ def filter_results_by_tags(
Returns:
Filtered list of results.
"""
if match == "exact" and not tags:
# Empty/absent scope = global/untagged: keep only untagged results.
return [r for r in results if not getattr(r, "tags", None)]

if not tags:
return results

Expand Down Expand Up @@ -267,6 +287,9 @@ def _build_group_clause(
if isinstance(group, TagGroupLeaf):
column = f"{table_alias}tags" if table_alias else "tags"
if group.match == "exact":
if len(group.tags) == 0:
# Empty scope = global/untagged: match only untagged rows (no bind param).
return f"({column} IS NULL OR {column} = '{{}}')", [], param_offset
clause = f"({column} @> ${param_offset} AND {column} <@ ${param_offset})"
return clause, [group.tags], param_offset + 1
operator, include_untagged = _parse_tags_match(group.match)
Expand Down Expand Up @@ -369,6 +392,9 @@ def _match_group(result: object, group: TagGroup) -> bool:
if isinstance(group, TagGroupLeaf):
result_tags = getattr(result, "tags", None)
is_untagged = result_tags is None or len(result_tags) == 0
if group.match == "exact" and len(group.tags) == 0:
# Empty scope = global/untagged: match only untagged results.
return is_untagged
_, include_untagged = _parse_tags_match(group.match)
is_any_match = group.match in ("any", "any_strict")
tags_set = set(group.tags)
Expand Down
104 changes: 104 additions & 0 deletions hindsight-api-slim/tests/test_tags_visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
TagGroupNot,
TagGroupOr,
build_tag_groups_where_clause,
build_tags_where_clause,
build_tags_where_clause_simple,
filter_results_by_tag_groups,
filter_results_by_tags,
Expand Down Expand Up @@ -136,6 +137,46 @@ def test_tags_match_exact_with_table_alias(self):
assert "@>" in result
assert "<@" in result

# ---- Test "exact" mode with the empty scope ([]) = untagged/global only ----

def test_tags_match_exact_empty_list_matches_untagged_only(self):
    """An empty scope under match='exact' yields a param-free, untagged-only clause."""
    clause = build_tags_where_clause_simple([], 5, match="exact")
    # Both spellings of "untagged" (NULL and the empty array) must be covered.
    for fragment in ("IS NULL", "= '{}'"):
        assert fragment in clause
    # Nothing is bound for an empty scope, so neither the placeholder nor the
    # set-equality operators (which need a bound scope) may appear.
    for fragment in ("$5", "@>", "<@"):
        assert fragment not in clause

def test_tags_match_exact_empty_list_with_table_alias(self):
    """The untagged-only clause qualifies the tags column with the supplied alias."""
    clause = build_tags_where_clause_simple([], 5, table_alias="mu.", match="exact")
    for fragment in ("mu.tags IS NULL", "mu.tags = '{}'"):
        assert fragment in clause

def test_tags_match_exact_none_matches_untagged_only(self):
    """Passing None for tags under match='exact' produces the untagged-only clause."""
    clause = build_tags_where_clause_simple(None, 5, match="exact")
    for fragment in ("IS NULL", "= '{}'"):
        assert fragment in clause
    # No bind placeholder is emitted for the empty scope.
    assert "$5" not in clause

def test_tags_match_any_empty_list_still_no_filter(self):
    """For 'any' and 'any_strict', an empty tag list produces no clause at all."""
    for mode in ("any", "any_strict"):
        assert build_tags_where_clause_simple([], 5, match=mode) == ""

@pytest.mark.parametrize("tags", [None, []])
def test_tags_where_clause_exact_empty_scope_keeps_param_offset(self, tags):
    """The empty exact scope consumes no bind index.

    The parameterized builder must hand back the offset it was given and an
    empty params list, so clauses built afterwards stay aligned with their params.
    """
    start = 4
    sql, bound, after = build_tags_where_clause(tags, param_offset=start, match="exact")
    assert sql == "AND (tags IS NULL OR tags = '{}')"
    assert bound == []
    assert after == start

# ---- Test table alias with all modes ----

def test_tags_match_any_with_table_alias(self):
Expand Down Expand Up @@ -255,6 +296,20 @@ def test_exact_mode_excludes_untagged(self):
assert len(filtered) == 1
assert filtered[0].tags == ["a"]

def test_exact_mode_empty_scope_matches_untagged_only(self):
    """Filtering with [] under 'exact' keeps only results whose tags are None or empty."""
    candidates = [MockResult(tags) for tags in (["a"], ["a", "b"], None, [])]
    kept = filter_results_by_tags(candidates, [], match="exact")
    assert len(kept) == 2
    # No surviving result may carry any tags.
    assert not any(item.tags for item in kept)

def test_exact_mode_none_matches_untagged_only(self):
    """Filtering with None under 'exact' keeps only results whose tags are None or empty."""
    candidates = [MockResult(tags) for tags in (["a"], None, [])]
    kept = filter_results_by_tags(candidates, None, match="exact")
    assert len(kept) == 2
    # No surviving result may carry any tags.
    assert not any(item.tags for item in kept)

def test_all_mode_includes_untagged(self):
"""'all' mode should include untagged results."""
results = [MockResult(["a", "b"]), MockResult(None), MockResult([])]
Expand Down Expand Up @@ -502,6 +557,16 @@ def test_multiple_top_level_groups_are_anded(self):
assert len(params) == 2
assert next_offset == 3

def test_exact_leaf_empty_scope_matches_untagged_only(self):
    """An exact leaf with no tags builds an untagged-only clause and binds nothing."""
    start = 5
    leaf = TagGroupLeaf(tags=[], match="exact")
    sql, bound, after = build_tag_groups_where_clause([leaf], start)
    for fragment in ("IS NULL", "= '{}'"):
        assert fragment in sql
    # Param-free: no placeholder, no bound values, and the offset is handed back unchanged.
    assert "$5" not in sql
    assert bound == []
    assert after == start


# ============================================================================
# Unit Tests for filter_results_by_tag_groups (Python-side)
Expand Down Expand Up @@ -531,6 +596,14 @@ def test_single_leaf_any_strict_excludes_untagged(self):
assert len(filtered) == 1
assert filtered[0].tags == ["step:5"]

def test_exact_leaf_empty_scope_matches_untagged_only(self):
    """An exact leaf with no tags keeps only results whose tags are None or empty."""
    leaf = TagGroupLeaf(tags=[], match="exact")
    candidates = [MockResult(tags) for tags in (["a"], ["a", "b"], None, [])]
    kept = filter_results_by_tag_groups(candidates, [leaf])
    assert len(kept) == 2
    # No surviving result may carry any tags.
    assert not any(item.tags for item in kept)

def test_single_leaf_all_strict_matches_superset(self):
"""Single all_strict leaf matches results that contain all tags."""
groups = [TagGroupLeaf(tags=["user:alice", "step:5"], match="all_strict")]
Expand Down Expand Up @@ -904,6 +977,37 @@ async def test_recall_with_empty_tags_returns_all(api_client, test_bank_id):
assert any("Rachel" in t for t in texts), "Should find Rachel"


@pytest.mark.asyncio
async def test_recall_empty_tags_exact_returns_untagged_only(api_client, test_bank_id):
    """Recall with tags=[] and tags_match='exact' must return untagged memories only."""
    # Seed the bank with one untagged memory and one tagged memory.
    seed_items = [
        {"content": "Sam studies astronomy."},  # no tags
        {"content": "Tina studies geology.", "tags": ["user_tina"]},
    ]
    retain_response = await api_client.post(
        f"/v1/default/banks/{test_bank_id}/memories",
        json={"items": seed_items},
    )
    assert retain_response.status_code == 200

    # Exact match against the empty scope.
    recall_body = {"query": "Who studies what?", "budget": "low", "tags": [], "tags_match": "exact"}
    recall_response = await api_client.post(
        f"/v1/default/banks/{test_bank_id}/memories/recall",
        json=recall_body,
    )
    assert recall_response.status_code == 200
    hits = recall_response.json()["results"]

    texts = [hit["text"] for hit in hits]
    assert any("Sam" in t for t in texts), "Should find the untagged memory"
    assert not any("Tina" in t for t in texts), "Should NOT find the tagged memory"
    # Beyond the two seeded items, nothing returned may carry tags.
    for hit in hits:
        assert not hit.get("tags"), f"Expected untagged result, got tags={hit.get('tags')}"


@pytest.mark.asyncio
async def test_multi_user_agent_visibility(api_client):
"""
Expand Down
4 changes: 3 additions & 1 deletion hindsight-clients/go/api/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7137,7 +7137,9 @@ components:
default: any
description: "How to match tags: 'any' (OR, includes untagged), 'all' (AND,\
\ includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict'\
\ (AND, excludes untagged)."
\ (AND, excludes untagged), 'exact' (set-equality on the full scope, excludes\
\ untagged). With 'exact' and no tags (or []), the empty global scope\
\ is selected and only untagged memories match."
enum:
- any
- all
Expand Down
2 changes: 1 addition & 1 deletion hindsight-clients/go/model_recall_request.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class RecallRequest(BaseModel):
query_timestamp: Optional[StrictStr] = None
include: Optional[IncludeOptions] = Field(default=None, description="Options for including additional data (entities are included by default)")
tags: Optional[List[StrictStr]] = None
tags_match: Optional[StrictStr] = Field(default='any', description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).")
tags_match: Optional[StrictStr] = Field(default='any', description="How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged), 'exact' (set-equality on the full scope, excludes untagged). With 'exact' and no tags (or []), the empty global scope is selected and only untagged memories match.")
tag_groups: Optional[List[MentalModelTriggerInputTagGroupsInner]] = None
__properties: ClassVar[List[str]] = ["query", "types", "budget", "max_tokens", "trace", "query_timestamp", "include", "tags", "tags_match", "tag_groups"]

Expand Down
4 changes: 2 additions & 2 deletions hindsight-clients/typescript/generated/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2919,13 +2919,13 @@ export type RecallRequest = {
/**
* Tags
*
* Filter memories by tags. If not specified, all memories are returned.
* Filter memories by tags. If not specified, all memories are returned. Omitting tags (or passing []) together with tags_match='exact' filters to untagged/global observations only (the scope written by observation_scopes='shared').
*/
tags?: Array<string> | null;
/**
* Tags Match
*
* How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).
* How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged), 'exact' (set-equality on the full scope, excludes untagged). With 'exact' and no tags (or []), the empty global scope is selected and only untagged memories match.
*/
tags_match?: "any" | "all" | "any_strict" | "all_strict" | "exact";
/**
Expand Down
10 changes: 10 additions & 0 deletions hindsight-docs/docs/developer/api/recall.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,16 @@ Returns memories whose tag set is exactly equal to the specified tags, regardles

Use this when filtering a precise observation scope returned by `GET /v1/default/banks/{bank_id}/observations/scopes`, where `["user:alice"]` should not also match observations scoped to `["user:alice", "project:x"]`.

:::tip Filter to global (untagged) observations only
The empty scope is a real scope — it's where `observation_scopes: "shared"` consolidation writes. Set `tags_match: "exact"` with **no tags** (omit `tags`, or pass `[]`) to recall **only** untagged/global memories and exclude every tagged one:

```json
{ "query": "...", "tags": [], "tags_match": "exact" }
```

With any other `tags_match` mode, absent or empty `tags` means "no tag filter" (all memories are eligible). Only under `exact` do absent/empty tags select "the global scope". This is the way to read back just the global observations after you've started using more specific scopes.
:::

### tag_groups

`tag_groups` is a list of compound boolean tag filters. The groups in the list are AND-ed together at the top level. Each group is a recursive boolean expression: a **leaf** node `{tags, match}`, or a **compound** node `{and: [...]}`, `{or: [...]}`, or `{not: ...}`.
Expand Down
2 changes: 1 addition & 1 deletion hindsight-docs/docs/developer/observations.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ By default, observations are scoped to all of a memory's tags combined. The `obs

See [`observation_scopes` in the Retain API](./api/retain#observation_scopes) for the full explanation and options.

To inspect the scopes that already exist in a bank, call `GET /v1/default/banks/{bank_id}/observations/scopes`. The response lists each exact tag set with its observation count; the empty tag list is the global scope. Use a returned scope as `tags` with `tags_match: "exact"` when you need to filter to that precise observation scope without also matching observations that carry extra tags.
To inspect the scopes that already exist in a bank, call `GET /v1/default/banks/{bank_id}/observations/scopes`. The response lists each exact tag set with its observation count; the empty tag list is the global scope. Use a returned scope as `tags` with `tags_match: "exact"` when you need to filter to that precise observation scope without also matching observations that carry extra tags. To recall **only** the global scope — the untagged observations written by `observation_scopes: "shared"` — use exact matching with no tags: omit `tags` or pass `tags: []`, together with `tags_match: "exact"`.

---

Expand Down
4 changes: 2 additions & 2 deletions hindsight-docs/static/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -10839,7 +10839,7 @@
}
],
"title": "Tags",
"description": "Filter memories by tags. If not specified, all memories are returned."
"description": "Filter memories by tags. If not specified, all memories are returned. Omitting tags (or passing []) together with tags_match='exact' filters to untagged/global observations only (the scope written by observation_scopes='shared')."
},
"tags_match": {
"type": "string",
Expand All @@ -10851,7 +10851,7 @@
"exact"
],
"title": "Tags Match",
"description": "How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged).",
"description": "How to match tags: 'any' (OR, includes untagged), 'all' (AND, includes untagged), 'any_strict' (OR, excludes untagged), 'all_strict' (AND, excludes untagged), 'exact' (set-equality on the full scope, excludes untagged). With 'exact' and no tags (or []), the empty global scope is selected and only untagged memories match.",
"default": "any"
},
"tag_groups": {
Expand Down
Loading