Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ddtrace/llmobs/_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,8 @@ def _generate_metric_from_evaluation(
metric_type = "boolean"
elif isinstance(eval_value, (int, float)):
metric_type = "score"
elif isinstance(eval_value, dict):
metric_type = "json"
else:
metric_type = "categorical"
eval_value = str(eval_value).lower()
Expand Down
7 changes: 5 additions & 2 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2125,9 +2125,9 @@ def submit_evaluation(
raise ValueError("label value must not contain a '.'.")

metric_type = metric_type.lower()
if metric_type not in ("categorical", "score", "boolean"):
if metric_type not in ("categorical", "score", "boolean", "json"):
error = "invalid_metric_type"
raise ValueError("metric_type must be one of 'categorical', 'score', or 'boolean'.")
raise ValueError("metric_type must be one of 'categorical', 'score', 'boolean', or 'json'.")

if metric_type == "categorical" and not isinstance(value, str):
error = "invalid_metric_value"
Expand All @@ -2138,6 +2138,9 @@ def submit_evaluation(
if metric_type == "boolean" and not isinstance(value, bool):
error = "invalid_metric_value"
raise TypeError("value must be a boolean for a boolean metric.")
if metric_type == "json" and not isinstance(value, dict):
error = "invalid_metric_value"
raise TypeError("value must be a dict for a json metric.")

if tags is not None and not isinstance(tags, dict):
raise LLMObsSubmitEvaluationError("tags must be a dictionary of string key-value pairs.")
Expand Down
2 changes: 1 addition & 1 deletion ddtrace/llmobs/_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def record_llmobs_user_processor_called(error: bool) -> None:


def record_llmobs_submit_evaluation(join_on: Dict[str, Any], metric_type: str, error: Optional[str]):
_metric_type = metric_type if metric_type in ("categorical", "score", "boolean") else "other"
_metric_type = metric_type if metric_type in ("categorical", "score", "boolean", "json") else "other"
custom_joining_key = str(int(join_on.get("tag") is not None))
tags = _base_tags(error)
tags.extend([("metric_type", _metric_type), ("custom_joining_key", custom_joining_key)])
Expand Down
1 change: 1 addition & 0 deletions ddtrace/llmobs/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class LLMObsExperimentEvalMetricEvent(TypedDict, total=False):
categorical_value: str
score_value: float
boolean_value: bool
json_value: Dict[str, JSONType]
error: Optional[Dict[str, str]]
tags: List[str]
experiment_id: str
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
features:
- |
LLM Observability: Adds support for ``json`` metric type in evaluation metrics.
Users can now submit ``dict`` values as evaluation metrics using ``LLMObs.submit_evaluation()``
with ``metric_type="json"``. Additionally, experiment evaluators that return ``dict`` values
are automatically detected as ``json`` metric type.
11 changes: 9 additions & 2 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1763,11 +1763,11 @@ def test_submit_evaluation_label_value_with_a_period_raises_error(llmobs, mock_l


def test_submit_evaluation_incorrect_metric_type_raises_error(llmobs, mock_llmobs_logs):
    """An unrecognized or empty ``metric_type`` raises ValueError naming the accepted types.

    NOTE(review): the scraped diff view interleaved the pre-change and post-change
    versions of the ``match`` strings; this is the post-merge version, whose error
    message includes the newly supported ``json`` metric type.
    """
    # Unknown metric type.
    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', 'boolean', or 'json'."):
        llmobs.submit_evaluation(
            span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="wrong", value="high"
        )
    # Empty metric type is rejected the same way.
    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', 'boolean', or 'json'."):
        llmobs.submit_evaluation(
            span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="", value="high"
        )
Expand Down Expand Up @@ -2163,3 +2163,10 @@ def test_submit_evaluation_incorrect_categorical_value_type_raises_error(llmobs,
llmobs.submit_evaluation(
span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value=123
)


def test_submit_evaluation_incorrect_json_value_type_raises_error(llmobs, mock_llmobs_logs):
    """A non-dict value submitted with ``metric_type="json"`` raises TypeError."""
    join_on = {"span_id": "123", "trace_id": "456"}
    with pytest.raises(TypeError, match="value must be a dict for a json metric."):
        llmobs.submit_evaluation(span=join_on, label="toxicity", metric_type="json", value="high")
Loading