diff --git a/ddtrace/llmobs/_experiment.py b/ddtrace/llmobs/_experiment.py
index 44d3691e8bf..c53bfef98ca 100644
--- a/ddtrace/llmobs/_experiment.py
+++ b/ddtrace/llmobs/_experiment.py
@@ -1072,6 +1072,8 @@ def _generate_metric_from_evaluation(
         metric_type = "boolean"
     elif isinstance(eval_value, (int, float)):
         metric_type = "score"
+    elif isinstance(eval_value, dict):
+        metric_type = "json"
     else:
         metric_type = "categorical"
         eval_value = str(eval_value).lower()
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 87039a5ca3b..b7aacf4c950 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -2125,9 +2125,9 @@ def submit_evaluation(
             raise ValueError("label value must not contain a '.'.")
 
         metric_type = metric_type.lower()
-        if metric_type not in ("categorical", "score", "boolean"):
+        if metric_type not in ("categorical", "score", "boolean", "json"):
             error = "invalid_metric_type"
-            raise ValueError("metric_type must be one of 'categorical', 'score', or 'boolean'.")
+            raise ValueError("metric_type must be one of 'categorical', 'score', 'boolean', or 'json'.")
 
         if metric_type == "categorical" and not isinstance(value, str):
             error = "invalid_metric_value"
@@ -2138,6 +2138,9 @@ def submit_evaluation(
         if metric_type == "boolean" and not isinstance(value, bool):
             error = "invalid_metric_value"
             raise TypeError("value must be a boolean for a boolean metric.")
+        if metric_type == "json" and not isinstance(value, dict):
+            error = "invalid_metric_value"
+            raise TypeError("value must be a dict for a json metric.")
 
         if tags is not None and not isinstance(tags, dict):
             raise LLMObsSubmitEvaluationError("tags must be a dictionary of string key-value pairs.")
diff --git a/ddtrace/llmobs/_telemetry.py b/ddtrace/llmobs/_telemetry.py
index 463fe545eb0..ee0b733d5b4 100644
--- a/ddtrace/llmobs/_telemetry.py
+++ b/ddtrace/llmobs/_telemetry.py
@@ -197,7 +197,7 @@ def record_llmobs_user_processor_called(error: bool) -> None:
 
 
 def record_llmobs_submit_evaluation(join_on: Dict[str, Any], metric_type: str, error: Optional[str]):
-    _metric_type = metric_type if metric_type in ("categorical", "score", "boolean") else "other"
+    _metric_type = metric_type if metric_type in ("categorical", "score", "boolean", "json") else "other"
     custom_joining_key = str(int(join_on.get("tag") is not None))
     tags = _base_tags(error)
     tags.extend([("metric_type", _metric_type), ("custom_joining_key", custom_joining_key)])
diff --git a/ddtrace/llmobs/_writer.py b/ddtrace/llmobs/_writer.py
index 3ac1631571e..12a8f586237 100644
--- a/ddtrace/llmobs/_writer.py
+++ b/ddtrace/llmobs/_writer.py
@@ -101,6 +101,7 @@ class LLMObsExperimentEvalMetricEvent(TypedDict, total=False):
     categorical_value: str
     score_value: float
     boolean_value: bool
+    json_value: Dict[str, JSONType]
     error: Optional[Dict[str, str]]
     tags: List[str]
     experiment_id: str
diff --git a/releasenotes/notes/llmobs-json-metric-type-2e58cd6c746f9947.yaml b/releasenotes/notes/llmobs-json-metric-type-2e58cd6c746f9947.yaml
new file mode 100644
index 00000000000..cc20d134aa3
--- /dev/null
+++ b/releasenotes/notes/llmobs-json-metric-type-2e58cd6c746f9947.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    LLM Observability: Adds support for the ``json`` metric type in evaluation metrics.
+    Users can now submit ``dict`` values as evaluation metrics using ``LLMObs.submit_evaluation()``
+    with ``metric_type="json"``. Additionally, experiment evaluators that return ``dict`` values
+    are automatically detected as the ``json`` metric type.
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
index 94a1ca0758a..3d2658c2290 100644
--- a/tests/llmobs/test_llmobs_service.py
+++ b/tests/llmobs/test_llmobs_service.py
@@ -1763,11 +1763,11 @@ def test_submit_evaluation_label_value_with_a_period_raises_error(llmobs, mock_l
 
 
 def test_submit_evaluation_incorrect_metric_type_raises_error(llmobs, mock_llmobs_logs):
-    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', or 'boolean'."):
+    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', 'boolean', or 'json'."):
         llmobs.submit_evaluation(
             span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="wrong", value="high"
         )
-    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', or 'boolean'."):
+    with pytest.raises(ValueError, match="metric_type must be one of 'categorical', 'score', 'boolean', or 'json'."):
         llmobs.submit_evaluation(
             span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="", value="high"
         )
@@ -2163,3 +2163,10 @@ def test_submit_evaluation_incorrect_categorical_value_type_raises_error(llmobs,
         llmobs.submit_evaluation(
             span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="categorical", value=123
         )
+
+
+def test_submit_evaluation_incorrect_json_value_type_raises_error(llmobs, mock_llmobs_logs):
+    with pytest.raises(TypeError, match="value must be a dict for a json metric."):
+        llmobs.submit_evaluation(
+            span={"span_id": "123", "trace_id": "456"}, label="toxicity", metric_type="json", value="high"
+        )
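
Reviewer note, not part of the patch: a minimal usage sketch of the new metric type, assuming the public ``LLMObs.submit_evaluation()`` API exactly as exercised in the tests above. The ``label`` and ``value`` contents here are hypothetical examples; the ``span`` join keys mirror the test fixtures.

    from ddtrace.llmobs import LLMObs

    # With metric_type="json", a dict value is accepted; any non-dict value
    # raises the TypeError exercised in
    # test_submit_evaluation_incorrect_json_value_type_raises_error.
    LLMObs.submit_evaluation(
        span={"span_id": "123", "trace_id": "456"},  # join keys, as in the tests
        label="rubric",                              # hypothetical label
        metric_type="json",
        value={"accuracy": 0.9, "verdict": "pass"},  # arbitrary JSON-serializable dict
    )

    # In experiments, an evaluator that returns a dict is now mapped to the
    # "json" metric type by _generate_metric_from_evaluation (the new
    # `elif isinstance(eval_value, dict)` branch); other unrecognized types
    # still fall back to "categorical". The signature below is a hedged
    # sketch, not the exact experiments evaluator API.
    def quality_evaluator(input_data, output_data, expected_output):
        return {"exact_match": output_data == expected_output}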