diff --git a/runtime/datamate-python/tests/conftest.py b/runtime/datamate-python/tests/conftest.py
new file mode 100644
index 00000000..9321d5f7
--- /dev/null
+++ b/runtime/datamate-python/tests/conftest.py
@@ -0,0 +1,56 @@
+"""Pytest bootstrap that pre-registers stub packages under ``app.module``."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from types import ModuleType
+
+# Sub-packages of ``app.module`` (as path parts) that get a stub package,
+# listed parents-first; the empty tuple stands for ``app.module`` itself.
+_STUBBED_PACKAGES: tuple[tuple[str, ...], ...] = (
+    (),
+    ("cleaning",),
+    ("cleaning", "service"),
+    ("rag",),
+    ("rag", "service"),
+    ("rag", "service", "common"),
+)
+
+
+def _register_namespace(module_name: str, module_path: Path) -> None:
+    """Install an empty package object for ``module_name`` in ``sys.modules``.
+
+    The stub only carries a ``__path__`` pointing at ``module_path``, so its
+    submodules can still be imported from disk while the package's own
+    ``__init__.py`` is never executed. A module already registered under the
+    same name is left untouched.
+
+    Args:
+        module_name: Dotted name of the package to stub.
+        module_path: Directory searched for the package's submodules.
+    """
+    stub = ModuleType(module_name)
+    stub.__path__ = [str(module_path)]  # type: ignore[attr-defined]
+    registered = sys.modules.setdefault(module_name, stub)
+
+    # The import system binds a submodule on its parent only when it loads the
+    # submodule itself; do it by hand so attribute-style lookups such as
+    # ``unittest.mock.patch("app.module.rag.service...")`` can walk the chain.
+    parent_name, _, child_name = module_name.rpartition(".")
+    parent = sys.modules.get(parent_name)
+    if parent is not None and not hasattr(parent, child_name):
+        setattr(parent, child_name, registered)
+
+
+def pytest_sessionstart(session) -> None:
+    """Stub the ``app.module`` packages when the pytest session starts.
+
+    Prevents test imports of ``app.module.*`` from triggering the heavy
+    dependency loading done by ``app/module/__init__.py``.
+    """
+    root = Path(__file__).resolve().parents[1] / "app" / "module"
+
+    for parts in _STUBBED_PACKAGES:
+        dotted_name = ".".join(("app", "module", *parts))
+        _register_namespace(dotted_name, root.joinpath(*parts))
diff --git a/runtime/datamate-python/tests/test_dataset_service.py b/runtime/datamate-python/tests/test_dataset_service.py
new file mode 100644
index 00000000..fbfcd8d8
--- /dev/null
+++ b/runtime/datamate-python/tests/test_dataset_service.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""Unit tests for the dataset ``Service`` using a mocked async DB session."""
+
+import unittest
+from unittest.mock import MagicMock, AsyncMock
+import sys
+import os
+
+# Make sure the runtime/datamate-python directory is on sys.path.
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+APP_DIR = os.path.dirname(TEST_DIR)
+sys.path.insert(0, APP_DIR)
+
+from app.module.dataset.service.service import Service
+from app.module.dataset.schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
+from app.db.models import Dataset, DatasetFiles
+
+
+class TestDatasetService(unittest.IsolatedAsyncioTestCase):
+    """Exercises ``Service.get_dataset`` and ``Service.create_dataset``."""
+
+    def setUp(self):
+        """Build a ``Service`` around a mock session with awaitable methods."""
+        session = MagicMock()
+        for method_name in ("execute", "commit", "rollback", "flush"):
+            setattr(session, method_name, AsyncMock())
+        self.mock_db = session
+        self.service = Service(session)
+
+    def _stub_scalar_result(self, value):
+        """Make ``db.execute`` resolve to a result whose scalar is ``value``."""
+        query_result = MagicMock()
+        query_result.scalar_one_or_none.return_value = value
+        self.mock_db.execute.return_value = query_result
+
+    async def test_get_dataset_success(self):
+        """An existing dataset is returned with its fields mapped."""
+        stored = Dataset(
+            id="test-dataset-id",
+            name="Test Dataset",
+            description="A test description",
+            dataset_type="TEXT",
+            status="DRAFT",
+            file_count=5,
+            size_bytes=1024,
+            created_by="system",
+        )
+        self._stub_scalar_result(stored)
+
+        response = await self.service.get_dataset("test-dataset-id")
+
+        self.assertIsNotNone(response)
+        expected_fields = {
+            "id": "test-dataset-id",
+            "name": "Test Dataset",
+            "description": "A test description",
+            "datasetType": "TEXT",
+            "status": "DRAFT",
+            "fileCount": 5,
+            "totalSize": 1024,
+        }
+        for field_name, expected in expected_fields.items():
+            self.assertEqual(getattr(response, field_name), expected)
+
+    async def test_get_dataset_not_found(self):
+        """Looking up a dataset the database does not have yields ``None``."""
+        self._stub_scalar_result(None)
+
+        response = await self.service.get_dataset("non-existent-id")
+
+        self.assertIsNone(response)
+
+    async def test_create_dataset_success(self):
+        """Creating a dataset with an unused name adds and commits it."""
+        # Simulate the name-existence check finding nothing.
+        self._stub_scalar_result(None)
+
+        response = await self.service.create_dataset(
+            name="New Dataset",
+            dataset_type="IMAGE",
+            description="Testing create_dataset API",
+            status="PUBLISHED",
+        )
+
+        self.assertIsNotNone(response)
+        self.assertEqual(response.name, "New Dataset")
+        self.assertEqual(response.datasetType, "IMAGE")
+        self.assertEqual(response.description, "Testing create_dataset API")
+        self.assertEqual(response.status, "PUBLISHED")
+
+        # Confirm db.add and db.commit were each called exactly once.
+        self.mock_db.add.assert_called_once()
+        self.mock_db.commit.assert_called_once()
+
+    async def test_create_dataset_duplicated_name(self):
+        """Creating a dataset whose name is taken raises and never commits."""
+        self._stub_scalar_result(Dataset(id="existing-id", name="Existing Dataset"))
+
+        with self.assertRaisesRegex(Exception, "already exists"):
+            await self.service.create_dataset(
+                name="Existing Dataset",
+                dataset_type="AUDIO",
+            )
+
+        self.mock_db.commit.assert_not_called()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/runtime/datamate-python/tests/test_module_annotation.py b/runtime/datamate-python/tests/test_module_annotation.py
new file mode 100644
index 00000000..5ae3a184
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_annotation.py
@@ -0,0 +1,148 @@
+"""Unit tests for ``LabelStudioConfigValidator``."""
+
+import pytest
+
+from app.module.annotation.utils.config_validator import LabelStudioConfigValidator
+
+
+def test_validate_xml_success_with_object_and_control() -> None:
+    """A view holding one object tag and one control tag is accepted."""
+    xml = """<View>
+    <Image name="image" value="$image"/>
+    <Choices name="label" toName="image">
+        <Choice value="Cat"/>
+        <Choice value="Dog"/>
+    </Choices>
+</View>"""
+
+    is_valid, message = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert is_valid is True
+    assert message is None
+
+
+def test_validate_xml_fails_when_no_controls() -> None:
+    """A view with an object tag but no control tag is rejected."""
+    xml = """<View><Image name="image" value="$image"/></View>"""
+
+    is_valid, message = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert is_valid is False
+    assert "No annotation controls" in (message or "")
+
+
+def test_validate_configuration_json_rejects_unknown_object_reference() -> None:
+    """A label whose ``toName`` matches no declared object is rejected."""
+    config = {
+        "labels": [
+            {
+                "fromName": "sentiment",
+                "toName": "missing_object",
+                "type": "Choices",
+                "options": ["positive", "negative"],
+            }
+        ],
+        "objects": [
+            {"name": "text", "type": "Text", "value": "$text"}
+        ],
+    }
+
+    is_valid, message = LabelStudioConfigValidator.validate_configuration_json(config)
+
+    assert is_valid is False
+    assert "unknown object" in (message or "")
+
+
+def test_extract_label_values() -> None:
+    """Choice values are returned keyed by the control's name."""
+    xml = """<View>
+    <Text name="text" value="$text"/>
+    <Choices name="sentiment" toName="text">
+        <Choice value="positive"/>
+        <Choice value="negative"/>
+    </Choices>
+</View>"""
+
+    labels = LabelStudioConfigValidator.extract_label_values(xml)
+
+    assert labels == {"sentiment": ["positive", "negative"]}
+
+
+def test_validate_xml_rejects_invalid_root() -> None:
+    """A document whose root element is not ``<View>`` is rejected."""
+    xml = """<Root><Text name="text" value="$text"/></Root>"""
+
+    is_valid, message = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert is_valid is False
+    assert "Root element must be <View>" in (message or "")
+
+
+def test_validate_configuration_json_requires_labels() -> None:
+    """A configuration without a ``labels`` key is rejected."""
+    is_valid, message = LabelStudioConfigValidator.validate_configuration_json({"objects": []})
+
+    assert is_valid is False
+    assert "Missing 'labels' field" in (message or "")
+
+
+def test_validate_xml_fails_for_invalid_xml() -> None:
+    """Malformed XML is reported as a parse error."""
+    is_valid, message = LabelStudioConfigValidator.validate_xml("<View><Text></View>")
+
+    assert is_valid is False
+    assert "XML parse error" in (message or "")
+
+
+@pytest.mark.parametrize(
+    "label,error_text",
+    [
+        ({"toName": "obj", "type": "Choices", "options": ["A"]}, "fromName"),
+        ({"fromName": "lbl", "type": "Choices", "options": ["A"]}, "toName"),
+        ({"fromName": "lbl", "toName": "obj", "options": ["A"]}, "type"),
+    ],
+)
+def test_validate_label_definition_required_fields(label, error_text: str) -> None:
+    """Leaving out a required label field is rejected and the field is named."""
+    is_valid, message = LabelStudioConfigValidator._validate_label_definition(label)
+
+    assert is_valid is False
+    assert error_text in (message or "")
+
+
+def test_validate_label_definition_rejects_unsupported_type() -> None:
+    """A label with an unrecognised control type is rejected."""
+    label = {"fromName": "x", "toName": "obj", "type": "NotSupported"}
+
+    is_valid, message = LabelStudioConfigValidator._validate_label_definition(label)
+
+    assert is_valid is False
+    assert "Unsupported control type" in (message or "")
+
+
+def test_validate_object_definition_rejects_value_without_dollar_prefix() -> None:
+    """An object whose ``value`` lacks the leading ``$`` is rejected."""
+    obj = {"name": "txt", "type": "Text", "value": "text"}
+
+    is_valid, message = LabelStudioConfigValidator._validate_object_definition(obj)
+
+    assert is_valid is False
+    assert "must start with '$'" in (message or "")
+
+
+def test_extract_label_values_returns_empty_on_invalid_xml() -> None:
+    """Unparseable XML yields an empty mapping."""
+    assert LabelStudioConfigValidator.extract_label_values("<broken") == {}
+
+
+def test_validate_xml_requires_control_name_and_to_name() -> None:
+    """A control tag without a ``name`` attribute is rejected."""
+    xml = """<View>
+    <Text name="text" value="$text"/>
+    <Choices toName="text"><Choice value="A"/></Choices>
+</View>"""
+
+    is_valid, message = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert is_valid is False
+    assert "Missing 'name' attribute" in (message or "")
diff --git a/runtime/datamate-python/tests/test_module_cleaning.py b/runtime/datamate-python/tests/test_module_cleaning.py
new file mode 100644
index 00000000..3e902041
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_cleaning.py
@@ -0,0 +1,118 @@
+"""Unit tests for ``CleaningTaskValidator``."""
+
+import pytest
+
+from app.core.exception import BusinessError
+from app.module.cleaning.schema.cleaning import OperatorInstanceDto
+from app.module.cleaning.service.cleaning_task_validator import CleaningTaskValidator
+from app.module.operator.constants import CATEGORY_DATAMATE_ID, CATEGORY_DATA_JUICER_ID
+
+
+def _op(op_id: str, inputs: str | None, outputs: str | None, categories: list[str] | None = None) -> OperatorInstanceDto:
+    """Build an operator instance DTO from the four fields the tests vary."""
+    fields = {"id": op_id, "inputs": inputs, "outputs": outputs, "categories": categories}
+    return OperatorInstanceDto(**fields)
+
+
+def test_check_input_and_output_passes_with_multimodal() -> None:
+    """A "multimodal" output followed by an "image" input does not raise."""
+    pipeline = [
+        _op("a", "text", "multimodal"),
+        _op("b", "image", "text"),
+    ]
+
+    CleaningTaskValidator.check_input_and_output(pipeline)
+
+
+def test_check_input_and_output_raises_on_type_mismatch() -> None:
+    """An "image" output followed by a "text" input is rejected."""
+    pipeline = [
+        _op("a", "text", "image"),
+        _op("b", "text", "text"),
+    ]
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output(pipeline)
+
+
+def test_check_and_get_executor_type_raises_when_mixed_categories() -> None:
+    """Mixing DataMate and Data-Juicer operators in one task is rejected."""
+    pipeline = [
+        _op("a", None, None, [CATEGORY_DATAMATE_ID]),
+        _op("b", None, None, [CATEGORY_DATA_JUICER_ID]),
+    ]
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_and_get_executor_type(pipeline)
+
+
+def test_check_and_get_executor_type_defaults_to_datamate() -> None:
+    """An operator without categories resolves to the "datamate" executor."""
+    executor = CleaningTaskValidator.check_and_get_executor_type([_op("a", None, None, None)])
+
+    assert executor == "datamate"
+
+
+def test_check_task_id_raises_when_empty() -> None:
+    """An empty task id is rejected."""
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_task_id("")
+
+
+def test_check_task_id_accepts_normal_value() -> None:
+    """A non-empty task id does not raise."""
+    CleaningTaskValidator.check_task_id("task-1")
+
+
+def test_check_input_and_output_returns_for_empty_instances() -> None:
+    """An empty operator list does not raise."""
+    CleaningTaskValidator.check_input_and_output([])
+
+
+def test_check_input_and_output_raises_when_current_has_no_outputs() -> None:
+    """An operator with no declared outputs ahead of another is rejected."""
+    pipeline = [
+        _op("a", "text", None),
+        _op("b", "text", "text"),
+    ]
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output(pipeline)
+
+
+def test_check_input_and_output_raises_when_next_has_no_inputs() -> None:
+    """An operator with no declared inputs after another is rejected."""
+    pipeline = [
+        _op("a", "text", "text"),
+        _op("b", None, "text"),
+    ]
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output(pipeline)
+
+
+@pytest.mark.parametrize(
+    "out_type,in_type",
+    [
+        ("text", "text"),
+        (" image ", "image"),
+        ("AUDIO", "audio"),
+    ],
+)
+def test_check_input_and_output_allows_exact_match_with_normalization(out_type: str, in_type: str) -> None:
+    """Types that differ only by padding or letter case do not raise."""
+    pipeline = [
+        _op("a", "x", out_type),
+        _op("b", in_type, "y"),
+    ]
+
+    CleaningTaskValidator.check_input_and_output(pipeline)
+
+
+def test_check_and_get_executor_type_prefers_datajuicer_when_only_datajuicer() -> None:
+    """A task made only of Data-Juicer operators resolves to "default"."""
+    # NOTE(review): the test name says "datajuicer" while the asserted executor
+    # is "default" -- confirm that is the intended executor name.
+    executor = CleaningTaskValidator.check_and_get_executor_type([_op("a", None, None, [CATEGORY_DATA_JUICER_ID])])
+
+    assert executor == "default"
diff --git a/runtime/datamate-python/tests/test_module_collection.py b/runtime/datamate-python/tests/test_module_collection.py
new file mode 100644
index 00000000..072ddfe9
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_collection.py
@@ -0,0 +1,126 @@
+"""Unit tests for the collection task schemas and converters."""
+
+from types import SimpleNamespace
+
+import pytest
+
+from app.module.collection.schema.collection import (
+    CollectionConfig,
+    CollectionTaskCreate,
+    CollectionTaskUpdate,
+    SyncMode,
+    convert_for_create,
+    converter_execution_to_response,
+    converter_to_response,
+)
+
+
+def test_collection_task_update_rejects_blank_schedule_expression() -> None:
+    """A whitespace-only schedule expression fails validation."""
+    with pytest.raises(ValueError):
+        CollectionTaskUpdate(schedule_expression="   ")
+
+
+def test_collection_task_update_rejects_non_positive_timeout() -> None:
+    """A timeout of zero seconds fails validation."""
+    with pytest.raises(ValueError):
+        CollectionTaskUpdate(timeout_seconds=0)
+
+
+def test_convert_for_create_handles_sync_mode_schedule_expression() -> None:
+    """SCHEDULED tasks keep their schedule expression; ONCE tasks drop it."""
+    shared = {
+        "schedule_expression": "0 0 * * *",
+        "config": CollectionConfig(parameter={"k": "v"}),
+        "template_id": "tpl-1",
+    }
+    scheduled = CollectionTaskCreate(name="task-scheduled", sync_mode=SyncMode.SCHEDULED, **shared)
+    once = CollectionTaskCreate(name="task-once", sync_mode=SyncMode.ONCE, **shared)
+
+    scheduled_record = convert_for_create(scheduled, "task-1")
+    once_record = convert_for_create(once, "task-2")
+
+    assert scheduled_record.schedule_expression == "0 0 * * *"
+    assert once_record.schedule_expression is None
+    assert scheduled_record.target_path == "/dataset/local/task-1"
+
+
+def test_collection_task_update_accepts_positive_timeout() -> None:
+    """A positive timeout is stored unchanged."""
+    assert CollectionTaskUpdate(timeout_seconds=30).timeout_seconds == 30
+
+
+def test_convert_for_create_sets_pending_status() -> None:
+    """A freshly converted task record starts in the PENDING status."""
+    request = CollectionTaskCreate(
+        name="task-once",
+        sync_mode=SyncMode.ONCE,
+        config=CollectionConfig(parameter={"k": "v"}),
+        template_id="tpl-1",
+    )
+
+    record = convert_for_create(request, "task-3")
+
+    assert record.status == "PENDING"
+
+
+def test_collection_task_update_accepts_none_fields() -> None:
+    """An update built with no arguments leaves its fields as ``None``."""
+    empty_update = CollectionTaskUpdate()
+
+    assert empty_update.timeout_seconds is None
+    assert empty_update.config is None
+
+
+def test_converter_to_response_maps_json_config() -> None:
+    """The JSON string in ``config`` is parsed into the response's config."""
+    task = SimpleNamespace(
+        id="t1",
+        name="task",
+        description="desc",
+        sync_mode="ONCE",
+        template_id="tpl",
+        template_name="template",
+        target_path="/dataset/local/t1",
+        config='{"parameter": {"a": 1}}',
+        schedule_expression=None,
+        status="PENDING",
+        retry_count=3,
+        timeout_seconds=60,
+        last_execution_id="e1",
+        created_at=None,
+        updated_at=None,
+        created_by="u",
+        updated_by="u",
+    )
+
+    response = converter_to_response(task)
+
+    assert response.id == "t1"
+    assert response.config.parameter == {"a": 1}
+    assert response.status.value == "PENDING"
+
+
+def test_converter_execution_to_response_maps_fields() -> None:
+    """Execution id, task id and status are carried over to the response."""
+    execution = SimpleNamespace(
+        id="e1",
+        task_id="t1",
+        task_name="task",
+        status="RUNNING",
+        log_path="/x.log",
+        started_at=None,
+        completed_at=None,
+        duration_seconds=1,
+        error_message=None,
+        created_at=None,
+        updated_at=None,
+        created_by="u",
+        updated_by="u",
+    )
+
+    response = converter_execution_to_response(execution)
+
+    assert response.id == "e1"
+    assert response.task_id == "t1"
+    assert response.status == "RUNNING"
diff --git a/runtime/datamate-python/tests/test_module_dataset.py b/runtime/datamate-python/tests/test_module_dataset.py
new file mode 100644
index 00000000..c860420c
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_dataset.py
@@ -0,0 +1,146 @@
+"""Unit tests for the dataset ``Service`` driven through ``asyncio.run``."""
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from app.module.dataset.service.service import Service
+
+
+def _run(coro):
+    """Run ``coro`` to completion with ``asyncio.run`` and return its result."""
+    return asyncio.run(coro)
+
+
+def _scalar_result(value):
+    """Return a mock query result whose ``scalar_one_or_none()`` is ``value``."""
+    result = MagicMock()
+    result.scalar_one_or_none.return_value = value
+    return result
+
+
+def test_create_dataset_uses_default_status_when_not_provided() -> None:
+    """Creating a dataset without a status yields "DRAFT" and commits once."""
+    db = MagicMock()
+    db.execute = AsyncMock(return_value=_scalar_result(None))
+    db.flush = AsyncMock()
+    db.commit = AsyncMock()
+    db.rollback = AsyncMock()
+    service = Service(db)
+
+    response = _run(service.create_dataset(name="ds1", dataset_type="TEXT", description="desc"))
+
+    assert response.status == "DRAFT"
+    assert response.name == "ds1"
+    db.commit.assert_called_once()
+
+
+def test_get_dataset_returns_none_when_execute_fails() -> None:
+    """A failing query makes ``get_dataset`` return ``None`` instead of raising."""
+    db = MagicMock()
+    db.execute = AsyncMock(side_effect=RuntimeError("db unavailable"))
+
+    response = _run(Service(db).get_dataset("dataset-1"))
+
+    assert response is None
+
+
+def test_get_file_download_url_returns_file_path() -> None:
+    """The stored file's ``file_path`` is returned as the download URL."""
+    db = MagicMock()
+    stored_file = SimpleNamespace(file_path="/dataset/ds1/a.txt")
+    db.execute = AsyncMock(return_value=_scalar_result(stored_file))
+
+    file_path = _run(Service(db).get_file_download_url("ds1", "file1"))
+
+    assert file_path == "/dataset/ds1/a.txt"
+
+
+def test_create_dataset_raises_for_duplicated_name() -> None:
+    """A name that is already taken raises and rolls the session back once."""
+    db = MagicMock()
+    existing = SimpleNamespace(id="d1", name="dup")
+    db.execute = AsyncMock(return_value=_scalar_result(existing))
+    db.rollback = AsyncMock()
+    service = Service(db)
+
+    # pytest.raises fails the test when nothing is raised, so no flag is needed.
+    with pytest.raises(Exception, match="already exists"):
+        _run(service.create_dataset(name="dup", dataset_type="TEXT"))
+
+    db.rollback.assert_called_once()
+
+
+def test_get_file_download_url_returns_none_when_file_missing() -> None:
+    """A file the query does not find yields ``None``."""
+    db = MagicMock()
+    db.execute = AsyncMock(return_value=_scalar_result(None))
+
+    file_path = _run(Service(db).get_file_download_url("ds1", "missing"))
+
+    assert file_path is None
+
+
+def test_get_dataset_returns_none_when_not_found() -> None:
+    """A dataset the query does not find yields ``None``."""
+    db = MagicMock()
+    db.execute = AsyncMock(return_value=_scalar_result(None))
+
+    response = _run(Service(db).get_dataset("not-exist"))
+
+    assert response is None
+
+
+def test_get_dataset_files_returns_paged_response() -> None:
+    """File rows and the total count are mapped into the paged response."""
+    db = MagicMock()
+    count_result = MagicMock()
+    count_result.scalar_one.return_value = 2
+    files_result = MagicMock()
+    files_result.scalars.return_value.all.return_value = [
+        SimpleNamespace(
+            id="f1",
+            file_name="a.txt",
+            file_type="txt",
+            file_path="/dataset/a.txt",
+            file_size=12,
+            status="ACTIVE",
+            upload_time=None,
+            last_access_time=None,
+            tags=[],
+            tags_updated_at=None,
+        ),
+        SimpleNamespace(
+            id="f2",
+            file_name="b.txt",
+            file_type="txt",
+            file_path="/dataset/b.txt",
+            file_size=20,
+            status="ACTIVE",
+            upload_time=None,
+            last_access_time=None,
+            tags=[],
+            tags_updated_at=None,
+        ),
+    ]
+    # Results are handed out in call order: the count first, then the rows.
+    db.execute = AsyncMock(side_effect=[count_result, files_result])
+
+    response = _run(Service(db).get_dataset_files("ds1", page=0, size=10))
+
+    assert response is not None
+    assert response.totalElements == 2
+    assert len(response.content) == 2
+    assert response.content[0].fileName == "a.txt"
+
+
+def test_get_dataset_files_returns_none_when_query_fails() -> None:
+    """A failing query makes ``get_dataset_files`` return ``None``."""
+    db = MagicMock()
+    db.execute = AsyncMock(side_effect=RuntimeError("query fail"))
+
+    response = _run(Service(db).get_dataset_files("ds1"))
+
+    assert response is None
diff --git a/runtime/datamate-python/tests/test_module_evaluation.py b/runtime/datamate-python/tests/test_module_evaluation.py
new file mode 100644
index 00000000..a95159c0
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_evaluation.py
@@ -0,0 +1,61 @@
+"""Unit tests for ``PromptTemplateService.get_prompt_templates``."""
+
+from app.module.evaluation.schema.prompt import EVALUATION_PROMPT_TEMPLATE
+from app.module.evaluation.service.prompt_template_service import PromptTemplateService
+
+
+def test_get_prompt_templates_size_matches_source() -> None:
+    """One template is returned per entry in the source constant."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    assert len(templates) == len(EVALUATION_PROMPT_TEMPLATE)
+
+
+def test_get_prompt_templates_dimensions_are_mapped() -> None:
+    """The first template exposes string fields and string dimensions."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+    assert templates
+
+    head = templates[0]
+    assert isinstance(head.evalType, str)
+    assert isinstance(head.prompt, str)
+    for dim in head.defaultDimensions:
+        assert isinstance(dim.dimension, str)
+        assert isinstance(dim.description, str)
+
+
+def test_get_prompt_templates_all_items_have_eval_type_and_prompt() -> None:
+    """Every template has a truthy ``evalType`` and a string ``prompt``."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    assert not [item for item in templates if not item.evalType]
+    assert not [item for item in templates if not isinstance(item.prompt, str)]
+
+
+def test_get_prompt_templates_preserves_eval_type_order() -> None:
+    """Templates come back in the same order as the source constant."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    source_order = [raw.get("evalType", "") for raw in EVALUATION_PROMPT_TEMPLATE]
+    mapped_order = [item.evalType for item in templates]
+    assert mapped_order == source_order
+
+
+def test_get_prompt_templates_handles_empty_dimensions() -> None:
+    """Source entries without dimensions map to an empty dimension list."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    for idx, raw in enumerate(EVALUATION_PROMPT_TEMPLATE):
+        if raw.get("defaultDimensions"):
+            continue
+        assert templates[idx].defaultDimensions == []
+
+
+def test_prompt_template_dimension_fields_are_non_none() -> None:
+    """No mapped dimension has a ``None`` name or description."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    for item in templates:
+        for dim in item.defaultDimensions:
+            assert dim.dimension is not None
+            assert dim.description is not None
diff --git a/runtime/datamate-python/tests/test_module_generation.py b/runtime/datamate-python/tests/test_module_generation.py
new file mode 100644
index 00000000..50f92330
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_generation.py
@@ -0,0 +1,62 @@
+"""Unit tests for ``get_prompt`` and the ``SynthesisType`` enum."""
+
+import pytest
+
+from app.module.generation.schema.generation import SynthesisType
+from app.module.generation.service.prompt import (
+    ANSWER_GENERATOR_PROMPT,
+    COT_GENERATOR_PROMPT,
+    QUESTION_GENERATOR_PROMPT,
+    get_prompt,
+)
+
+
+def test_get_prompt_dispatches_by_synthesis_type() -> None:
+    """Each synthesis type maps to its dedicated prompt constant."""
+    cases = (
+        (SynthesisType.QA, ANSWER_GENERATOR_PROMPT),
+        (SynthesisType.COT, COT_GENERATOR_PROMPT),
+        (SynthesisType.QUESTION, QUESTION_GENERATOR_PROMPT),
+    )
+    for synth_type, prompt in cases:
+        assert get_prompt(synth_type) == prompt
+
+
+def test_get_prompt_raises_for_unsupported_type() -> None:
+    """A value that is not a known synthesis type raises ``ValueError``."""
+    with pytest.raises(ValueError):
+        get_prompt("UNKNOWN")
+
+
+def test_synthesis_type_values_are_stable() -> None:
+    """Each checked enum member's value equals its own name."""
+    for member_name in ("QA", "COT", "QUESTION"):
+        assert getattr(SynthesisType, member_name).value == member_name
+
+
+def test_get_prompt_error_contains_unsupported_type() -> None:
+    """The error raised for an unknown type carries the expected message."""
+    with pytest.raises(ValueError) as caught:
+        get_prompt("X")
+
+    assert "Unsupported synthesis type" in str(caught.value)
+
+
+@pytest.mark.parametrize(
+    "synth_type,required_text",
+    [
+        (SynthesisType.QA, "output"),
+        (SynthesisType.COT, "chain_of_thought"),
+        (SynthesisType.QUESTION, "JSON"),
+    ],
+)
+def test_get_prompt_contains_expected_keywords(synth_type: SynthesisType, required_text: str) -> None:
+    """Each prompt mentions the keyword expected for its synthesis type."""
+    assert required_text in get_prompt(synth_type)
+
+
+def test_synthesis_type_enum_values_are_unique() -> None:
+    """No two enum members share a value."""
+    values = [member.value for member in SynthesisType]
+
+    assert len(set(values)) == len(values)
diff --git a/runtime/datamate-python/tests/test_module_operator.py b/runtime/datamate-python/tests/test_module_operator.py
new file mode 100644
index 00000000..73f4d566
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_operator.py
@@ -0,0 +1,73 @@
+"""Unit tests for ``ParserHolder`` parser selection and delegation."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from app.module.operator.parsers.parser_holder import ParserHolder
+from app.module.operator.parsers.zip_parser import ZipParser
+
+
+def test_get_parser_returns_zip_parser() -> None:
+    """A ``.zip`` file name resolves to a ``ZipParser``."""
+    assert isinstance(ParserHolder().get_parser("abc.zip"), ZipParser)
+
+
+def test_get_parser_raises_for_unsupported_file() -> None:
+    """A ``.txt`` file name raises ``ValueError``."""
+    holder = ParserHolder()
+
+    with pytest.raises(ValueError):
+        holder.get_parser("abc.txt")
+
+
+def test_extract_to_delegates_to_target_parser() -> None:
+    """``extract_to`` forwards the archive and target to the chosen parser."""
+    holder = ParserHolder()
+    zip_stub = MagicMock()
+    holder._parsers["zip"] = zip_stub
+
+    holder.extract_to("zip", "archive.zip", "target")
+
+    zip_stub.extract_to.assert_called_once_with("archive.zip", "target")
+
+
+def test_get_parser_supports_uppercase_extension() -> None:
+    """An upper-case ``.ZIP`` file name still resolves to a ``ZipParser``."""
+    assert isinstance(ParserHolder().get_parser("ABC.ZIP"), ZipParser)
+
+
+def test_parse_yaml_from_archive_delegates_to_selected_parser() -> None:
+    """The chosen parser's result is returned and it gets the expected args."""
+    holder = ParserHolder()
+    sentinel = object()
+    zip_stub = MagicMock()
+    zip_stub.parse_yaml_from_archive.return_value = sentinel
+    holder._parsers["zip"] = zip_stub
+
+    result = holder.parse_yaml_from_archive("zip", "a.zip", "metadata.yml")
+
+    assert result is sentinel
+    zip_stub.parse_yaml_from_archive.assert_called_once_with("a.zip", "metadata.yml", None, None)
+
+
+@pytest.mark.parametrize("name", ["a.tar", "a.gz", "a.tgz"])
+def test_get_parser_supports_tar_like_extensions(name: str) -> None:
+    """Tar-style file names resolve to some parser."""
+    assert ParserHolder().get_parser(name) is not None
+
+
+def test_parse_yaml_from_archive_raises_when_type_unsupported() -> None:
+    """Parsing YAML from an unknown archive type raises ``ValueError``."""
+    holder = ParserHolder()
+
+    with pytest.raises(ValueError):
+        holder.parse_yaml_from_archive("rar", "a.rar", "metadata.yml")
+
+
+def test_extract_to_raises_when_type_unsupported() -> None:
+    """Extracting an unknown archive type raises ``ValueError``."""
+    holder = ParserHolder()
+
+    with pytest.raises(ValueError):
+        holder.extract_to("rar", "a.rar", "tmp")
diff --git a/runtime/datamate-python/tests/test_module_orchestration.py b/runtime/datamate-python/tests/test_module_orchestration.py
new file mode 100644
index 00000000..a7432f15
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_orchestration.py
@@ -0,0 +1,36 @@
+"""Placeholder tests that pin the layout of the orchestration module."""
+
+from pathlib import Path
+
+# Directory of the orchestration module, resolved relative to this test file.
+_ORCHESTRATION_DIR = Path(__file__).resolve().parents[1] / "app" / "module" / "orchestration"
+_SCAFFOLD_DIRS = ("interface", "schema", "service")
+
+
+def test_orchestration_module_has_no_python_sources_yet() -> None:
+    """Fail once any ``.py`` file appears, as a reminder to add real tests."""
+    py_files = list(_ORCHESTRATION_DIR.rglob("*.py"))
+
+    assert py_files == [], (
+        "orchestration 模块已有 Python 实现，请补充真实业务单测并删除该占位用例"
+    )
+
+
+def test_orchestration_module_scaffold_directories_exist() -> None:
+    """The interface, schema and service entries exist under the module."""
+    for dir_name in _SCAFFOLD_DIRS:
+        assert (_ORCHESTRATION_DIR / dir_name).exists()
+
+
+def test_orchestration_scaffold_contains_only_directories_or_cache() -> None:
+    """The module directory lists the three scaffold entries."""
+    # NOTE(review): despite the name, nothing here checks that these are the only entries.
+    entry_names = {entry.name for entry in _ORCHESTRATION_DIR.iterdir()}
+
+    assert set(_SCAFFOLD_DIRS) <= entry_names
+
+
+def test_orchestration_module_path_exists() -> None:
+    """The orchestration module path exists and is a directory."""
+    assert _ORCHESTRATION_DIR.exists()
+    assert _ORCHESTRATION_DIR.is_dir()
diff --git a/runtime/datamate-python/tests/test_module_rag.py b/runtime/datamate-python/tests/test_module_rag.py
new file mode 100644
index 00000000..2a61e72b
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_rag.py
@@ -0,0 +1,55 @@
+"""Unit tests for the RAG ``TextCleaner`` helper."""
+
+import pytest
+
+from app.module.rag.service.common.text_cleaner import TextCleaner
+
+
+def test_clean_removes_control_chars_and_empty_lines() -> None:
+    """Control characters, repeated whitespace and blank lines are removed."""
+    cleaned = TextCleaner.clean("Hello\x00   world\n\n\n\tLine2\n")
+
+    assert cleaned == "Hello world\n Line2"
+
+
+def test_has_printable_content() -> None:
+    """Whitespace-only text has no printable content; padded text does."""
+    assert TextCleaner.has_printable_content("   \n\t") is False
+    assert TextCleaner.has_printable_content("  数据A ") is True
+
+
+def test_clean_returns_empty_string_for_none_or_empty() -> None:
+    """``None`` and the empty string both clean to an empty string."""
+    for blank_input in (None, ""):
+        assert TextCleaner.clean(blank_input) == ""
+
+
+def test_clean_normalizes_multiple_spaces() -> None:
+    """Runs of spaces or tabs collapse to a single space."""
+    assert TextCleaner.clean("A   B\t\tC") == "A B C"
+
+
+def test_remove_control_characters_private_method_behavior() -> None:
+    """Control characters are dropped from the middle of the text."""
+    assert TextCleaner._remove_control_characters("ab\x01\x02cd") == "abcd"
+
+
+def test_remove_empty_lines_private_method_behavior() -> None:
+    """Empty and whitespace-only lines are removed."""
+    stripped = TextCleaner._remove_empty_lines("line1\n\n  \nline2\n")
+
+    assert stripped == "line1\nline2"
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("", False),
+        ("\n\t ", False),
+        ("A", True),
+        (" 1 ", True),
+    ],
+)
+def test_has_printable_content_parametrized(text: str, expected: bool) -> None:
+    """``has_printable_content`` returns the expected flag for each sample."""
+    assert TextCleaner.has_printable_content(text) is expected
diff --git a/runtime/datamate-python/tests/test_module_ratio.py b/runtime/datamate-python/tests/test_module_ratio.py
new file mode 100644
index 00000000..323763b5
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_ratio.py
@@ -0,0 +1,98 @@
+"""Validation tests for the ratio-task request schemas."""
+
+import pytest
+
+from app.module.ratio.schema.ratio_task import CreateRatioTaskRequest, FilterCondition
+
+
+def test_filter_condition_rejects_bad_date_range_order() -> None:
+    """A date range whose first date is later than its second must be rejected."""
+    reversed_range = ["2025-01-02", "2025-01-01"]
+
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=reversed_range)
+
+
+def test_create_ratio_task_request_validates_numeric_totals() -> None:
+    """A non-numeric ``totals`` string must be rejected."""
+    payload = {"name": "r1", "totals": "abc", "config": []}
+
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(**payload)
+
+
+def test_create_ratio_task_request_accepts_valid_numeric_values() -> None:
+    """Numeric strings for ``totals`` and ``counts`` are accepted and kept as given."""
+    filter_conditions = {
+        "dateRange": ["2025-01-01", "2025-01-31"],
+        "label": {"label": "intent", "value": "A"},
+    }
+    entry = {"datasetId": "ds-1", "counts": "5", "filterConditions": filter_conditions}
+
+    built = CreateRatioTaskRequest(name="ratio-task", totals="10", config=[entry])
+
+    assert built.totals == "10"
+    assert built.config[0].counts == "5"
+
+
+def test_filter_condition_rejects_invalid_date_range_length() -> None:
+    """A date range holding a single element must be rejected."""
+    single_date = ["2025-01-01"]
+
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=single_date)
+
+
+def test_create_ratio_task_request_rejects_non_numeric_counts() -> None:
+    """A config entry whose ``counts`` is not numeric must be rejected."""
+    entry = {
+        "datasetId": "ds-1",
+        "counts": "x",
+        "filterConditions": {"dateRange": ["2025-01-01", "2025-01-02"]},
+    }
+
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(name="ratio-task", totals="10", config=[entry])
+
+
+def test_filter_condition_accepts_none_date_range() -> None:
+    """Passing ``dateRange=None`` is allowed and leaves ``date_range`` as ``None``."""
+    condition = FilterCondition(dateRange=None)
+
+    assert condition.date_range is None
+
+
+def test_filter_condition_rejects_invalid_date_string() -> None:
+    """A date range containing a string that is not a date must be rejected."""
+    malformed_range = ["bad-date", "2025-01-01"]
+
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=malformed_range)
+
+
+def test_create_ratio_task_request_accepts_zero_totals() -> None:
+    """A ``totals`` of ``"0"`` is accepted and kept as given."""
+    built = CreateRatioTaskRequest(name="r0", totals="0", config=[])
+
+    assert built.totals == "0"
+
+
+def test_create_ratio_task_request_rejects_negative_totals() -> None:
+    """A negative ``totals`` value must be rejected."""
+    payload = {"name": "r1", "totals": "-1", "config": []}
+
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(**payload)
+
+
+def test_create_ratio_task_request_alias_mapping_for_dataset_id() -> None:
+    """The ``datasetId`` input key is exposed as ``dataset_id`` on the parsed entry."""
+    entry = {
+        "datasetId": "ds-alias",
+        "counts": "1",
+        "filterConditions": {"dateRange": ["2025-01-01", "2025-01-02"]},
+    }
+
+    built = CreateRatioTaskRequest(name="ratio-task", totals="2", config=[entry])
+
+    assert built.config[0].dataset_id == "ds-alias"
diff --git a/runtime/datamate-python/tests/test_module_shared.py b/runtime/datamate-python/tests/test_module_shared.py
new file mode 100644
index 00000000..e81b7e49
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_shared.py
@@ -0,0 +1,88 @@
+"""Unit tests for the structured-file item handlers and their factory."""
+
+import json
+
+import pytest
+
+from app.module.shared.util.structured_file import (
+    COTItemHandler,
+    ItemTypes,
+    QAItemHandler,
+    StructuredFileHandlerFactory,
+)
+
+
+def test_qa_handler_validate_json_accepts_alpaca_item() -> None:
+    """An item carrying ``instruction`` and ``output`` passes QA validation."""
+    alpaca_item = {"instruction": "i", "output": "o"}
+
+    assert QAItemHandler().validate_json(alpaca_item) is True
+
+
+def test_get_items_from_jsonl_skips_invalid_rows(tmp_path) -> None:
+    """The row lacking ``output`` is expected to be dropped when the ``.jsonl`` file is loaded."""
+    records = [
+        {"instruction": "i1", "output": "o1"},
+        {"instruction": "missing_output"},
+        {"instruction": "i2", "output": "o2"},
+    ]
+    # One JSON document per line; the last line has no trailing newline.
+    serialized = "\n".join(json.dumps(record, ensure_ascii=False) for record in records)
+    jsonl_file = tmp_path / "qa.jsonl"
+    jsonl_file.write_text(serialized, encoding="utf-8")
+
+    loaded = QAItemHandler().get_items_from_file(str(jsonl_file))
+
+    assert len(loaded) == 2
+    assert [item["output"] for item in loaded] == ["o1", "o2"]
+
+
+def test_factory_get_handler_rejects_unknown_item_type() -> None:
+    """Requesting a handler for an unknown item type raises ``ValueError``."""
+    handler_factory = StructuredFileHandlerFactory()
+
+    with pytest.raises(ValueError):
+        handler_factory.get_handler("UNKNOWN")
+
+
+def test_qa_handler_validate_json_rejects_invalid_item() -> None:
+    """An item with only an ``input`` key fails QA validation."""
+    incomplete_item = {"input": "x"}
+
+    assert QAItemHandler().validate_json(incomplete_item) is False
+
+
+def test_factory_get_handler_returns_qa_handler() -> None:
+    """The QA item type resolves to a ``QAItemHandler`` instance."""
+    resolved = StructuredFileHandlerFactory().get_handler(ItemTypes.QA.value)
+
+    assert isinstance(resolved, QAItemHandler)
+
+
+def test_get_items_from_json_file_for_qa(tmp_path) -> None:
+    """A ``.json`` file holding a list of two QA items yields two loaded items."""
+    qa_items = [
+        {"instruction": "q1", "output": "a1"},
+        {"instruction": "q2", "output": "a2"},
+    ]
+    json_file = tmp_path / "qa.json"
+    json_file.write_text(json.dumps(qa_items, ensure_ascii=False), encoding="utf-8")
+
+    loaded = QAItemHandler().get_items_from_file(str(json_file))
+
+    assert len(loaded) == 2
+    assert loaded[0]["instruction"] == "q1"
+
+
+def test_cot_handler_validate_json_requires_question_field() -> None:
+    """An item with only ``instruction`` and ``output`` fails COT validation."""
+    qa_shaped_item = {"instruction": "x", "output": "y"}
+
+    assert COTItemHandler().validate_json(qa_shaped_item) is False
+
+
+def test_factory_get_handler_returns_cot_handler() -> None:
+    """The COT item type resolves to a ``COTItemHandler`` instance."""
+    resolved = StructuredFileHandlerFactory().get_handler(ItemTypes.COT.value)
+
+    assert isinstance(resolved, COTItemHandler)
diff --git a/runtime/datamate-python/tests/test_module_synthesis.py b/runtime/datamate-python/tests/test_module_synthesis.py
new file mode 100644
index 00000000..2417308d
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_synthesis.py
@@ -0,0 +1,40 @@
+"""Placeholder checks for the not-yet-implemented ``synthesis`` module scaffold."""
+
+from pathlib import Path
+
+# Root of the synthesis module, resolved relative to this test file.
+SYNTHESIS_DIR = Path(__file__).resolve().parents[1] / "app" / "module" / "synthesis"
+# Sub-directories the scaffold is expected to provide.
+EXPECTED_DIRS = frozenset({"interface", "schema", "service"})
+
+
+def test_synthesis_module_has_no_python_sources_yet() -> None:
+    """Fail as soon as Python sources appear, so this placeholder gets replaced by real tests."""
+    py_files = list(SYNTHESIS_DIR.rglob("*.py"))
+
+    assert py_files == [], (
+        "synthesis 模块已有 Python 实现，请补充真实业务单测并删除该占位用例"
+    )
+
+
+def test_synthesis_module_scaffold_directories_exist() -> None:
+    """Each expected scaffold entry exists and is a directory, not a plain file."""
+    missing = sorted(name for name in EXPECTED_DIRS if not (SYNTHESIS_DIR / name).is_dir())
+
+    assert not missing, f"missing scaffold directories: {missing}"
+
+
+def test_synthesis_scaffold_contains_only_expected_directories_or_cache() -> None:
+    """The module root holds the expected directories and nothing else but ``__pycache__``."""
+    names = {entry.name for entry in SYNTHESIS_DIR.iterdir()}
+
+    assert EXPECTED_DIRS <= names
+    # Presence alone would let stray entries slip through; flag anything unexpected.
+    unexpected = sorted(names - EXPECTED_DIRS - {"__pycache__"})
+    assert not unexpected, f"unexpected entries in synthesis scaffold: {unexpected}"
+
+
+def test_synthesis_module_path_exists() -> None:
+    """The synthesis module root exists and is a directory."""
+    assert SYNTHESIS_DIR.exists()
+    assert SYNTHESIS_DIR.is_dir()
diff --git a/runtime/datamate-python/tests/test_module_system.py b/runtime/datamate-python/tests/test_module_system.py
new file mode 100644
index 00000000..01349fef
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_system.py
@@ -0,0 +1,71 @@
+"""Unit tests for ``get_model_by_id`` using a mocked async database session."""
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+from app.module.system.service.common_service import get_model_by_id
+
+
+def _run(coro):
+    """Run *coro* to completion via ``asyncio.run`` and return its result."""
+    return asyncio.run(coro)
+
+
+def _make_db(scalar):
+    """Build a mock session whose awaited ``execute`` leads to *scalar*.
+
+    ``execute`` is an ``AsyncMock`` returning a result object whose
+    ``scalar_one_or_none()`` returns *scalar*.
+    """
+    result = MagicMock()
+    result.scalar_one_or_none.return_value = scalar
+    db = MagicMock()
+    db.execute = AsyncMock(return_value=result)
+    return db
+
+
+def test_get_model_by_id_returns_model_when_found() -> None:
+    """The object produced by ``scalar_one_or_none`` is returned unchanged."""
+    model = SimpleNamespace(id="m1")
+
+    fetched = _run(get_model_by_id(_make_db(model), "m1"))
+
+    assert fetched is model
+
+
+def test_get_model_by_id_returns_none_when_missing() -> None:
+    """``None`` is returned when ``scalar_one_or_none`` yields ``None``."""
+    fetched = _run(get_model_by_id(_make_db(None), "missing"))
+
+    assert fetched is None
+
+
+def test_get_model_by_id_invokes_db_execute_once() -> None:
+    """Exactly one ``execute`` call is awaited per lookup."""
+    db = _make_db(None)
+
+    _run(get_model_by_id(db, "m2"))
+
+    # assert_awaited_once also proves the coroutine was awaited, not merely created.
+    db.execute.assert_awaited_once()
+
+
+def test_get_model_by_id_passes_query_object_to_execute() -> None:
+    """``execute`` receives a single positional argument that is not ``None``."""
+    db = _make_db(None)
+
+    _run(get_model_by_id(db, "model-xyz"))
+
+    args, _ = db.execute.await_args
+    assert len(args) == 1
+    assert args[0] is not None
+
+
+def test_get_model_by_id_returns_exact_scalar_object() -> None:
+    """The returned model is the very same object the mocked result holds."""
+    model_obj = SimpleNamespace(id="m100", endpoint="x")
+
+    fetched = _run(get_model_by_id(_make_db(model_obj), "m100"))
+
+    assert fetched is model_obj