diff --git a/runtime/datamate-python/tests/conftest.py b/runtime/datamate-python/tests/conftest.py
new file mode 100644
index 00000000..9321d5f7
--- /dev/null
+++ b/runtime/datamate-python/tests/conftest.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from types import ModuleType
+
+
+def _register_namespace(module_name: str, module_path: Path) -> None:
+    """Install a bare placeholder package named ``module_name`` in ``sys.modules``.
+
+    The placeholder's ``__path__`` holds only ``str(module_path)``. If
+    ``sys.modules`` already has an entry under that name, it is left untouched.
+    """
+    if module_name in sys.modules:
+        return
+    placeholder = ModuleType(module_name)
+    placeholder.__path__ = [str(module_path)]  # type: ignore[attr-defined]
+    sys.modules[module_name] = placeholder
+
+
+def pytest_sessionstart(session) -> None:
+    """Register placeholder packages for ``app.module`` and selected sub-packages.
+
+    The intent is to keep test imports of ``app.module.*`` from triggering the
+    heavy dependency loading done by ``app/module/__init__.py``.
+    """
+    module_root = Path(__file__).resolve().parents[1] / "app" / "module"
+    # Each tuple is a path relative to app/module; parents are listed before children.
+    sub_packages = (
+        (),
+        ("cleaning",),
+        ("cleaning", "service"),
+        ("rag",),
+        ("rag", "service"),
+        ("rag", "service", "common"),
+    )
+    for parts in sub_packages:
+        dotted_name = ".".join(("app", "module", *parts))
+        _register_namespace(dotted_name, module_root.joinpath(*parts))
diff --git a/runtime/datamate-python/tests/test_dataset_service.py b/runtime/datamate-python/tests/test_dataset_service.py
new file mode 100644
index 00000000..fbfcd8d8
--- /dev/null
+++ b/runtime/datamate-python/tests/test_dataset_service.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+from unittest.mock import MagicMock, AsyncMock
+import sys
+import os
+
+# Make sure the runtime/datamate-python directory is on sys.path
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+APP_DIR = os.path.dirname(TEST_DIR)
+sys.path.insert(0, APP_DIR)
+
+from app.module.dataset.service.service import Service
+from app.module.dataset.schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
+from app.db.models import Dataset, DatasetFiles
+
+
+class TestDatasetService(unittest.IsolatedAsyncioTestCase):
+    """Async unit tests for the dataset ``Service`` backed by a mocked DB session."""
+
+    def setUp(self):
+        """Create a mock session with awaitable methods and wrap it in a ``Service``."""
+        session = MagicMock()
+        # These four methods are AsyncMock instances so that awaiting them works.
+        for method_name in ("execute", "commit", "rollback", "flush"):
+            setattr(session, method_name, AsyncMock())
+        self.mock_db = session
+        self.service = Service(self.mock_db)
+
+    def _stub_scalar(self, row) -> None:
+        """Make ``db.execute`` return a result whose ``scalar_one_or_none()`` yields *row*."""
+        query_result = MagicMock()
+        query_result.scalar_one_or_none.return_value = row
+        self.mock_db.execute.return_value = query_result
+
+    async def test_get_dataset_success(self):
+        """Fetching an existing dataset returns a response carrying its fields."""
+        stored = Dataset(
+            id="test-dataset-id",
+            name="Test Dataset",
+            description="A test description",
+            dataset_type="TEXT",
+            status="DRAFT",
+            file_count=5,
+            size_bytes=1024,
+            created_by="system"
+        )
+        self._stub_scalar(stored)
+
+        response = await self.service.get_dataset("test-dataset-id")
+
+        self.assertIsNotNone(response)
+        # Response attribute name -> value expected from the stored row.
+        expected_fields = {
+            "id": "test-dataset-id",
+            "name": "Test Dataset",
+            "description": "A test description",
+            "datasetType": "TEXT",
+            "status": "DRAFT",
+            "fileCount": 5,
+            "totalSize": 1024,
+        }
+        for field_name, expected in expected_fields.items():
+            self.assertEqual(getattr(response, field_name), expected)
+
+    async def test_get_dataset_not_found(self):
+        """Fetching an unknown dataset id returns ``None``."""
+        self._stub_scalar(None)
+
+        response = await self.service.get_dataset("non-existent-id")
+
+        self.assertIsNone(response)
+
+    async def test_create_dataset_success(self):
+        """Creating a dataset with an unused name returns it and commits once."""
+        # The query result reports no existing row.
+        self._stub_scalar(None)
+
+        response = await self.service.create_dataset(
+            name="New Dataset",
+            dataset_type="IMAGE",
+            description="Testing create_dataset API",
+            status="PUBLISHED"
+        )
+
+        self.assertIsNotNone(response)
+        self.assertEqual(response.name, "New Dataset")
+        self.assertEqual(response.datasetType, "IMAGE")
+        self.assertEqual(response.description, "Testing create_dataset API")
+        self.assertEqual(response.status, "PUBLISHED")
+
+        # The new row must have been added and the transaction committed.
+        self.mock_db.add.assert_called_once()
+        self.mock_db.commit.assert_called_once()
+
+    async def test_create_dataset_duplicated_name(self):
+        """Creating a dataset with an already-used name raises and never commits."""
+        self._stub_scalar(Dataset(id="existing-id", name="Existing Dataset"))
+
+        with self.assertRaises(Exception) as raised:
+            await self.service.create_dataset(
+                name="Existing Dataset",
+                dataset_type="AUDIO"
+            )
+        self.assertIn("already exists", str(raised.exception))
+
+        # No commit may happen on the failure path.
+        self.mock_db.commit.assert_not_called()
+
+
+if __name__ == "__main__":
+    # Allow running this module directly through the unittest runner.
+    unittest.main()
diff --git a/runtime/datamate-python/tests/test_module_annotation.py b/runtime/datamate-python/tests/test_module_annotation.py
new file mode 100644
index 00000000..5ae3a184
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_annotation.py
@@ -0,0 +1,132 @@
+from app.module.annotation.utils.config_validator import LabelStudioConfigValidator
+import pytest
+
+
+def test_validate_xml_success_with_object_and_control() -> None:
+    """A config with one data object and one control is reported as valid."""
+    # NOTE(review): the XML payload was blank in the reviewed patch (markup appears
+    # to have been stripped). This is a reconstruction - confirm against the validator.
+    xml = """
+<View>
+  <Text name="text" value="$text"/>
+  <Choices name="sentiment" toName="text">
+    <Choice value="positive"/>
+    <Choice value="negative"/>
+  </Choices>
+</View>
+"""
+
+    valid, error = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert valid is True
+    assert error is None
+
+
+def test_validate_xml_fails_when_no_controls() -> None:
+    """A config that has a data object but no control tag is rejected."""
+    # NOTE(review): the literal was empty in the reviewed patch, which could only
+    # produce a parse failure. Reconstructed payload - confirm against the validator.
+    xml = """<View><Text name="text" value="$text"/></View>"""
+
+    valid, error = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert valid is False
+    assert "No annotation controls" in (error or "")
+
+
+def test_validate_configuration_json_rejects_unknown_object_reference() -> None:
+    """A label whose ``toName`` matches no declared object is rejected."""
+    dangling_label = {
+        "fromName": "sentiment",
+        "toName": "missing_object",
+        "type": "Choices",
+        "options": ["positive", "negative"],
+    }
+    text_object = {"name": "text", "type": "Text", "value": "$text"}
+
+    valid, error = LabelStudioConfigValidator.validate_configuration_json(
+        {"labels": [dangling_label], "objects": [text_object]}
+    )
+
+    assert valid is False
+    assert "unknown object" in (error or "")
+
+
+def test_extract_label_values() -> None:
+    """Choice values are returned as a list keyed by the control's name."""
+    # NOTE(review): the XML payload was blank in the reviewed patch (markup appears
+    # to have been stripped). Reconstructed to match the asserted result - confirm.
+    xml = """
+<View>
+  <Text name="text" value="$text"/>
+  <Choices name="sentiment" toName="text">
+    <Choice value="positive"/>
+    <Choice value="negative"/>
+  </Choices>
+</View>
+"""
+
+    labels = LabelStudioConfigValidator.extract_label_values(xml)
+
+    assert labels == {"sentiment": ["positive", "negative"]}
+
+
+def test_validate_xml_rejects_invalid_root() -> None:
+    """Well-formed XML with an unexpected root tag is rejected."""
+    # NOTE(review): the literal was empty in the reviewed patch, so the root check
+    # could never be reached. Reconstructed payload - confirm against the validator.
+    xml = """<Root><Text name="text" value="$text"/></Root>"""
+
+    valid, error = LabelStudioConfigValidator.validate_xml(xml)
+
+    assert valid is False
+    assert "Root element must be " in (error or "")
+
+
+def test_validate_configuration_json_requires_labels() -> None:
+    """A configuration dict without a ``labels`` key is rejected."""
+    config_without_labels = {"objects": []}
+
+    outcome = LabelStudioConfigValidator.validate_configuration_json(config_without_labels)
+    valid, error = outcome
+
+    assert valid is False
+    assert "Missing 'labels' field" in (error or "")
+
+
+def test_validate_xml_fails_for_invalid_xml() -> None:
+    """An empty string passed as XML is reported as a parse error."""
+    valid, error = LabelStudioConfigValidator.validate_xml("")
+
+    assert valid is False
+    assert "XML parse error" in (error or "")
+
+
+@pytest.mark.parametrize(
+    "label,error_text",
+    [
+        # Each case omits exactly one key; the error must mention that key.
+        ({"toName": "obj", "type": "Choices", "options": ["A"]}, "fromName"),
+        ({"fromName": "lbl", "type": "Choices", "options": ["A"]}, "toName"),
+        ({"fromName": "lbl", "toName": "obj", "options": ["A"]}, "type"),
+    ],
+)
+def test_validate_label_definition_required_fields(label, error_text: str) -> None:
+    """A label definition lacking one of its required keys is rejected."""
+    outcome = LabelStudioConfigValidator._validate_label_definition(label)
+    valid, error = outcome
+
+    assert valid is False
+    assert error_text in (error or "")
+
+
+def test_validate_label_definition_rejects_unsupported_type() -> None:
+    """A label definition whose ``type`` is ``NotSupported`` is rejected."""
+    bad_label = {"fromName": "x", "toName": "obj", "type": "NotSupported"}
+
+    valid, error = LabelStudioConfigValidator._validate_label_definition(bad_label)
+
+    assert valid is False
+    assert "Unsupported control type" in (error or "")
+
+
+def test_validate_object_definition_rejects_value_without_dollar_prefix() -> None:
+    """An object definition whose ``value`` lacks the leading ``$`` is rejected."""
+    valid, error = LabelStudioConfigValidator._validate_object_definition(
+        {"name": "txt", "type": "Text", "value": "text"}
+    )
+
+    assert valid is False
+    assert "must start with '$'" in (error or "")
+
+
+def test_extract_label_values_returns_empty_on_invalid_xml() -> None:
+    """Malformed XML yields no label values."""
+    # NOTE(review): in the reviewed patch this call, its assertion and the next
+    # test's ``def`` line were fused into one broken line. The argument and the
+    # expected empty dict are reconstructions - confirm against the original.
+    labels = LabelStudioConfigValidator.extract_label_values("<View><Text")
+    assert labels == {}
+
+
+def test_validate_xml_fails_when_control_missing_name() -> None:
+    """A control tag without a ``name`` attribute is rejected."""
+    # NOTE(review): the test name and XML payload were lost in the reviewed patch;
+    # both are reconstructed from the surviving assertion - confirm.
+    xml = """
+<View>
+  <Text name="text" value="$text"/>
+  <Choices toName="text"><Choice value="positive"/></Choices>
+</View>
+"""
+    valid, error = LabelStudioConfigValidator.validate_xml(xml)
+    assert valid is False
+    assert "Missing 'name' attribute" in (error or "")
+
diff --git a/runtime/datamate-python/tests/test_module_cleaning.py b/runtime/datamate-python/tests/test_module_cleaning.py
new file mode 100644
index 00000000..3e902041
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_cleaning.py
@@ -0,0 +1,99 @@
+import pytest
+
+from app.core.exception import BusinessError
+from app.module.cleaning.schema.cleaning import OperatorInstanceDto
+from app.module.cleaning.service.cleaning_task_validator import CleaningTaskValidator
+from app.module.operator.constants import CATEGORY_DATAMATE_ID, CATEGORY_DATA_JUICER_ID
+
+
+def _op(op_id: str, inputs: str | None, outputs: str | None, categories: list[str] | None = None) -> OperatorInstanceDto:
+    """Build an ``OperatorInstanceDto`` from the four fields these tests vary."""
+    dto_fields = {"id": op_id, "inputs": inputs, "outputs": outputs, "categories": categories}
+    return OperatorInstanceDto(**dto_fields)
+
+
+def test_check_input_and_output_passes_with_multimodal() -> None:
+    """A ``multimodal`` output followed by an ``image`` input must not raise."""
+    producer = _op("a", "text", "multimodal")
+    consumer = _op("b", "image", "text")
+
+    CleaningTaskValidator.check_input_and_output([producer, consumer])
+
+
+def test_check_input_and_output_raises_on_type_mismatch() -> None:
+    """An ``image`` output feeding a ``text`` input raises ``BusinessError``."""
+    producer = _op("a", "text", "image")
+    consumer = _op("b", "text", "text")
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output([producer, consumer])
+
+
+def test_check_and_get_executor_type_raises_when_mixed_categories() -> None:
+    """Mixing a DataMate-category and a Data-Juicer-category operator raises."""
+    mixed = [
+        _op(op_id, None, None, [category])
+        for op_id, category in (("a", CATEGORY_DATAMATE_ID), ("b", CATEGORY_DATA_JUICER_ID))
+    ]
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_and_get_executor_type(mixed)
+
+
+def test_check_and_get_executor_type_defaults_to_datamate() -> None:
+    """An operator with no categories resolves to the ``datamate`` executor."""
+    uncategorised = _op("a", None, None, None)
+
+    assert CleaningTaskValidator.check_and_get_executor_type([uncategorised]) == "datamate"
+
+
+def test_check_task_id_raises_when_empty() -> None:
+    """An empty task id raises ``BusinessError``."""
+    empty_task_id = ""
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_task_id(empty_task_id)
+
+
+def test_check_task_id_accepts_normal_value() -> None:
+    """A non-empty task id passes the check without raising."""
+    task_id = "task-1"
+    CleaningTaskValidator.check_task_id(task_id)
+
+
+def test_check_input_and_output_returns_for_empty_instances() -> None:
+    """An empty operator list passes the check without raising."""
+    no_instances = []
+    CleaningTaskValidator.check_input_and_output(no_instances)
+
+
+def test_check_input_and_output_raises_when_current_has_no_outputs() -> None:
+    """A first operator whose ``outputs`` is ``None`` raises ``BusinessError``."""
+    producer = _op("a", "text", None)
+    consumer = _op("b", "text", "text")
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output([producer, consumer])
+
+
+def test_check_input_and_output_raises_when_next_has_no_inputs() -> None:
+    """A second operator whose ``inputs`` is ``None`` raises ``BusinessError``."""
+    producer = _op("a", "text", "text")
+    consumer = _op("b", None, "text")
+
+    with pytest.raises(BusinessError):
+        CleaningTaskValidator.check_input_and_output([producer, consumer])
+
+
+@pytest.mark.parametrize(
+    "out_type,in_type",
+    [
+        # (upstream output, downstream input) pairs that must be accepted.
+        ("text", "text"),
+        (" image ", "image"),
+        ("AUDIO", "audio"),
+    ],
+)
+def test_check_input_and_output_allows_exact_match_with_normalization(out_type: str, in_type: str) -> None:
+    """Each parametrized output/input pair passes the check without raising."""
+    upstream = _op("a", "x", out_type)
+    downstream = _op("b", in_type, "y")
+
+    CleaningTaskValidator.check_input_and_output([upstream, downstream])
+
+
+def test_check_and_get_executor_type_prefers_datajuicer_when_only_datajuicer() -> None:
+    """A lone Data-Juicer-category operator resolves to the ``default`` executor."""
+    # NOTE(review): presumably "default" is the Data-Juicer executor label - confirm.
+    only_data_juicer = [_op("a", None, None, [CATEGORY_DATA_JUICER_ID])]
+
+    executor = CleaningTaskValidator.check_and_get_executor_type(only_data_juicer)
+
+    assert executor == "default"
diff --git a/runtime/datamate-python/tests/test_module_collection.py b/runtime/datamate-python/tests/test_module_collection.py
new file mode 100644
index 00000000..072ddfe9
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_collection.py
@@ -0,0 +1,125 @@
+import pytest
+from types import SimpleNamespace
+from app.module.collection.schema.collection import converter_to_response, converter_execution_to_response
+
+from app.module.collection.schema.collection import (
+ CollectionConfig,
+ CollectionTaskCreate,
+ CollectionTaskUpdate,
+ SyncMode,
+ convert_for_create,
+)
+
+
+def test_collection_task_update_rejects_blank_schedule_expression() -> None:
+    """A whitespace-only ``schedule_expression`` raises ``ValueError``."""
+    blank_expression = " "
+    with pytest.raises(ValueError):
+        CollectionTaskUpdate(schedule_expression=blank_expression)
+
+
+def test_collection_task_update_rejects_non_positive_timeout() -> None:
+    """A ``timeout_seconds`` of zero raises ``ValueError``."""
+    zero_timeout = 0
+    with pytest.raises(ValueError):
+        CollectionTaskUpdate(timeout_seconds=zero_timeout)
+
+
+def test_convert_for_create_handles_sync_mode_schedule_expression() -> None:
+    """The schedule expression is kept for SCHEDULED tasks and cleared for ONCE tasks."""
+    # Everything except name and sync mode is shared by the two requests.
+    shared_kwargs = {
+        "schedule_expression": "0 0 * * *",
+        "config": CollectionConfig(parameter={"k": "v"}),
+        "template_id": "tpl-1",
+    }
+    scheduled_task = CollectionTaskCreate(name="task-scheduled", sync_mode=SyncMode.SCHEDULED, **shared_kwargs)
+    once_task = CollectionTaskCreate(name="task-once", sync_mode=SyncMode.ONCE, **shared_kwargs)
+
+    scheduled_record = convert_for_create(scheduled_task, "task-1")
+    once_record = convert_for_create(once_task, "task-2")
+
+    assert scheduled_record.schedule_expression == "0 0 * * *"
+    assert once_record.schedule_expression is None
+    assert scheduled_record.target_path == "/dataset/local/task-1"
+
+
+def test_collection_task_update_accepts_positive_timeout() -> None:
+    """A positive ``timeout_seconds`` is accepted and stored unchanged."""
+    timeout = CollectionTaskUpdate(timeout_seconds=30).timeout_seconds
+    assert timeout == 30
+
+
+def test_convert_for_create_sets_pending_status() -> None:
+    """A record converted from a create request has status ``PENDING``."""
+    request = CollectionTaskCreate(
+        name="task-once",
+        sync_mode=SyncMode.ONCE,
+        config=CollectionConfig(parameter={"k": "v"}),
+        template_id="tpl-1",
+    )
+
+    assert convert_for_create(request, "task-3").status == "PENDING"
+
+
+def test_collection_task_update_accepts_none_fields() -> None:
+    """An update built with no arguments has ``None`` timeout and config."""
+    empty_update = CollectionTaskUpdate()
+
+    for field_name in ("timeout_seconds", "config"):
+        assert getattr(empty_update, field_name) is None
+
+
+def test_converter_to_response_maps_json_config() -> None:
+    """A JSON-string ``config`` on the task is exposed as a structured config."""
+    # Stand-in for a task row; ``config`` is deliberately a JSON string.
+    task_fields = {
+        "id": "t1",
+        "name": "task",
+        "description": "desc",
+        "sync_mode": "ONCE",
+        "template_id": "tpl",
+        "template_name": "template",
+        "target_path": "/dataset/local/t1",
+        "config": '{"parameter": {"a": 1}}',
+        "schedule_expression": None,
+        "status": "PENDING",
+        "retry_count": 3,
+        "timeout_seconds": 60,
+        "last_execution_id": "e1",
+        "created_at": None,
+        "updated_at": None,
+        "created_by": "u",
+        "updated_by": "u",
+    }
+
+    response = converter_to_response(SimpleNamespace(**task_fields))
+
+    assert response.id == "t1"
+    assert response.config.parameter == {"a": 1}
+    assert response.status.value == "PENDING"
+
+
+def test_converter_execution_to_response_maps_fields() -> None:
+    """Id, task id and status of an execution are carried into the response."""
+    # Stand-in for an execution row.
+    execution_fields = {
+        "id": "e1",
+        "task_id": "t1",
+        "task_name": "task",
+        "status": "RUNNING",
+        "log_path": "/x.log",
+        "started_at": None,
+        "completed_at": None,
+        "duration_seconds": 1,
+        "error_message": None,
+        "created_at": None,
+        "updated_at": None,
+        "created_by": "u",
+        "updated_by": "u",
+    }
+
+    response = converter_execution_to_response(SimpleNamespace(**execution_fields))
+
+    assert response.id == "e1"
+    assert response.task_id == "t1"
+    assert response.status == "RUNNING"
+
diff --git a/runtime/datamate-python/tests/test_module_dataset.py b/runtime/datamate-python/tests/test_module_dataset.py
new file mode 100644
index 00000000..c860420c
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_dataset.py
@@ -0,0 +1,145 @@
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+from app.module.dataset.service.service import Service
+
+
+def _run(coro):
+    """Run *coro* to completion with ``asyncio.run`` and return its result."""
+    outcome = asyncio.run(coro)
+    return outcome
+
+
+def test_create_dataset_uses_default_status_when_not_provided() -> None:
+    """Creating a dataset without a status yields ``DRAFT`` and commits once."""
+    # The query result reports no existing row.
+    lookup = MagicMock()
+    lookup.scalar_one_or_none.return_value = None
+
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=lookup)
+    session.flush = AsyncMock()
+    session.commit = AsyncMock()
+    session.rollback = AsyncMock()
+
+    created = _run(Service(session).create_dataset(name="ds1", dataset_type="TEXT", description="desc"))
+
+    assert created.status == "DRAFT"
+    assert created.name == "ds1"
+    session.commit.assert_called_once()
+
+
+def test_get_dataset_returns_none_when_execute_fails() -> None:
+    """A ``RuntimeError`` from ``db.execute`` makes ``get_dataset`` return ``None``."""
+    session = MagicMock()
+    session.execute = AsyncMock(side_effect=RuntimeError("db unavailable"))
+
+    fetched = _run(Service(session).get_dataset("dataset-1"))
+
+    assert fetched is None
+
+
+def test_get_file_download_url_returns_file_path() -> None:
+    """The ``file_path`` of the matching file row is returned."""
+    lookup = MagicMock()
+    lookup.scalar_one_or_none.return_value = SimpleNamespace(file_path="/dataset/ds1/a.txt")
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=lookup)
+
+    download_path = _run(Service(session).get_file_download_url("ds1", "file1"))
+
+    assert download_path == "/dataset/ds1/a.txt"
+
+
+def test_create_dataset_raises_for_duplicated_name() -> None:
+    """Creating a dataset whose name already exists raises and rolls back once."""
+    import pytest  # local import: this module has no file-level pytest import
+
+    db = MagicMock()
+    result = MagicMock()
+    # The query result reports an existing row with the requested name.
+    result.scalar_one_or_none.return_value = SimpleNamespace(id="d1", name="dup")
+    db.execute = AsyncMock(return_value=result)
+    db.rollback = AsyncMock()
+
+    service = Service(db)
+
+    # pytest.raises fails the test on its own when nothing is raised, so the
+    # manual ``raised`` flag is unnecessary; ``match`` searches str(exception).
+    with pytest.raises(Exception, match="already exists"):
+        _run(service.create_dataset(name="dup", dataset_type="TEXT"))
+
+    db.rollback.assert_called_once()
+
+
+def test_get_file_download_url_returns_none_when_file_missing() -> None:
+    """``None`` is returned when no file row matches."""
+    lookup = MagicMock()
+    lookup.scalar_one_or_none.return_value = None
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=lookup)
+
+    download_path = _run(Service(session).get_file_download_url("ds1", "missing"))
+
+    assert download_path is None
+
+
+def test_get_dataset_returns_none_when_not_found() -> None:
+    """``None`` is returned when no dataset row matches the id."""
+    lookup = MagicMock()
+    lookup.scalar_one_or_none.return_value = None
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=lookup)
+
+    fetched = _run(Service(session).get_dataset("not-exist"))
+
+    assert fetched is None
+
+
+def test_get_dataset_files_returns_paged_response() -> None:
+    """Count and row queries are combined into one paged response."""
+    total = MagicMock()
+    total.scalar_one.return_value = 2
+
+    # Two stand-in file rows that differ only in id, name, path and size.
+    file_rows = [
+        SimpleNamespace(
+            id=file_id,
+            file_name=file_name,
+            file_type="txt",
+            file_path=f"/dataset/{file_name}",
+            file_size=file_size,
+            status="ACTIVE",
+            upload_time=None,
+            last_access_time=None,
+            tags=[],
+            tags_updated_at=None,
+        )
+        for file_id, file_name, file_size in (("f1", "a.txt", 12), ("f2", "b.txt", 20))
+    ]
+    listing = MagicMock()
+    listing.scalars.return_value.all.return_value = file_rows
+
+    session = MagicMock()
+    # First execute() call answers the count query, the second the row query.
+    session.execute = AsyncMock(side_effect=[total, listing])
+
+    page = _run(Service(session).get_dataset_files("ds1", page=0, size=10))
+
+    assert page is not None
+    assert page.totalElements == 2
+    assert len(page.content) == 2
+    assert page.content[0].fileName == "a.txt"
+
+
+def test_get_dataset_files_returns_none_when_query_fails() -> None:
+    """A ``RuntimeError`` from ``db.execute`` makes ``get_dataset_files`` return ``None``."""
+    session = MagicMock()
+    session.execute = AsyncMock(side_effect=RuntimeError("query fail"))
+
+    page = _run(Service(session).get_dataset_files("ds1"))
+
+    assert page is None
diff --git a/runtime/datamate-python/tests/test_module_evaluation.py b/runtime/datamate-python/tests/test_module_evaluation.py
new file mode 100644
index 00000000..a95159c0
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_evaluation.py
@@ -0,0 +1,48 @@
+from app.module.evaluation.schema.prompt import EVALUATION_PROMPT_TEMPLATE
+from app.module.evaluation.service.prompt_template_service import PromptTemplateService
+
+
+def test_get_prompt_templates_size_matches_source() -> None:
+    """The service returns one template per ``EVALUATION_PROMPT_TEMPLATE`` entry."""
+    returned = PromptTemplateService.get_prompt_templates().templates
+    assert len(returned) == len(EVALUATION_PROMPT_TEMPLATE)
+
+
+def test_get_prompt_templates_dimensions_are_mapped() -> None:
+    """The first template and each of its default dimensions expose string fields."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+    assert templates
+
+    head = templates[0]
+    assert isinstance(head.evalType, str)
+    assert isinstance(head.prompt, str)
+    for dimension in head.defaultDimensions:
+        assert isinstance(dimension.dimension, str)
+        assert isinstance(dimension.description, str)
+
+
+def test_get_prompt_templates_all_items_have_eval_type_and_prompt() -> None:
+    """Every template has a truthy ``evalType`` and a string ``prompt``."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+
+    missing_eval_type = [item for item in templates if not item.evalType]
+    non_string_prompt = [item for item in templates if not isinstance(item.prompt, str)]
+
+    assert not missing_eval_type
+    assert not non_string_prompt
+
+
+def test_get_prompt_templates_preserves_eval_type_order() -> None:
+    """Templates come back in the same ``evalType`` order as the source list."""
+    source_order = [raw.get("evalType", "") for raw in EVALUATION_PROMPT_TEMPLATE]
+
+    returned = PromptTemplateService.get_prompt_templates().templates
+
+    assert [template.evalType for template in returned] == source_order
+
+
+def test_get_prompt_templates_handles_empty_dimensions() -> None:
+    """Source entries with no default dimensions map to an empty list."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+    for position, raw in enumerate(EVALUATION_PROMPT_TEMPLATE):
+        if raw.get("defaultDimensions"):
+            continue  # only entries with missing or empty dimensions are checked
+        assert templates[position].defaultDimensions == []
+
+
+def test_prompt_template_dimension_fields_are_non_none() -> None:
+    """No default dimension of any template has a ``None`` name or description."""
+    templates = PromptTemplateService.get_prompt_templates().templates
+    for template in templates:
+        for dimension in template.defaultDimensions:
+            assert dimension.dimension is not None
+            assert dimension.description is not None
diff --git a/runtime/datamate-python/tests/test_module_generation.py b/runtime/datamate-python/tests/test_module_generation.py
new file mode 100644
index 00000000..50f92330
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_generation.py
@@ -0,0 +1,51 @@
+import pytest
+
+from app.module.generation.schema.generation import SynthesisType
+from app.module.generation.service.prompt import (
+ ANSWER_GENERATOR_PROMPT,
+ COT_GENERATOR_PROMPT,
+ QUESTION_GENERATOR_PROMPT,
+ get_prompt,
+)
+
+
+def test_get_prompt_dispatches_by_synthesis_type() -> None:
+    """Each synthesis type maps to its own prompt constant."""
+    expected_prompts = (
+        (SynthesisType.QA, ANSWER_GENERATOR_PROMPT),
+        (SynthesisType.COT, COT_GENERATOR_PROMPT),
+        (SynthesisType.QUESTION, QUESTION_GENERATOR_PROMPT),
+    )
+    for synthesis_type, expected in expected_prompts:
+        assert get_prompt(synthesis_type) == expected
+
+
+def test_get_prompt_raises_for_unsupported_type() -> None:
+    """The string ``UNKNOWN`` is not a supported type and raises ``ValueError``."""
+    unsupported = "UNKNOWN"
+    with pytest.raises(ValueError):
+        get_prompt(unsupported)
+
+
+def test_synthesis_type_values_are_stable() -> None:
+    """Each pinned enum member's value equals its expected string."""
+    pinned_values = {
+        SynthesisType.QA: "QA",
+        SynthesisType.COT: "COT",
+        SynthesisType.QUESTION: "QUESTION",
+    }
+    for member, expected in pinned_values.items():
+        assert member.value == expected
+
+
+def test_get_prompt_error_contains_unsupported_type() -> None:
+    """The ``ValueError`` message mentions ``Unsupported synthesis type``."""
+    with pytest.raises(ValueError) as caught:
+        get_prompt("X")
+
+    message = str(caught.value)
+    assert "Unsupported synthesis type" in message
+
+
+@pytest.mark.parametrize(
+    "synth_type,required_text",
+    [
+        # (synthesis type, substring its prompt must contain)
+        (SynthesisType.QA, "output"),
+        (SynthesisType.COT, "chain_of_thought"),
+        (SynthesisType.QUESTION, "JSON"),
+    ],
+)
+def test_get_prompt_contains_expected_keywords(synth_type: SynthesisType, required_text: str) -> None:
+    """The prompt for each synthesis type contains its expected keyword."""
+    assert required_text in get_prompt(synth_type)
+
+
+def test_synthesis_type_enum_values_are_unique() -> None:
+    """No two ``SynthesisType`` members share a value."""
+    member_values = [member.value for member in SynthesisType]
+    distinct_values = set(member_values)
+    assert len(member_values) == len(distinct_values)
diff --git a/runtime/datamate-python/tests/test_module_operator.py b/runtime/datamate-python/tests/test_module_operator.py
new file mode 100644
index 00000000..73f4d566
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_operator.py
@@ -0,0 +1,66 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from app.module.operator.parsers.parser_holder import ParserHolder
+from app.module.operator.parsers.zip_parser import ZipParser
+
+
+def test_get_parser_returns_zip_parser() -> None:
+    """A ``.zip`` file name resolves to a ``ZipParser``."""
+    resolved = ParserHolder().get_parser("abc.zip")
+    assert isinstance(resolved, ZipParser)
+
+
+def test_get_parser_raises_for_unsupported_file() -> None:
+    """A ``.txt`` file name has no parser and raises ``ValueError``."""
+    parser_holder = ParserHolder()
+    unsupported_name = "abc.txt"
+    with pytest.raises(ValueError):
+        parser_holder.get_parser(unsupported_name)
+
+
+def test_extract_to_delegates_to_target_parser() -> None:
+    """``extract_to`` forwards archive and target to the parser registered for the type."""
+    stub_parser = MagicMock()
+    parser_holder = ParserHolder()
+    # Replace the registered zip parser with the stub to observe the delegated call.
+    parser_holder._parsers["zip"] = stub_parser
+
+    parser_holder.extract_to("zip", "archive.zip", "target")
+
+    stub_parser.extract_to.assert_called_once_with("archive.zip", "target")
+
+
+def test_get_parser_supports_uppercase_extension() -> None:
+    """An upper-case ``.ZIP`` file name still resolves to a ``ZipParser``."""
+    resolved = ParserHolder().get_parser("ABC.ZIP")
+    assert isinstance(resolved, ZipParser)
+
+
+def test_parse_yaml_from_archive_delegates_to_selected_parser() -> None:
+    """The holder returns what the selected parser returns and forwards the arguments."""
+    sentinel = object()
+    stub_parser = MagicMock()
+    stub_parser.parse_yaml_from_archive.return_value = sentinel
+
+    parser_holder = ParserHolder()
+    parser_holder._parsers["zip"] = stub_parser
+
+    parsed = parser_holder.parse_yaml_from_archive("zip", "a.zip", "metadata.yml")
+
+    assert parsed is sentinel
+    # The stub is expected to receive two extra positional arguments, both None.
+    stub_parser.parse_yaml_from_archive.assert_called_once_with("a.zip", "metadata.yml", None, None)
+
+
+@pytest.mark.parametrize("name", ["a.tar", "a.gz", "a.tgz"])
+def test_get_parser_supports_tar_like_extensions(name: str) -> None:
+    """Each listed tar-style file name resolves to some parser."""
+    resolved = ParserHolder().get_parser(name)
+    assert resolved is not None
+
+
+def test_parse_yaml_from_archive_raises_when_type_unsupported() -> None:
+    """The archive type ``rar`` raises ``ValueError`` in ``parse_yaml_from_archive``."""
+    parser_holder = ParserHolder()
+    call_args = ("rar", "a.rar", "metadata.yml")
+    with pytest.raises(ValueError):
+        parser_holder.parse_yaml_from_archive(*call_args)
+
+
+def test_extract_to_raises_when_type_unsupported() -> None:
+    """The archive type ``rar`` raises ``ValueError`` in ``extract_to``."""
+    parser_holder = ParserHolder()
+    call_args = ("rar", "a.rar", "tmp")
+    with pytest.raises(ValueError):
+        parser_holder.extract_to(*call_args)
diff --git a/runtime/datamate-python/tests/test_module_orchestration.py b/runtime/datamate-python/tests/test_module_orchestration.py
new file mode 100644
index 00000000..a7432f15
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_orchestration.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+
+
+def test_orchestration_module_has_no_python_sources_yet() -> None:
+    """Placeholder check: the orchestration module tree holds no ``*.py`` files.
+
+    The failure message (in Chinese) asks for real unit tests to be added and
+    this placeholder removed once the module gains Python sources.
+    """
+    tests_parent = Path(__file__).resolve().parents[1]
+    orchestration_dir = tests_parent / "app" / "module" / "orchestration"
+    # rglob searches recursively, so files in nested sub-directories count too.
+    found_sources = list(orchestration_dir.rglob("*.py"))
+
+    assert found_sources == [], (
+        "orchestration 模块已有 Python 实现,请补充真实业务单测并删除该占位用例"
+    )
+
+
+def test_orchestration_module_scaffold_directories_exist() -> None:
+    """The ``interface``, ``schema`` and ``service`` entries exist under the module."""
+    orchestration_dir = Path(__file__).resolve().parents[1] / "app" / "module" / "orchestration"
+
+    for entry_name in ("interface", "schema", "service"):
+        assert (orchestration_dir / entry_name).exists()
+
+
+def test_orchestration_scaffold_contains_only_directories_or_cache() -> None:
+    """``interface``, ``schema`` and ``service`` appear among the module's entries."""
+    # NOTE(review): despite the test name, nothing here rejects extra entries;
+    # only the presence of the three names is checked - confirm the intent.
+    orchestration_dir = Path(__file__).resolve().parents[1] / "app" / "module" / "orchestration"
+    entry_names = {entry.name for entry in orchestration_dir.iterdir()}
+
+    for required in ("interface", "schema", "service"):
+        assert required in entry_names
+
+
+def test_orchestration_module_path_exists() -> None:
+    """The orchestration module path exists and is a directory."""
+    tests_parent = Path(__file__).resolve().parents[1]
+    orchestration_dir = tests_parent / "app" / "module" / "orchestration"
+
+    assert orchestration_dir.exists()
+    assert orchestration_dir.is_dir()
diff --git a/runtime/datamate-python/tests/test_module_rag.py b/runtime/datamate-python/tests/test_module_rag.py
new file mode 100644
index 00000000..2a61e72b
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_rag.py
@@ -0,0 +1,49 @@
+import pytest
+
+from app.module.rag.service.common.text_cleaner import TextCleaner
+
+
+def test_clean_removes_control_chars_and_empty_lines() -> None:
+    """``clean`` turns the noisy sample text into the expected two-line result."""
+    noisy_text = "Hello\x00 world\n\n\n\tLine2\n"
+    expected = "Hello world\n Line2"
+
+    assert TextCleaner.clean(noisy_text) == expected
+
+
+def test_has_printable_content() -> None:
+    """Whitespace-only text has no printable content; padded text does."""
+    whitespace_only = " \n\t"
+    padded_text = " 数据A "
+
+    assert TextCleaner.has_printable_content(whitespace_only) is False
+    assert TextCleaner.has_printable_content(padded_text) is True
+
+
+def test_clean_returns_empty_string_for_none_or_empty() -> None:
+    """``None`` and the empty string both clean to the empty string."""
+    for blank_input in (None, ""):
+        assert TextCleaner.clean(blank_input) == ""
+
+
+def test_clean_normalizes_multiple_spaces() -> None:
+    """The tab run in the sample text is cleaned to a single space."""
+    assert TextCleaner.clean("A B\t\tC") == "A B C"
+
+
+def test_remove_control_characters_private_method_behavior() -> None:
+    """``\\x01`` and ``\\x02`` are removed while surrounding letters stay."""
+    with_control_chars = "ab\x01\x02cd"
+    assert TextCleaner._remove_control_characters(with_control_chars) == "abcd"
+
+
+def test_remove_empty_lines_private_method_behavior() -> None:
+    """Empty and whitespace-only lines are dropped from the sample text."""
+    stripped = TextCleaner._remove_empty_lines("line1\n\n \nline2\n")
+    assert stripped == "line1\nline2"
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        # (input text, expected has_printable_content result)
+        ("", False),
+        ("\n\t ", False),
+        ("A", True),
+        (" 1 ", True),
+    ],
+)
+def test_has_printable_content_parametrized(text: str, expected: bool) -> None:
+    """``has_printable_content`` returns the expected boolean for each sample."""
+    verdict = TextCleaner.has_printable_content(text)
+    assert verdict is expected
diff --git a/runtime/datamate-python/tests/test_module_ratio.py b/runtime/datamate-python/tests/test_module_ratio.py
new file mode 100644
index 00000000..323763b5
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_ratio.py
@@ -0,0 +1,88 @@
+import pytest
+
+from app.module.ratio.schema.ratio_task import CreateRatioTaskRequest, FilterCondition
+
+
+def test_filter_condition_rejects_bad_date_range_order() -> None:
+    """A date range whose start is after its end raises ``ValueError``."""
+    reversed_range = ["2025-01-02", "2025-01-01"]
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=reversed_range)
+
+
+def test_create_ratio_task_request_validates_numeric_totals() -> None:
+    """A non-numeric ``totals`` string raises ``ValueError``."""
+    request_kwargs = {"name": "r1", "totals": "abc", "config": []}
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(**request_kwargs)
+
+
+def test_create_ratio_task_request_accepts_valid_numeric_values() -> None:
+    """Numeric-string totals and counts are accepted and kept as given."""
+    dataset_entry = {
+        "datasetId": "ds-1",
+        "counts": "5",
+        "filterConditions": {
+            "dateRange": ["2025-01-01", "2025-01-31"],
+            "label": {"label": "intent", "value": "A"},
+        },
+    }
+
+    request = CreateRatioTaskRequest(name="ratio-task", totals="10", config=[dataset_entry])
+
+    assert request.totals == "10"
+    assert request.config[0].counts == "5"
+
+
+def test_filter_condition_rejects_invalid_date_range_length() -> None:
+    """A date range with a single element raises ``ValueError``."""
+    single_date = ["2025-01-01"]
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=single_date)
+
+
+def test_create_ratio_task_request_rejects_non_numeric_counts() -> None:
+    """A non-numeric ``counts`` string in a config entry raises ``ValueError``."""
+    bad_entry = {
+        "datasetId": "ds-1",
+        "counts": "x",
+        "filterConditions": {"dateRange": ["2025-01-01", "2025-01-02"]},
+    }
+
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(name="ratio-task", totals="10", config=[bad_entry])
+
+
+def test_filter_condition_accepts_none_date_range() -> None:
+    """``dateRange=None`` is accepted and readable as ``date_range``."""
+    condition = FilterCondition(dateRange=None)
+
+    assert condition.date_range is None
+
+
+def test_filter_condition_rejects_invalid_date_string() -> None:
+    """A date range containing an unparseable date raises ``ValueError``."""
+    malformed_range = ["bad-date", "2025-01-01"]
+    with pytest.raises(ValueError):
+        FilterCondition(dateRange=malformed_range)
+
+
+def test_create_ratio_task_request_accepts_zero_totals() -> None:
+    """A ``totals`` of ``"0"`` is accepted and kept as given."""
+    request = CreateRatioTaskRequest(name="r0", totals="0", config=[])
+
+    assert request.totals == "0"
+
+
+def test_create_ratio_task_request_rejects_negative_totals() -> None:
+    """A negative ``totals`` string raises ``ValueError``."""
+    request_kwargs = {"name": "r1", "totals": "-1", "config": []}
+    with pytest.raises(ValueError):
+        CreateRatioTaskRequest(**request_kwargs)
+
+
+def test_create_ratio_task_request_alias_mapping_for_dataset_id() -> None:
+    """The ``datasetId`` input key is readable as ``dataset_id`` on the model."""
+    aliased_entry = {
+        "datasetId": "ds-alias",
+        "counts": "1",
+        "filterConditions": {"dateRange": ["2025-01-01", "2025-01-02"]},
+    }
+
+    request = CreateRatioTaskRequest(name="ratio-task", totals="2", config=[aliased_entry])
+
+    assert request.config[0].dataset_id == "ds-alias"
diff --git a/runtime/datamate-python/tests/test_module_shared.py b/runtime/datamate-python/tests/test_module_shared.py
new file mode 100644
index 00000000..e81b7e49
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_shared.py
@@ -0,0 +1,77 @@
+import json
+
+import pytest
+
+from app.module.shared.util.structured_file import (
+ COTItemHandler,
+ ItemTypes,
+ QAItemHandler,
+ StructuredFileHandlerFactory,
+)
+
+
+def test_qa_handler_validate_json_accepts_alpaca_item() -> None:
+    alpaca_item = {"instruction": "i", "output": "o"}  # instruction + output pair
+    assert QAItemHandler().validate_json(alpaca_item) is True
+
+
+def test_get_items_from_jsonl_skips_invalid_rows(tmp_path) -> None:
+    """Only the two complete rows of a three-line JSONL file must be returned."""
+    records = (
+        {"instruction": "i1", "output": "o1"},
+        {"instruction": "missing_output"},  # no "output" key: expected to be dropped
+        {"instruction": "i2", "output": "o2"},
+    )
+    jsonl_text = "\n".join(json.dumps(rec, ensure_ascii=False) for rec in records)
+    jsonl_path = tmp_path / "qa.jsonl"
+    jsonl_path.write_text(jsonl_text, encoding="utf-8")
+
+    loaded = QAItemHandler().get_items_from_file(str(jsonl_path))
+
+    assert len(loaded) == 2
+    assert (loaded[0]["output"], loaded[1]["output"]) == ("o1", "o2")
+
+
+def test_factory_get_handler_rejects_unknown_item_type() -> None:
+    get_handler = StructuredFileHandlerFactory().get_handler  # built outside the raises block
+    with pytest.raises(ValueError):  # "UNKNOWN" is expected not to resolve to a handler
+        get_handler("UNKNOWN")
+
+
+def test_qa_handler_validate_json_rejects_invalid_item() -> None:
+    incomplete_item = {"input": "x"}  # carries only an "input" key
+    assert QAItemHandler().validate_json(incomplete_item) is False
+
+
+def test_factory_get_handler_returns_qa_handler() -> None:
+    # The factory must hand back a QAItemHandler for the QA item-type value.
+    resolved = StructuredFileHandlerFactory().get_handler(ItemTypes.QA.value)
+    assert isinstance(resolved, QAItemHandler)
+
+
+def test_get_items_from_json_file_for_qa(tmp_path) -> None:
+    """A two-element JSON array file must yield two items, the first being q1."""
+    qa_pairs = [
+        {"instruction": "q1", "output": "a1"},
+        {"instruction": "q2", "output": "a2"},
+    ]
+    payload = json.dumps(qa_pairs, ensure_ascii=False)
+    json_path = tmp_path / "qa.json"
+    json_path.write_text(payload, encoding="utf-8")
+
+    loaded = QAItemHandler().get_items_from_file(str(json_path))
+
+    assert len(loaded) == 2
+    first_item = loaded[0]
+    assert first_item["instruction"] == "q1"
+
+
+def test_cot_handler_validate_json_requires_question_field() -> None:
+    qa_shaped_item = {"instruction": "x", "output": "y"}  # carries no "question" key
+    assert COTItemHandler().validate_json(qa_shaped_item) is False
+
+
+def test_factory_get_handler_returns_cot_handler() -> None:
+    # The factory must hand back a COTItemHandler for the COT item-type value.
+    resolved = StructuredFileHandlerFactory().get_handler(ItemTypes.COT.value)
+    assert isinstance(resolved, COTItemHandler)
diff --git a/runtime/datamate-python/tests/test_module_synthesis.py b/runtime/datamate-python/tests/test_module_synthesis.py
new file mode 100644
index 00000000..2417308d
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_synthesis.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+
+
+def test_synthesis_module_has_no_python_sources_yet() -> None:
+    """Placeholder guard: fails as soon as any *.py file appears under synthesis."""
+    base_dir = Path(__file__).resolve().parents[1]
+    synthesis_dir = base_dir / "app" / "module" / "synthesis"
+    found_sources = list(synthesis_dir.rglob("*.py"))
+    reminder = "synthesis 模块已有 Python 实现,请补充真实业务单测并删除该占位用例"
+    assert found_sources == [], reminder
+
+
+def test_synthesis_module_scaffold_directories_exist() -> None:
+    """The synthesis package must contain interface, schema and service entries."""
+    synthesis_dir = Path(__file__).resolve().parents[1] / "app" / "module" / "synthesis"
+
+    for child_name in ("interface", "schema", "service"):
+        assert (synthesis_dir / child_name).exists()
+
+
+def test_synthesis_scaffold_contains_only_expected_directories_or_cache() -> None:
+    """The synthesis dir must hold the three scaffold entries and, at most, a __pycache__."""
+    root = Path(__file__).resolve().parents[1] / "app" / "module" / "synthesis"
+    names = {p.name for p in root.iterdir()}
+    assert {"interface", "schema", "service"} <= names  # all scaffold entries present
+    assert names <= {"interface", "schema", "service", "__pycache__"}, sorted(names)
+
+
+def test_synthesis_module_path_exists() -> None:
+    synthesis_dir = Path(__file__).resolve().parents[1].joinpath("app", "module", "synthesis")
+    assert synthesis_dir.exists()  # the path is present on disk ...
+    assert synthesis_dir.is_dir()  # ... and it is a directory, not a file
diff --git a/runtime/datamate-python/tests/test_module_system.py b/runtime/datamate-python/tests/test_module_system.py
new file mode 100644
index 00000000..01349fef
--- /dev/null
+++ b/runtime/datamate-python/tests/test_module_system.py
@@ -0,0 +1,67 @@
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+from app.module.system.service.common_service import get_model_by_id
+
+
+def _run(coro):  # sync bridge: lets plain test functions drive a coroutine to its result
+    return asyncio.run(coro)
+
+
+def test_get_model_by_id_returns_model_when_found() -> None:
+    """The object produced by scalar_one_or_none() must be returned unchanged."""
+    stored_model = SimpleNamespace(id="m1")
+    query_result = MagicMock()
+    query_result.scalar_one_or_none.return_value = stored_model
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=query_result)
+
+    fetched = _run(get_model_by_id(session, "m1"))
+    assert fetched is stored_model
+
+
+def test_get_model_by_id_returns_none_when_missing() -> None:
+    """A result whose scalar_one_or_none() yields None must produce None."""
+    empty_result = MagicMock()
+    empty_result.scalar_one_or_none.return_value = None
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=empty_result)
+
+    lookup = _run(get_model_by_id(session, "missing"))
+    assert lookup is None
+
+
+def test_get_model_by_id_invokes_db_execute_once() -> None:
+    """A single lookup must call db.execute exactly one time."""
+    empty_result = MagicMock(**{"scalar_one_or_none.return_value": None})
+    execute_mock = AsyncMock(return_value=empty_result)
+    session = MagicMock()
+    session.execute = execute_mock
+
+    _run(get_model_by_id(session, "m2"))
+    execute_mock.assert_called_once()
+
+
+def test_get_model_by_id_passes_query_object_to_execute() -> None:
+    """db.execute must receive exactly one positional argument, and it must not be None."""
+    empty_result = MagicMock()
+    empty_result.scalar_one_or_none.return_value = None
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=empty_result)
+
+    _run(get_model_by_id(session, "model-xyz"))
+    positional = session.execute.call_args.args
+    assert len(positional) == 1
+    assert positional[0] is not None
+
+
+def test_get_model_by_id_returns_exact_scalar_object() -> None:
+    """Identity check: the returned object is the very instance the result held."""
+    expected = SimpleNamespace(id="m100", endpoint="x")
+    query_result = MagicMock(**{"scalar_one_or_none.return_value": expected})
+    session = MagicMock()
+    session.execute = AsyncMock(return_value=query_result)
+
+    returned = _run(get_model_by_id(session, "m100"))
+    assert returned is expected