Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions runtime/datamate-python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from __future__ import annotations

import sys
from pathlib import Path
from types import ModuleType


def _register_namespace(module_name: str, module_path: Path) -> None:
    """Register a bare, namespace-style package stub in ``sys.modules``.

    The stub is an empty module whose ``__path__`` points at ``module_path``,
    so the import system resolves submodules from that directory and finds
    the stub instead of loading the real package.  A module that is already
    registered under ``module_name`` is left in place.

    Args:
        module_name: Dotted name to register, e.g. ``"pkg.sub"``.
        module_path: Directory searched for the package's submodules.
    """
    namespace_pkg = ModuleType(module_name)
    namespace_pkg.__path__ = [str(module_path)]  # type: ignore[attr-defined]
    registered = sys.modules.setdefault(module_name, namespace_pkg)

    # The import system binds a submodule on its parent only when it loads the
    # submodule itself; entries put into ``sys.modules`` by hand skip that
    # step, so bind it here to keep ``parent.child`` attribute access working.
    parent_name, _, child_name = module_name.rpartition(".")
    parent = sys.modules.get(parent_name) if parent_name else None
    if parent is not None and not hasattr(parent, child_name):
        setattr(parent, child_name, registered)


def pytest_sessionstart(session) -> None:
    """Pre-register ``app.module`` packages as namespace stubs.

    Keeps tests that import ``app.module.*`` from triggering the heavy
    dependency loading done by ``app/module/__init__.py``.
    """
    module_root = Path(__file__).resolve().parents[1] / "app" / "module"

    # Path components below ``app/module``; parents come before children.
    sub_packages = (
        (),
        ("cleaning",),
        ("cleaning", "service"),
        ("rag",),
        ("rag", "service"),
        ("rag", "service", "common"),
    )
    for parts in sub_packages:
        dotted_name = ".".join(("app", "module", *parts))
        _register_namespace(dotted_name, module_root.joinpath(*parts))
124 changes: 124 additions & 0 deletions runtime/datamate-python/tests/test_dataset_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-

import unittest
from unittest.mock import MagicMock, AsyncMock
import sys
import os

# Make sure the runtime/datamate-python directory is on sys.path.
_THIS_FILE = os.path.abspath(__file__)
TEST_DIR = os.path.dirname(_THIS_FILE)
APP_DIR = os.path.dirname(TEST_DIR)
sys.path.insert(0, APP_DIR)

from app.module.dataset.service.service import Service
from app.module.dataset.schema import DatasetResponse, PagedDatasetFileResponse, DatasetFileResponse
from app.db.models import Dataset, DatasetFiles


class TestDatasetService(unittest.IsolatedAsyncioTestCase):
    """Tests for the dataset ``Service`` running against a mocked DB session."""

    def setUp(self):
        # Mocked AsyncSession: the methods configured below are AsyncMock so
        # they can be awaited; any other attribute (e.g. ``add``) remains a
        # plain MagicMock.
        self.mock_db = MagicMock()
        self.mock_db.execute = AsyncMock()
        self.mock_db.commit = AsyncMock()
        self.mock_db.rollback = AsyncMock()
        self.mock_db.flush = AsyncMock()

        # Service under test, wired to the mocked session.
        self.service = Service(self.mock_db)

    async def test_get_dataset_success(self):
        """Fetching an existing dataset returns its details."""
        # Row the mocked query will yield.
        mock_dataset = Dataset(
            id="test-dataset-id",
            name="Test Dataset",
            description="A test description",
            dataset_type="TEXT",
            status="DRAFT",
            file_count=5,
            size_bytes=1024,
            created_by="system",
        )

        # Make db.execute(...) resolve to a result holding that row.
        mock_result = MagicMock()
        mock_result.scalar_one_or_none.return_value = mock_dataset
        self.mock_db.execute.return_value = mock_result

        response = await self.service.get_dataset("test-dataset-id")

        # The response exposes the model fields under camelCase names.
        self.assertIsNotNone(response)
        self.assertEqual(response.id, "test-dataset-id")
        self.assertEqual(response.name, "Test Dataset")
        self.assertEqual(response.description, "A test description")
        self.assertEqual(response.datasetType, "TEXT")
        self.assertEqual(response.status, "DRAFT")
        self.assertEqual(response.fileCount, 5)
        self.assertEqual(response.totalSize, 1024)

    async def test_get_dataset_not_found(self):
        """Fetching a dataset that does not exist returns None."""
        # The mocked query finds no row.
        mock_result = MagicMock()
        mock_result.scalar_one_or_none.return_value = None
        self.mock_db.execute.return_value = mock_result

        response = await self.service.get_dataset("non-existent-id")
        self.assertIsNone(response)

    async def test_create_dataset_success(self):
        """Creating a dataset with an unused name succeeds."""
        # 1. The name-existence check (select Dataset.name) finds nothing.
        mock_result_check = MagicMock()
        mock_result_check.scalar_one_or_none.return_value = None
        self.mock_db.execute.return_value = mock_result_check

        # 2. Call the create service.
        response = await self.service.create_dataset(
            name="New Dataset",
            dataset_type="IMAGE",
            description="Testing create_dataset API",
            status="PUBLISHED",
        )

        # 3. Verify the returned payload.
        self.assertIsNotNone(response)
        self.assertEqual(response.name, "New Dataset")
        self.assertEqual(response.datasetType, "IMAGE")
        self.assertEqual(response.description, "Testing create_dataset API")
        self.assertEqual(response.status, "PUBLISHED")

        # The row was added and the transaction committed.  ``commit`` is an
        # AsyncMock, so assert it was awaited: a call whose coroutine is never
        # awaited would satisfy ``assert_called_once`` without committing.
        self.mock_db.add.assert_called_once()
        self.mock_db.commit.assert_awaited_once()

    async def test_create_dataset_duplicated_name(self):
        """Creating a dataset whose name is already taken raises."""
        # The name-existence check finds a conflicting dataset.
        existing_dataset = Dataset(
            id="existing-id",
            name="Existing Dataset",
        )
        mock_result = MagicMock()
        mock_result.scalar_one_or_none.return_value = existing_dataset
        self.mock_db.execute.return_value = mock_result

        with self.assertRaises(Exception) as context:
            await self.service.create_dataset(
                name="Existing Dataset",
                dataset_type="AUDIO",
            )
        # Checked after the ``with`` block: statements placed after the
        # raising call inside it would never run.
        self.assertIn("already exists", str(context.exception))

        # Nothing may be committed when creation is rejected.
        self.mock_db.commit.assert_not_called()


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
132 changes: 132 additions & 0 deletions runtime/datamate-python/tests/test_module_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from app.module.annotation.utils.config_validator import LabelStudioConfigValidator
import pytest


def test_validate_xml_success_with_object_and_control() -> None:
    """A <View> with one object tag and one control tag validates cleanly."""
    config_xml = (
        "<View>\n"
        '<Image name="image" value="$image"/>\n'
        '<Choices name="label" toName="image">\n'
        '<Choice value="Cat"/>\n'
        '<Choice value="Dog"/>\n'
        "</Choices>\n"
        "</View>"
    )

    is_valid, message = LabelStudioConfigValidator.validate_xml(config_xml)

    assert is_valid is True
    assert message is None


def test_validate_xml_fails_when_no_controls() -> None:
    """A config holding only an object tag, with no control tag, is rejected."""
    config_xml = '<View><Image name="image" value="$image"/></View>'

    is_valid, message = LabelStudioConfigValidator.validate_xml(config_xml)

    assert is_valid is False
    assert "No annotation controls" in (message or "")


def test_validate_configuration_json_rejects_unknown_object_reference() -> None:
    """A label whose ``toName`` names no declared object is rejected."""
    dangling_label = {
        "fromName": "sentiment",
        "toName": "missing_object",
        "type": "Choices",
        "options": ["positive", "negative"],
    }
    declared_object = {"name": "text", "type": "Text", "value": "$text"}
    config = {"labels": [dangling_label], "objects": [declared_object]}

    is_valid, message = LabelStudioConfigValidator.validate_configuration_json(config)

    assert is_valid is False
    assert "unknown object" in (message or "")


def test_extract_label_values() -> None:
    """Choice values are returned keyed by the control's name."""
    config_xml = (
        "<View>\n"
        '<Text name="text" value="$text"/>\n'
        '<Choices name="sentiment" toName="text">\n'
        '<Choice value="positive"/>\n'
        '<Choice value="negative"/>\n'
        "</Choices>\n"
        "</View>"
    )
    expected = {"sentiment": ["positive", "negative"]}

    extracted = LabelStudioConfigValidator.extract_label_values(config_xml)

    assert extracted == expected


def test_validate_xml_rejects_invalid_root() -> None:
    """A document whose root element is not <View> is rejected."""
    config_xml = '<Root><Text name="text" value="$text"/></Root>'

    is_valid, message = LabelStudioConfigValidator.validate_xml(config_xml)

    assert is_valid is False
    assert "Root element must be <View>" in (message or "")


def test_validate_configuration_json_requires_labels() -> None:
    """A JSON configuration without a 'labels' key is rejected."""
    config_without_labels = {"objects": []}

    is_valid, message = LabelStudioConfigValidator.validate_configuration_json(
        config_without_labels
    )

    assert is_valid is False
    assert "Missing 'labels' field" in (message or "")


def test_validate_xml_fails_for_invalid_xml() -> None:
    """Malformed XML (an unclosed <Text> tag) is reported as a parse error."""
    malformed_xml = "<View><Text></View>"

    is_valid, message = LabelStudioConfigValidator.validate_xml(malformed_xml)

    assert is_valid is False
    assert "XML parse error" in (message or "")


@pytest.mark.parametrize(
    "label,error_text",
    [
        # Each case omits exactly one key and expects that key in the error.
        ({"toName": "obj", "type": "Choices", "options": ["A"]}, "fromName"),
        ({"fromName": "lbl", "type": "Choices", "options": ["A"]}, "toName"),
        ({"fromName": "lbl", "toName": "obj", "options": ["A"]}, "type"),
    ],
)
def test_validate_label_definition_required_fields(label, error_text: str) -> None:
    """A label definition missing a required key is rejected, naming the key."""
    is_valid, message = LabelStudioConfigValidator._validate_label_definition(label)

    assert is_valid is False
    assert error_text in (message or "")


def test_validate_label_definition_rejects_unsupported_type() -> None:
    """A label definition with the type "NotSupported" is rejected."""
    label = {"fromName": "x", "toName": "obj", "type": "NotSupported"}

    is_valid, message = LabelStudioConfigValidator._validate_label_definition(label)

    assert is_valid is False
    assert "Unsupported control type" in (message or "")


def test_validate_object_definition_rejects_value_without_dollar_prefix() -> None:
    """An object definition whose ``value`` lacks the leading '$' is rejected."""
    definition = {"name": "txt", "type": "Text", "value": "text"}

    is_valid, message = LabelStudioConfigValidator._validate_object_definition(definition)

    assert is_valid is False
    assert "must start with '$'" in (message or "")


def test_extract_label_values_returns_empty_on_invalid_xml() -> None:
    """Unparseable XML yields an empty mapping."""
    broken_xml = "<broken"

    extracted = LabelStudioConfigValidator.extract_label_values(broken_xml)

    assert extracted == {}


def test_validate_xml_requires_control_name_and_to_name() -> None:
    """A control tag without a ``name`` attribute is rejected."""
    # Only the missing-``name`` case is exercised here.
    config_xml = (
        "<View>\n"
        '<Text name="text" value="$text"/>\n'
        '<Choices toName="text"><Choice value="A"/></Choices>\n'
        "</View>"
    )

    is_valid, message = LabelStudioConfigValidator.validate_xml(config_xml)

    assert is_valid is False
    assert "Missing 'name' attribute" in (message or "")

99 changes: 99 additions & 0 deletions runtime/datamate-python/tests/test_module_cleaning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import pytest

from app.core.exception import BusinessError
from app.module.cleaning.schema.cleaning import OperatorInstanceDto
from app.module.cleaning.service.cleaning_task_validator import CleaningTaskValidator
from app.module.operator.constants import CATEGORY_DATAMATE_ID, CATEGORY_DATA_JUICER_ID


def _op(
    op_id: str,
    inputs: str | None,
    outputs: str | None,
    categories: list[str] | None = None,
) -> OperatorInstanceDto:
    """Build an ``OperatorInstanceDto`` from the fields these tests vary."""
    fields = {
        "id": op_id,
        "inputs": inputs,
        "outputs": outputs,
        "categories": categories,
    }
    return OperatorInstanceDto(**fields)


def test_check_input_and_output_passes_with_multimodal() -> None:
    """No error is raised when the upstream operator outputs "multimodal"."""
    upstream = _op("a", "text", "multimodal")
    downstream = _op("b", "image", "text")

    CleaningTaskValidator.check_input_and_output([upstream, downstream])


def test_check_input_and_output_raises_on_type_mismatch() -> None:
    """An "image" output feeding a "text" input raises BusinessError."""
    upstream = _op("a", "text", "image")
    downstream = _op("b", "text", "text")

    with pytest.raises(BusinessError):
        CleaningTaskValidator.check_input_and_output([upstream, downstream])


def test_check_and_get_executor_type_raises_when_mixed_categories() -> None:
    """Mixing DataMate and Data-Juicer category operators raises BusinessError."""
    datamate_op = _op("a", None, None, [CATEGORY_DATAMATE_ID])
    data_juicer_op = _op("b", None, None, [CATEGORY_DATA_JUICER_ID])

    with pytest.raises(BusinessError):
        CleaningTaskValidator.check_and_get_executor_type([datamate_op, data_juicer_op])


def test_check_and_get_executor_type_defaults_to_datamate() -> None:
    """An operator without categories resolves to the "datamate" executor."""
    uncategorised = _op("a", None, None, None)

    executor = CleaningTaskValidator.check_and_get_executor_type([uncategorised])

    assert executor == "datamate"


def test_check_task_id_raises_when_empty() -> None:
    """An empty task id raises BusinessError."""
    empty_task_id = ""

    with pytest.raises(BusinessError):
        CleaningTaskValidator.check_task_id(empty_task_id)


def test_check_task_id_accepts_normal_value() -> None:
    """A non-empty task id passes without raising."""
    task_id = "task-1"

    CleaningTaskValidator.check_task_id(task_id)


def test_check_input_and_output_returns_for_empty_instances() -> None:
    """An empty operator list is accepted without raising."""
    no_instances = []

    CleaningTaskValidator.check_input_and_output(no_instances)


def test_check_input_and_output_raises_when_current_has_no_outputs() -> None:
    """An upstream operator with no output type raises BusinessError."""
    upstream = _op("a", "text", None)
    downstream = _op("b", "text", "text")

    with pytest.raises(BusinessError):
        CleaningTaskValidator.check_input_and_output([upstream, downstream])


def test_check_input_and_output_raises_when_next_has_no_inputs() -> None:
    """A downstream operator with no input type raises BusinessError."""
    upstream = _op("a", "text", "text")
    downstream = _op("b", None, "text")

    with pytest.raises(BusinessError):
        CleaningTaskValidator.check_input_and_output([upstream, downstream])


@pytest.mark.parametrize(
    "out_type,in_type",
    [
        ("text", "text"),
        # These two differ from the input type only by surrounding
        # whitespace or letter case.
        (" image ", "image"),
        ("AUDIO", "audio"),
    ],
)
def test_check_input_and_output_allows_exact_match_with_normalization(out_type: str, in_type: str) -> None:
    """Matching output/input types are accepted for every listed pair."""
    upstream = _op("a", "x", out_type)
    downstream = _op("b", in_type, "y")

    CleaningTaskValidator.check_input_and_output([upstream, downstream])


def test_check_and_get_executor_type_prefers_datajuicer_when_only_datajuicer() -> None:
    """A pipeline made only of Data-Juicer operators resolves to "default"."""
    data_juicer_only = [_op("a", None, None, [CATEGORY_DATA_JUICER_ID])]

    executor = CleaningTaskValidator.check_and_get_executor_type(data_juicer_only)

    # NOTE(review): "default" is presumably the executor id used for
    # Data-Juicer pipelines - confirm against the validator.
    assert executor == "default"
Loading
Loading