From fa932a127101745b9ee6e0c04c821c0e815a56e3 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Tue, 26 May 2026 22:38:50 +0800 Subject: [PATCH 1/7] support hy_v3 and ministral3 --- README.md | 4 +- gptqmodel/models/auto.py | 4 ++ gptqmodel/models/definitions/__init__.py | 2 + gptqmodel/models/definitions/hy_v3.py | 47 ++++++++++++++++++++++ gptqmodel/models/definitions/ministral3.py | 23 +++++++++++ tests/test_hy_v3_support.py | 43 ++++++++++++++++++++ tests/test_ministral3_support.py | 31 ++++++++++++++ 7 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 gptqmodel/models/definitions/hy_v3.py create mode 100644 gptqmodel/models/definitions/ministral3.py create mode 100644 tests/test_hy_v3_support.py create mode 100644 tests/test_ministral3_support.py diff --git a/README.md b/README.md index dc7db1b33..9516dc3e1 100644 --- a/README.md +++ b/README.md @@ -251,7 +251,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode | Model | | | | | | | | | | |--------------------------|---|---------------------------------|--|------------------|--|---------------------------------|--|------------------------|---| -| Apertus | ✅ | EXAONE 3/4 | ✅ | Dots1 | ✅ | Mistral3 | ✅ | Qwen 2/3/3.5 (Next/MoE) | ✅ | +| Apertus | ✅ | EXAONE 3/4 | ✅ | Dots1 | ✅ | Mistral3 / Ministral3 | ✅ | Qwen 2/3/3.5 (Next/MoE) | ✅ | | Baichuan | ✅ | Falcon (H1 / Mamba) | ✅ | InternLM 1/2/2.5 | ✅ | Mixtral | ✅ | Qwen 2/2.5/3 VL | ✅ | | Bloom | ✅ | FastVLM | ✅ | Kimi K2 | ✅ | MobileLLM | ✅ | Qwen 2.5/3 Omni | ✅ | | ChatGLM | ✅ | Gemma 1-4 / 3n | ✅ | Klear | ✅ | MOSS | ✅ | RefinedWeb | ✅ | @@ -266,7 +266,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode | XVERSE | ✅ | Brumby | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3/3.5 | ✅ | | MiniMax M2 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ | | InternVL Chat | ✅ | Laguna | ✅ | Mimo / Mimo V2 | ✅ | Zamba / Zamba2 | ✅ | Intern S1 | ✅ | -| HunYuan V1 Dense / MoE | ✅ | 
| | | | | | | | +| HunYuan V1 Dense / MoE | ✅ | HY-V3 | ✅ | | | | | | | Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included. diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index fe9722c67..ad8585663 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -116,6 +116,7 @@ from .definitions.hrm_text import HrmTextQModel # noqa: E402 from .definitions.hunyuan_v1_dense import HunYuanDenseV1QModel # noqa: E402 from .definitions.hunyuan_v1_moe import HunYuanMoEV1QModel # noqa: E402 +from .definitions.hy_v3 import HYV3QModel # noqa: E402 from .definitions.hymba import HymbaQModel # noqa: E402 from .definitions.instella import InstellaQModel # noqa: E402 from .definitions.internlm import InternLMQModel # noqa: E402 @@ -139,6 +140,7 @@ from .definitions.minicpmv import MiniCPMVQModel # noqa: E402 from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel # noqa: E402 from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402 +from .definitions.ministral3 import Ministral3GPTQ # noqa: E402 from .definitions.mistral3 import Mistral3GPTQ from .definitions.mixtral import MixtralQModel # noqa: E402 from .definitions.mllama import MLlamaQModel # noqa: E402 @@ -234,6 +236,7 @@ "hrm_text": HrmTextQModel, "hunyuan_v1_dense": HunYuanDenseV1QModel, "hunyuan_v1_moe": HunYuanMoEV1QModel, + "hy_v3": HYV3QModel, "qwen": QwenQModel, "mistral": LlamaQModel, # 100% llama clone "yi": LlamaQModel, # 100% llama clone @@ -267,6 +270,7 @@ "minicpmv4_6": MiniCPMV4_6QModel, "minimax": MiniMaxM2GPTQ, "minimax_m2": MiniMaxM2GPTQ, + "ministral3": Ministral3GPTQ, "qwen2_moe": Qwen2MoeQModel, "qwen3_moe": Qwen3MoeQModel, "qwen3_next": Qwen3NextGPTQ, diff --git a/gptqmodel/models/definitions/__init__.py 
b/gptqmodel/models/definitions/__init__.py index 2760a3672..658b0be3b 100644 --- a/gptqmodel/models/definitions/__init__.py +++ b/gptqmodel/models/definitions/__init__.py @@ -43,6 +43,7 @@ from .hrm_text import HrmTextQModel from .hunyuan_v1_dense import HunYuanDenseV1QModel from .hunyuan_v1_moe import HunYuanMoEV1QModel +from .hy_v3 import HYV3QModel from .hymba import HymbaQModel from .instella import InstellaQModel from .internlm import InternLMQModel @@ -56,6 +57,7 @@ from .minicpmv import MiniCPMVQModel from .minicpmv_4_6 import MiniCPMV4_6QModel from .minimax_m2 import MiniMaxM2GPTQ +from .ministral3 import Ministral3GPTQ from .mimo_v2 import MimoV2QModel from .mixtral import MixtralQModel from .mllama import MLlamaQModel diff --git a/gptqmodel/models/definitions/hy_v3.py b/gptqmodel/models/definitions/hy_v3.py new file mode 100644 index 000000000..66d7c4653 --- /dev/null +++ b/gptqmodel/models/definitions/hy_v3.py @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-License-Identifier: Apache-2.0 + +from ..base import BaseQModel +from ..moe_lifecycle import GateUpDownMoELifecycleHooks + + +class HYV3QModel(BaseQModel): + # HYV3 uses a dense first MLP layer and sparse MoE layers after it. 
+ layer_modules_strict = False + dynamic_expert_index = "num_experts" + + pre_lm_head_norm_module = "model.norm" + + awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"] + + moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() + + module_tree = [ + "model", + "layers", + "#", + { + "input_layernorm": ("input_layernorm:!",), + "self_attn": ( + "q_norm:!", + "k_norm:!", + "q_proj:0", + "k_proj:0", + "v_proj:0", + "o_proj:1", + ), + "post_attention_layernorm": ("post_attention_layernorm:!",), + "mlp:moe": { + "gate": ("gate:!",), + "e_score_correction_bias": ("e_score_correction_bias:!",), + "experts": { + "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), + }, + "shared_experts": ("gate_proj:0", "up_proj:0", "down_proj:1"), + "": ("gate_proj:0", "up_proj:0", "down_proj:1"), + }, + }, + ] + + +__all__ = ["HYV3QModel"] diff --git a/gptqmodel/models/definitions/ministral3.py b/gptqmodel/models/definitions/ministral3.py new file mode 100644 index 000000000..0a901132e --- /dev/null +++ b/gptqmodel/models/definitions/ministral3.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-License-Identifier: Apache-2.0 + +from ..base import BaseQModel + + +class Ministral3GPTQ(BaseQModel): + pre_lm_head_norm_module = "model.norm" + + module_tree = [ + "model", + "layers", + "#", + { + "input_layernorm": ("input_layernorm:!",), + "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), + "post_attention_layernorm": ("post_attention_layernorm:!",), + "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"), + }, + ] + + +__all__ = ["Ministral3GPTQ"] diff --git a/tests/test_hy_v3_support.py b/tests/test_hy_v3_support.py new file mode 100644 index 000000000..475969bd8 --- /dev/null +++ b/tests/test_hy_v3_support.py @@ -0,0 +1,43 @@ +from types import SimpleNamespace + +from gptqmodel.models import auto +from gptqmodel.models.definitions.hy_v3 import HYV3QModel + +def test_hy_v3_model_type_selects_definition(monkeypatch): + fake_config = 
SimpleNamespace(model_type="hy_v3") + + monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code) + monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config) + + assert auto.check_and_get_model_definition("/tmp/hy_v3") is HYV3QModel + + +def test_hy_v3_module_tree_expands_dense_and_sparse_moe_paths(): + layer_modules = HYV3QModel.simple_layer_modules( + model_config=SimpleNamespace(num_experts=3), + quantize_config=SimpleNamespace(dynamic=None), + ) + flat_modules = {name for block in layer_modules for name in block} + first_expert_block = next(i for i, block in enumerate(layer_modules) if "mlp.experts.0.gate_proj" in block) + shared_block = next(i for i, block in enumerate(layer_modules) if "mlp.shared_experts.gate_proj" in block) + + assert HYV3QModel.layer_modules_strict is False + assert HYV3QModel.dynamic_expert_index == "num_experts" + assert "self_attn.q_proj" in flat_modules + assert "self_attn.k_proj" in flat_modules + assert "self_attn.v_proj" in flat_modules + assert "self_attn.o_proj" in flat_modules + assert "self_attn.q_norm" not in flat_modules + assert "self_attn.k_norm" not in flat_modules + assert "mlp.gate_proj" in flat_modules + assert "mlp.up_proj" in flat_modules + assert "mlp.down_proj" in flat_modules + assert "mlp.shared_experts.gate_proj" in flat_modules + assert "mlp.shared_experts.up_proj" in flat_modules + assert "mlp.shared_experts.down_proj" in flat_modules + assert "mlp.experts.0.gate_proj" in flat_modules + assert "mlp.experts.1.up_proj" in flat_modules + assert "mlp.experts.2.down_proj" in flat_modules + assert "mlp.gate" not in flat_modules + assert "mlp.e_score_correction_bias" not in flat_modules + assert first_expert_block < shared_block \ No newline at end of file diff --git a/tests/test_ministral3_support.py b/tests/test_ministral3_support.py new file mode 100644 index 000000000..e41156f3b --- /dev/null +++ 
b/tests/test_ministral3_support.py @@ -0,0 +1,31 @@ +from types import SimpleNamespace + +from gptqmodel.models import auto +from gptqmodel.models.definitions.ministral3 import Ministral3GPTQ + + +def test_ministral3_model_type_selects_definition(monkeypatch): + fake_config = SimpleNamespace(model_type="ministral3") + + monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code) + monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config) + + assert auto.check_and_get_model_definition("/tmp/ministral3") is Ministral3GPTQ + + +def test_ministral3_module_tree_matches_text_only_layout(): + layer_modules = Ministral3GPTQ.simple_layer_modules( + model_config=SimpleNamespace(), + quantize_config=SimpleNamespace(dynamic=None), + ) + flat_modules = {name for block in layer_modules for name in block} + + assert Ministral3GPTQ.module_tree[:3] == ["model", "layers", "#"] + assert "self_attn.q_proj" in flat_modules + assert "self_attn.k_proj" in flat_modules + assert "self_attn.v_proj" in flat_modules + assert "self_attn.o_proj" in flat_modules + assert "mlp.gate_proj" in flat_modules + assert "mlp.up_proj" in flat_modules + assert "mlp.down_proj" in flat_modules + From 498a3ce5bd3b4f5dadbc319e75488aeda9d9ad2c Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 27 May 2026 08:55:26 +0800 Subject: [PATCH 2/7] update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9516dc3e1..760cb6713 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ ## Latest News +* 05/25/2026 7.1.0-dev `main`: ✨ Added `hy_v3` and `ministral3` model support * 05/25/2026 7.1.0-dev `main`: ✨ Added `hunyuan_v1_dense` and `hunyuan_v1_moe` model support * 05/21/2026 7.1.0-dev `main`: ✨ Added `nemotron_labs_diffusion` model support * 05/20/2026 7.1.0-dev `main`: ✨ Added `interns1`, `ovis2_5`, `ovis2_6_moe` and `ovis2_6_next` model support From 
a96b39f767adf401b9d16ec193ccbcb8e9794c9c Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 27 May 2026 09:02:39 +0800 Subject: [PATCH 3/7] update module tree of hy_v3 --- gptqmodel/models/definitions/hy_v3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gptqmodel/models/definitions/hy_v3.py b/gptqmodel/models/definitions/hy_v3.py index 66d7c4653..de26236af 100644 --- a/gptqmodel/models/definitions/hy_v3.py +++ b/gptqmodel/models/definitions/hy_v3.py @@ -33,7 +33,6 @@ class HYV3QModel(BaseQModel): "post_attention_layernorm": ("post_attention_layernorm:!",), "mlp:moe": { "gate": ("gate:!",), - "e_score_correction_bias": ("e_score_correction_bias:!",), "experts": { "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), }, From fbacddf9640c5373b1db2c8a8b3d82567e53d58d Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 27 May 2026 09:06:40 +0800 Subject: [PATCH 4/7] add test_hy_v3.py and test_ministral3.py --- tests/models/test_hunyuan_v1_dense.py | 7 +++---- tests/models/test_hunyuan_v1_moe.py | 4 ++-- tests/models/test_hy_v3.py | 28 ++++++++++++++++++++++++++ tests/models/test_ministral3.py | 29 +++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/models/test_hy_v3.py create mode 100644 tests/models/test_ministral3.py diff --git a/tests/models/test_hunyuan_v1_dense.py b/tests/models/test_hunyuan_v1_dense.py index 31cec1bed..51576f8f8 100644 --- a/tests/models/test_hunyuan_v1_dense.py +++ b/tests/models/test_hunyuan_v1_dense.py @@ -6,7 +6,7 @@ from model_test import ModelTest -class TestNemotronUltra(ModelTest): +class TestHunyuanV1Dense(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/HY-MT1.5-1.8B" # tencent/HY-MT1.5-1.8B EVAL_TASKS_SLOW = { "arc_challenge": { @@ -24,6 +24,5 @@ class TestNemotronUltra(ModelTest): } EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) - def test_nemotron_ultra(self): - # self.quantize_and_evaluate() - print(self.evaluate_model(self.SAVE_PATH)) + def 
test_hunyuan_v1_dense(self): + self.quantize_and_evaluate() diff --git a/tests/models/test_hunyuan_v1_moe.py b/tests/models/test_hunyuan_v1_moe.py index b97c47e33..f362875cb 100644 --- a/tests/models/test_hunyuan_v1_moe.py +++ b/tests/models/test_hunyuan_v1_moe.py @@ -6,7 +6,7 @@ from model_test import ModelTest -class TestNemotronUltra(ModelTest): +class TestHunyuanV1MoE(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Hunyuan-A13B-Instruct" # tencent/Hunyuan-A13B-Instruct EVAL_TASKS_SLOW = { "arc_challenge": { @@ -24,5 +24,5 @@ class TestNemotronUltra(ModelTest): } EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) - def test_nemotron_ultra(self): + def test_hunyuan_v1_moe(self): self.quantize_and_evaluate() diff --git a/tests/models/test_hy_v3.py b/tests/models/test_hy_v3.py new file mode 100644 index 000000000..4e7cabbfe --- /dev/null +++ b/tests/models/test_hy_v3.py @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from model_test import ModelTest + + +class TestHyV3(ModelTest): + NATIVE_MODEL_ID = "/monster/data/model/Hy-MT2-30B-A3B" # tencent/Hy-MT2-30B-A3B + EVAL_TASKS_SLOW = { + "arc_challenge": { + "chat_template": True, + "acc": {"value": 0.3182, "floor_pct": 0.36}, + "acc_norm": {"value": 0.3472, "floor_pct": 0.36}, + }, + "mmlu_stem": { + "chat_template": False, + "acc": { + "value": 0.4024, + "floor_pct": 0.04, + }, + }, + } + EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) + + def test_hy_v3(self): + self.quantize_and_evaluate() diff --git a/tests/models/test_ministral3.py b/tests/models/test_ministral3.py new file mode 100644 index 000000000..b5f6d696e --- /dev/null +++ b/tests/models/test_ministral3.py @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# 
SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from model_test import ModelTest + + +class TestMinistral3(ModelTest): + NATIVE_MODEL_ID = "/monster/data/model/Ministral-3-3B-Instruct-2512-TextOnly" # "Aratako/Ministral-3-3B-Instruct-2512-TextOnly" + NATIVE_ARC_CHALLENGE_ACC = 0.4974 + NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5256 + NATIVE_ARC_CHALLENGE_ACC_SLOW = NATIVE_ARC_CHALLENGE_ACC + NATIVE_ARC_CHALLENGE_ACC_NORM_SLOW = NATIVE_ARC_CHALLENGE_ACC_NORM + NATIVE_ARC_CHALLENGE_ACC_FAST = NATIVE_ARC_CHALLENGE_ACC_SLOW + NATIVE_ARC_CHALLENGE_ACC_NORM_FAST = NATIVE_ARC_CHALLENGE_ACC_NORM_SLOW + TRUST_REMOTE_CODE = False + EVAL_BATCH_SIZE = 6 + EVAL_TASKS_SLOW = { + "arc_challenge": { + "chat_template": False, + "acc": {"value": NATIVE_ARC_CHALLENGE_ACC}, + "acc_norm": {"value": NATIVE_ARC_CHALLENGE_ACC_NORM}, + }, + } + EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) + + def test_ministral3(self): + self.quantize_and_evaluate() From 25dd5e4b13312b90fd78ee3e856cdfe623c4684f Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 27 May 2026 09:15:55 +0800 Subject: [PATCH 5/7] cleanup --- gptqmodel/models/auto.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index ad8585663..794489a20 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -453,25 +453,6 @@ def _get_config_load_kwargs(kwargs: dict) -> dict: return get_hf_gguf_load_kwargs(kwargs) -def _normalize_supported_model_type(config) -> str: - model_type = config.model_type.lower() - config_class_name = type(config).__name__ - - if model_type == "qwen3_5": - if config_class_name == "Qwen3_5TextConfig": - return "qwen3_5_text" - if not hasattr(config, "text_config") and not hasattr(config, "vision_config"): - return "qwen3_5_text" - - if model_type == "qwen3_5_moe": - if config_class_name == "Qwen3_5MoeTextConfig": - return "qwen3_5_moe_text" - if not hasattr(config, 
"text_config") and not hasattr(config, "vision_config"): - return "qwen3_5_moe_text" - - return model_type - - def check_and_get_model_definition(model_dir, trust_remote_code=False, **config_load_kwargs): if "gguf_file" not in config_load_kwargs: model_dir = normalize_model_id_or_path_for_hf_gguf( From 2459c79a8350b5d13f3890fb4bcd5ce78d3030a8 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 24 Jun 2026 21:19:44 +0800 Subject: [PATCH 6/7] update arc_challenge score Signed-off-by: ZX-ModelCloud --- tests/models/test_ministral3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_ministral3.py b/tests/models/test_ministral3.py index b5f6d696e..e8aa14249 100644 --- a/tests/models/test_ministral3.py +++ b/tests/models/test_ministral3.py @@ -7,9 +7,9 @@ class TestMinistral3(ModelTest): - NATIVE_MODEL_ID = "/monster/data/model/Ministral-3-3B-Instruct-2512-TextOnly" # "Aratako/Ministral-3-3B-Instruct-2512-TextOnly" - NATIVE_ARC_CHALLENGE_ACC = 0.4974 - NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5256 + NATIVE_MODEL_ID = "/monster/data/model/Ministral-3-3B-Instruct-2512-TextOnly-BF16" # "Aratako/Ministral-3-3B-Instruct-2512-TextOnly" + NATIVE_ARC_CHALLENGE_ACC = 0.5870 + NATIVE_ARC_CHALLENGE_ACC_NORM = 0.6032 NATIVE_ARC_CHALLENGE_ACC_SLOW = NATIVE_ARC_CHALLENGE_ACC NATIVE_ARC_CHALLENGE_ACC_NORM_SLOW = NATIVE_ARC_CHALLENGE_ACC_NORM NATIVE_ARC_CHALLENGE_ACC_FAST = NATIVE_ARC_CHALLENGE_ACC_SLOW From 0a3c388968507e1709ccd7f5da1ccad10d3534db Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Wed, 24 Jun 2026 22:26:29 +0800 Subject: [PATCH 7/7] update arc_challenge score Signed-off-by: ZX-ModelCloud --- tests/models/test_hy_v3.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/models/test_hy_v3.py b/tests/models/test_hy_v3.py index 4e7cabbfe..3e08fe5dc 100644 --- a/tests/models/test_hy_v3.py +++ b/tests/models/test_hy_v3.py @@ -11,15 +11,8 @@ class TestHyV3(ModelTest): EVAL_TASKS_SLOW = { "arc_challenge": { 
"chat_template": True, - "acc": {"value": 0.3182, "floor_pct": 0.36}, - "acc_norm": {"value": 0.3472, "floor_pct": 0.36}, - }, - "mmlu_stem": { - "chat_template": False, - "acc": { - "value": 0.4024, - "floor_pct": 0.04, - }, + "acc": {"value": 0.5324, "floor_pct": 0.04}, + "acc_norm": {"value": 0.5341, "floor_pct": 0.04}, }, } EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW)