diff --git a/README.md b/README.md index 6c1275928..c597da528 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ ## Latest News +* 06/30/2026 7.1.0-dev `main`: ✨ Added `minimax_m3_vl` / MiniMax M3 model support * 06/25/2026 7.1.0-dev `main`: ✨ Added `cohere2_moe` model support * 05/25/2026 7.1.0-dev `main`: ✨ Added `hy_3` and `ministral3` model support * 05/25/2026 7.1.0-dev `main`: ✨ Added `hunyuan_v1_dense` and `hunyuan_v1_moe` model support @@ -266,7 +267,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode | Dream | ✅ | GRIN-MoE | ✅ | Instella | ✅ | Phi 1-4 | ✅ | Voxtral | ✅ | | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α | ✅ | | XVERSE | ✅ | Brumby | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3/3.5 | ✅ | -| MiniMax M2 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ | +| MiniMax M2/M3 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ | | InternVL Chat | ✅ | Laguna | ✅ | Mimo / Mimo V2 | ✅ | Zamba / Zamba2 | ✅ | Intern S1 | ✅ | | HunYuan V1 Dense / MoE | ✅ | HY-V3 | ✅ | | | | | | | diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index 58be756de..f2103551f 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -142,6 +142,7 @@ from .definitions.minicpmv import MiniCPMVQModel # noqa: E402 from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel # noqa: E402 from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402 +from .definitions.minimax_m3_vl import MiniMaxM3VLGPTQ # noqa: E402 from .definitions.ministral3 import Ministral3GPTQ # noqa: E402 from .definitions.mistral3 import Mistral3GPTQ from .definitions.mixtral import MixtralQModel # noqa: E402 @@ -277,6 +278,7 @@ "minicpmv4_6": MiniCPMV4_6QModel, "minimax": MiniMaxM2GPTQ, "minimax_m2": MiniMaxM2GPTQ, + "minimax_m3_vl": MiniMaxM3VLGPTQ, "ministral3": Ministral3GPTQ, "qwen2_moe": Qwen2MoeQModel, "qwen3_moe": Qwen3MoeQModel, 
diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
index e7bd5d9cf..26490e4e8 100644
--- a/gptqmodel/models/definitions/__init__.py
+++ b/gptqmodel/models/definitions/__init__.py
@@ -59,6 +59,7 @@
 from .minicpmv import MiniCPMVQModel
 from .minicpmv_4_6 import MiniCPMV4_6QModel
 from .minimax_m2 import MiniMaxM2GPTQ
+from .minimax_m3_vl import MiniMaxM3VLGPTQ
 from .ministral3 import Ministral3GPTQ
 from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel
diff --git a/gptqmodel/models/definitions/minimax_m3_vl.py b/gptqmodel/models/definitions/minimax_m3_vl.py
new file mode 100644
index 000000000..d66e1fee6
--- /dev/null
+++ b/gptqmodel/models/definitions/minimax_m3_vl.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from transformers import AutoModelForImageTextToText
+
+from ..base import BaseQModel
+from ..moe_lifecycle import GateUpDownMoELifecycleHooks
+
+
+class MiniMaxM3VLGPTQ(BaseQModel):
+    """Quantization definition for ``minimax_m3_vl`` (MiniMax M3) checkpoints.
+
+    ``module_tree`` only walks ``model.language_model.layers``. Early decoder
+    layers use a dense MLP and later ones a sparse MoE block, so both layouts
+    are listed under ``mlp:moe`` and strict layer-module matching is disabled.
+    """
+
+    loader = AutoModelForImageTextToText
+    require_load_processor = False
+    support_batch_quantize = False
+
+    pre_lm_head_norm_module = "model.language_model.norm"
+    rotary_embedding = "model.language_model.rotary_emb"
+
+    # MiniMax-M3 starts with dense MLP layers, then switches to sparse MoE.
+    layer_modules_strict = False
+    # Config field naming the expert count; presumably expands "experts" -> "#"
+    # below -- TODO confirm against BaseQModel.
+    dynamic_expert_index = "num_local_experts"
+
+    # Defuser splits MiniMax-M3 packed expert tensors into gate/up/down modules.
+    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()
+
+    # NOTE(review): ":0"/":1" look like per-layer grouping indices and ":!" like
+    # a "leave unquantized" marker -- confirm against BaseQModel before relying
+    # on this reading.
+    module_tree = [
+        "model",
+        "language_model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": (
+                "q_proj:0",
+                "q_norm:0:!",
+                "k_proj:0",
+                "k_norm:0:!",
+                "v_proj:0",
+                "indexer.q_proj:0",
+                "indexer.q_norm:0:!",
+                "indexer.k_proj:0",
+                "indexer.k_norm:0:!",
+                "o_proj:1",
+            ),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe": {
+                # Dense fallback used by early decoder blocks.
+                "": ("gate_up_proj:0", "down_proj:1"),
+                "gate": ("gate:!", "e_score_correction_bias:!"),
+                "shared_experts": ("gate_up_proj:0", "down_proj:1"),
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+            },
+        },
+    ]
+
+
+__all__ = ["MiniMaxM3VLGPTQ"]
diff --git a/gptqmodel/utils/structure.py b/gptqmodel/utils/structure.py
index 5b68e61cd..00015fd20 100644
--- a/gptqmodel/utils/structure.py
+++ b/gptqmodel/utils/structure.py
@@ -1855,6 +1855,57 @@ def _resolve_converter_tensor_source(
 
         return None, None, None, None
 
+    def _resolve_concat_checkpoint_tensor_sources(
+        self,
+        module_path: str,
+        rel_name: str,
+    ) -> Optional[tuple[list[str], int]]:
+        """Resolve
one fused runtime tensor backed by several split checkpoint tensors.""" + + combined_name = self._join_tensor_name(module_path, rel_name) + + for converter in self._runtime_to_checkpoint_converters: + if "Concatenate" not in converter.operation_names: + continue + if len(converter.source_patterns) != 1 or len(converter.target_patterns) < 2: + continue + + runtime_pattern = converter.source_patterns[0] + if _LazyWeightRenaming(runtime_pattern, runtime_pattern).rename_source_key(combined_name)[1] is None: + continue + + checkpoint_names = [] + for checkpoint_pattern in converter.target_patterns: + renamed, matched_pattern = _LazyWeightRenaming( + runtime_pattern, + checkpoint_pattern, + ).rename_source_key(combined_name) + if matched_pattern is None or renamed == combined_name: + checkpoint_names = [] + break + + resolved_name = None + for candidate in self._runtime_to_checkpoint_alias_candidates(renamed): + if candidate in self._weight_map: + resolved_name = candidate + break + if resolved_name is None: + checkpoint_names = [] + break + checkpoint_names.append(resolved_name) + + if len(checkpoint_names) != len(converter.target_patterns): + continue + + concat_dim = 0 + for operation in converter.operations: + if type(operation).__name__ == "Concatenate": + concat_dim = getattr(operation, "dim", 0) + break + return checkpoint_names, concat_dim + + return None + def _resolve_direct_checkpoint_tensor_source( self, module_path: str, @@ -2130,7 +2181,14 @@ def _copy_checkpoint_tensors_into_submodule( missing_nonpersistent_buffers: list[tuple[str, str]] = [] grouped_names: Dict[str, list[tuple[str, str, str, Optional[int], Optional[int], Optional[int]]]] = {} + concat_entries: list[tuple[str, str, list[str], int]] = [] for rel_name in t_params: + concat_source = self._resolve_concat_checkpoint_tensor_sources(module_path, rel_name) + if concat_source is not None: + full_names, concat_dim = concat_source + concat_entries.append(("param", rel_name, full_names, concat_dim)) + 
continue + full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, rel_name) if full_name is None: continue @@ -2153,6 +2211,12 @@ def _copy_checkpoint_tensors_into_submodule( grouped_names.setdefault(shard, []).append(("param", rel_name, full_name, expert_index, split_index, split_dim)) for rel_name, target_buffer in list(t_bufs.items()): + concat_source = self._resolve_concat_checkpoint_tensor_sources(module_path, rel_name) + if concat_source is not None: + full_names, concat_dim = concat_source + concat_entries.append(("buffer", rel_name, full_names, concat_dim)) + continue + full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, rel_name) if full_name is None: full_name = self._resolve_checkpoint_tensor_name(module_path, rel_name) @@ -2177,7 +2241,7 @@ def _copy_checkpoint_tensors_into_submodule( continue grouped_names.setdefault(shard, []).append(("buffer", rel_name, full_name, expert_index, split_index, split_dim)) - total_entries = sum(len(entries) for entries in grouped_names.values()) + total_entries = sum(len(entries) for entries in grouped_names.values()) + len(concat_entries) progress = None loaded_entries = 0 if total_entries: @@ -2189,6 +2253,112 @@ def _copy_checkpoint_tensors_into_submodule( try: with torch.inference_mode(): + for kind, rel_name, full_names, concat_dim in concat_entries: + if progress is not None: + progress.current_iter_step = loaded_entries + progress.subtitle(f"{rel_name}: {loaded_entries + 1}/{total_entries}") + progress.draw() + + target_tensor = t_params.get(rel_name) if kind == "param" else t_bufs.get(rel_name) + expected_shape = tuple(target_tensor.shape) if target_tensor is not None else None + parts = [] + for full_name in full_names: + shard = self._weight_map.get(full_name) + if shard is None: + raise RuntimeError( + self._materialization_issue_message( + phase="submodule materialization", + kind=kind, + module_path=module_path, 
+ rel_name=rel_name, + reason="checkpoint tensor mapping resolved to a missing shard", + full_name=full_name, + target_shape=expected_shape, + ) + ) + shard_path = os.path.join(self.model_local_path, shard) + with safe_open(shard_path, framework="pt", device="cpu") as handler: + parts.append(handler.get_tensor(full_name)) + + try: + tensor = torch.cat(parts, dim=concat_dim).contiguous() + except Exception as exc: + raise RuntimeError( + self._materialization_issue_message( + phase="submodule materialization", + kind=kind, + module_path=module_path, + rel_name=rel_name, + reason=f"checkpoint tensors could not be concatenated: {exc}", + full_name=", ".join(full_names), + target_shape=expected_shape, + ) + ) from exc + + if expected_shape is not None and tuple(tensor.shape) != expected_shape: + raise RuntimeError( + self._materialization_issue_message( + phase="submodule materialization", + kind=kind, + module_path=module_path, + rel_name=rel_name, + reason="concatenated checkpoint tensor shape does not match target tensor", + full_name=", ".join(full_names), + source_shape=tuple(tensor.shape), + target_shape=expected_shape, + ) + ) + + if kind == "param": + target_param = t_params.get(rel_name) + if target_param is None: + raise RuntimeError( + self._materialization_issue_message( + phase="submodule materialization", + kind=kind, + module_path=module_path, + rel_name=rel_name, + reason="target tensor disappeared before materialization", + full_name=", ".join(full_names), + source_shape=tuple(tensor.shape), + ) + ) + target_param_new = _ensure_target_storage_on_device_(target_param, device) + if target_param_new is not target_param: + t_parent, leaf = _get_parent_and_leaf_by_path(target_submodule, rel_name) + setattr(t_parent, leaf, target_param_new) + target_param = target_param_new + source = tensor.detach() + if source.dtype != target_param.dtype: + source = source.to(dtype=target_param.dtype) + target_param.detach().copy_(source, non_blocking=(non_blocking and 
source.is_pinned())) + else: + target_buffer = t_bufs.get(rel_name) + t_parent, leaf = _get_parent_and_leaf_by_path(target_submodule, rel_name) + persistent = leaf not in getattr(t_parent, "_non_persistent_buffers_set", set()) + source = tensor.detach() + if target_buffer is None: + new_buffer = source.to(device=device) + t_parent.register_buffer(leaf, new_buffer, persistent=persistent) + t_bufs[rel_name] = new_buffer + elif getattr(target_buffer, "is_meta", False) or target_buffer.device.type == "meta": + new_buffer = torch.empty_like(target_buffer, device=device) + new_buffer.copy_( + source.to(dtype=new_buffer.dtype), + non_blocking=(non_blocking and source.is_pinned()), + ) + t_parent.register_buffer(leaf, new_buffer, persistent=persistent) + t_bufs[rel_name] = new_buffer + else: + if source.dtype != target_buffer.dtype: + source = source.to(dtype=target_buffer.dtype) + target_buffer.copy_(source, non_blocking=(non_blocking and source.is_pinned())) + + loaded_entries += 1 + if progress is not None: + progress.current_iter_step = loaded_entries + progress.draw() + for shard, entries in grouped_names.items(): shard_path = os.path.join(self.model_local_path, shard) with safe_open(shard_path, framework="pt", device="cpu") as handler: @@ -2457,6 +2627,66 @@ def _materialize_direct_meta_tensors( if not _is_meta_tensor(shell_param): continue + concat_source = self._resolve_concat_checkpoint_tensor_sources(module_path, name) + if concat_source is not None: + full_names, concat_dim = concat_source + parts = [] + for full_name in full_names: + shard = self._weight_map.get(full_name) + if shard is None: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="param", + module_path=module_path, + rel_name=name, + reason="checkpoint tensor mapping resolved to a missing shard", + full_name=full_name, + target_shape=tuple(shell_param.shape), + )) + source_path = os.path.join(self.model_local_path, shard) + with safe_open(source_path, 
framework="pt", device="cpu") as handler: + parts.append(handler.get_tensor(full_name)) + + try: + source_param = torch.cat(parts, dim=concat_dim).contiguous() + except Exception as exc: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="param", + module_path=module_path, + rel_name=name, + reason=f"checkpoint tensors could not be concatenated: {exc}", + full_name=", ".join(full_names), + target_shape=tuple(shell_param.shape), + )) from exc + + if shell_param.shape != source_param.shape: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="param", + module_path=module_path, + rel_name=name, + reason="concatenated checkpoint tensor shape does not match target tensor", + full_name=", ".join(full_names), + source_shape=tuple(source_param.shape), + target_shape=tuple(shell_param.shape), + )) + + cache_key = (",".join(full_names), None, None, concat_dim, shell_param.dtype, shell_param.requires_grad) + new_param = param_cache.get(cache_key) + if new_param is None: + if source_param.dtype != shell_param.dtype: + source_param = source_param.to(dtype=shell_param.dtype) + new_param = nn.Parameter( + source_param.clone(), + requires_grad=shell_param.requires_grad, + ) + param_cache[cache_key] = new_param + + shell_sub.register_parameter(name, new_param) + synced += 1 + continue + full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, name) if full_name is None: continue @@ -2534,6 +2764,64 @@ def _materialize_direct_meta_tensors( if not _is_meta_tensor(shell_buffer): continue + concat_source = self._resolve_concat_checkpoint_tensor_sources(module_path, name) + if concat_source is not None: + full_names, concat_dim = concat_source + parts = [] + for full_name in full_names: + shard = self._weight_map.get(full_name) + if shard is None: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="buffer", + 
module_path=module_path, + rel_name=name, + reason="checkpoint tensor mapping resolved to a missing shard", + full_name=full_name, + target_shape=tuple(shell_buffer.shape), + )) + source_path = os.path.join(self.model_local_path, shard) + with safe_open(source_path, framework="pt", device="cpu") as handler: + parts.append(handler.get_tensor(full_name)) + + try: + source_buffer = torch.cat(parts, dim=concat_dim).contiguous() + except Exception as exc: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="buffer", + module_path=module_path, + rel_name=name, + reason=f"checkpoint tensors could not be concatenated: {exc}", + full_name=", ".join(full_names), + target_shape=tuple(shell_buffer.shape), + )) from exc + + if shell_buffer.shape != source_buffer.shape: + raise RuntimeError(self._materialization_issue_message( + phase="direct-meta sync", + kind="buffer", + module_path=module_path, + rel_name=name, + reason="concatenated checkpoint tensor shape does not match target tensor", + full_name=", ".join(full_names), + source_shape=tuple(source_buffer.shape), + target_shape=tuple(shell_buffer.shape), + )) + + persistent = name not in getattr(shell_sub, "_non_persistent_buffers_set", set()) + cache_key = (",".join(full_names), None, None, concat_dim, shell_buffer.dtype) + new_buffer = buffer_cache.get(cache_key) + if new_buffer is None: + if source_buffer.dtype != shell_buffer.dtype: + source_buffer = source_buffer.to(dtype=shell_buffer.dtype) + new_buffer = source_buffer.clone() + buffer_cache[cache_key] = new_buffer + + shell_sub.register_buffer(name, new_buffer, persistent=persistent) + synced += 1 + continue + full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, name) if full_name is None: continue diff --git a/tests/models/test_minimax_m3.py b/tests/models/test_minimax_m3.py new file mode 100644 index 000000000..3efa51dc9 --- /dev/null +++ b/tests/models/test_minimax_m3.py @@ 
-0,0 +1,28 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium +from gptqmodel import BACKEND +from model_test import ModelTest + + +class TestMinimaxM3(ModelTest): + + NATIVE_MODEL_ID = "/monster/data/model/MiniMax-M3" + USE_FLASH_ATTN = False + TRUST_REMOTE_CODE = False + DELETE_QUANTIZED_MODEL = False + DATASET_SIZE = 1024 + GROUP_SIZE = 32 + EVAL_TASKS_SLOW = { + "arc_challenge": { + "acc": {"value": 0.5026, "floor_pct": 0.04}, + "acc_norm": {"value": 0.5171, "floor_pct": 0.04}, + }, + } + EVAL_BATCH_SIZE = 1 + EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) + LOAD_BACKEND = BACKEND.AUTO + + def test_minimax_m3(self): + self.quantize_and_evaluate() diff --git a/tests/test_lazy_turtle_conversion_mapping.py b/tests/test_lazy_turtle_conversion_mapping.py index c003283c5..49b5d715c 100644 --- a/tests/test_lazy_turtle_conversion_mapping.py +++ b/tests/test_lazy_turtle_conversion_mapping.py @@ -124,6 +124,50 @@ def __init__(self): self.mlp = _MlpShell() +class _FusedDenseMlpShell(nn.Module): + def __init__(self, hidden_dim: int = 4, intermediate_dim: int = 3): + super().__init__() + self.gate_up_proj = nn.Linear(hidden_dim, 2 * intermediate_dim, bias=False, device="meta") + + +class _FusedDenseLayerShell(nn.Module): + def __init__(self): + super().__init__() + self.mlp = _FusedDenseMlpShell() + + +class _MiniMaxM3Shell(nn.Module): + def __init__(self): + super().__init__() + self.config = SimpleNamespace(model_type="minimax_m3_vl") + self.model = nn.Module() + self.model.language_model = nn.Module() + self.model.language_model.layers = nn.ModuleList([_FusedDenseLayerShell()]) + + +class _MiniMaxM3SharedExpertsMlpShell(nn.Module): + def __init__(self, hidden_dim: int = 4, intermediate_dim: int = 3): + super().__init__() + self.shared_experts = nn.Module() + self.shared_experts.gate_up_proj = 
nn.Linear(hidden_dim, 2 * intermediate_dim, bias=False, device="meta") + + +class _MiniMaxM3SharedExpertsLayerShell(nn.Module): + def __init__(self): + super().__init__() + self.mlp = _MiniMaxM3SharedExpertsMlpShell() + + +class _MiniMaxM3SharedExpertsShell(nn.Module): + def __init__(self): + super().__init__() + self.config = SimpleNamespace(model_type="minimax_m3_vl") + self.model = nn.Module() + self.model.language_model = nn.Module() + self.model.language_model.layers = nn.ModuleList([nn.Identity() for _ in range(4)]) + self.model.language_model.layers.append(_MiniMaxM3SharedExpertsLayerShell()) + + class _DeepseekV4Shell(nn.Module): base_model_prefix = "model" @@ -970,6 +1014,137 @@ def test_lazy_turtle_materializes_defused_deepseek_v4_expert_linears_from_w123_a assert torch.equal(expert.down_proj.weight, checkpoint_tensors["model.layers.0.mlp.experts.0.w2.weight"]) +def test_lazy_turtle_materializes_fused_dense_mlp_from_split_gate_up_checkpoint(tmp_path): + reversed_map = LazyTurtle.reverse_hf_conversion_map( + [ + _WeightRenamingStub( + r"^language_model\.model\.", + r"model.language_model.", + ), + _WeightConverterStub( + source_patterns=[ + "mlp.gate_proj.weight", + "mlp.up_proj.weight", + ], + target_patterns="mlp.gate_up_proj.weight", + operations=[Concatenate(dim=0)], + ), + ] + ) + assert reversed_map is not None + + gate = torch.arange(12, dtype=torch.float32).reshape(3, 4) + up = torch.arange(12, 24, dtype=torch.float32).reshape(3, 4) + turtle = _build_lazy_turtle( + tmp_path, + { + "language_model.model.layers.0.mlp.gate_proj.weight": gate, + "language_model.model.layers.0.mlp.up_proj.weight": up, + }, + hf_conversion_map_reversed=reversed_map, + ) + + shell = _MiniMaxM3Shell() + target_submodule = shell.model.language_model.layers[0] + turtle._copy_checkpoint_tensors_into_submodule( + target_model=shell, + target_submodule=target_submodule, + module_path="model.language_model.layers.0", + device=torch.device("cpu"), + recurse=True, + 
non_blocking=False, + ) + + weight = target_submodule.mlp.gate_up_proj.weight + assert weight.device.type != "meta" + assert torch.equal(weight, torch.cat([gate, up], dim=0)) + + +def test_lazy_turtle_sync_all_meta_materializes_fused_dense_mlp_from_split_gate_up_checkpoint(tmp_path): + reversed_map = LazyTurtle.reverse_hf_conversion_map( + [ + _WeightRenamingStub( + r"^language_model\.model\.", + r"model.language_model.", + ), + _WeightConverterStub( + source_patterns=[ + "mlp.gate_proj.weight", + "mlp.up_proj.weight", + ], + target_patterns="mlp.gate_up_proj.weight", + operations=[Concatenate(dim=0)], + ), + ] + ) + assert reversed_map is not None + + gate = torch.arange(12, dtype=torch.float32).reshape(3, 4) + up = torch.arange(12, 24, dtype=torch.float32).reshape(3, 4) + turtle = _build_lazy_turtle( + tmp_path, + { + "language_model.model.layers.0.mlp.gate_proj.weight": gate, + "language_model.model.layers.0.mlp.up_proj.weight": up, + }, + hf_conversion_map_reversed=reversed_map, + ) + + shell = _MiniMaxM3Shell() + materialized = turtle.sync_all_meta(shell_model=shell, tie_after=False) + + weight = shell.model.language_model.layers[0].mlp.gate_up_proj.weight + assert materialized == 1 + assert weight.device.type != "meta" + assert torch.equal(weight, torch.cat([gate, up], dim=0)) + + +def test_lazy_turtle_direct_meta_sync_materializes_minimax_m3_shared_expert_gate_up(tmp_path): + reversed_map = LazyTurtle.reverse_hf_conversion_map( + [ + _WeightRenamingStub( + r"^language_model\.model\.", + r"model.language_model.", + ), + _WeightRenamingStub( + r"\.block_sparse_moe\.shared_experts\.", + r".mlp.shared_experts.", + ), + _WeightConverterStub( + source_patterns=[ + "mlp.shared_experts.gate_proj.weight", + "mlp.shared_experts.up_proj.weight", + ], + target_patterns="mlp.shared_experts.gate_up_proj.weight", + operations=[Concatenate(dim=0)], + ), + ] + ) + assert reversed_map is not None + + gate = torch.arange(12, dtype=torch.float32).reshape(3, 4) + up = 
torch.arange(12, 24, dtype=torch.float32).reshape(3, 4) + turtle = _build_lazy_turtle( + tmp_path, + { + "language_model.model.layers.4.block_sparse_moe.shared_experts.gate_proj.weight": gate, + "language_model.model.layers.4.block_sparse_moe.shared_experts.up_proj.weight": up, + }, + hf_conversion_map_reversed=reversed_map, + ) + + shell = _MiniMaxM3SharedExpertsShell() + leaf = shell.model.language_model.layers[4].mlp.shared_experts.gate_up_proj + turtle.materialize_direct_meta_tensors( + target_model=shell, + target_submodule=leaf, + device=torch.device("cpu"), + ) + + assert leaf.weight.device.type != "meta" + assert torch.equal(leaf.weight, torch.cat([gate, up], dim=0)) + + def test_lazy_turtle_falls_back_to_legacy_checkpoint_conversion_mapping(tmp_path, monkeypatch): def _raise_import_error(_name: str): raise ImportError("transformers.conversion_mapping is unavailable") diff --git a/tests/test_minimax_m3_support.py b/tests/test_minimax_m3_support.py new file mode 100644 index 000000000..d7f724a35 --- /dev/null +++ b/tests/test_minimax_m3_support.py @@ -0,0 +1,146 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from pathlib import Path +from types import SimpleNamespace + +import pytest +from torch import nn +from transformers import AutoConfig, AutoModelForImageTextToText + +from gptqmodel.models import auto +from gptqmodel.models.definitions.minimax_m3_vl import MiniMaxM3VLGPTQ + + +MODEL_PATH = Path("/monster/data/model/MiniMax-M3") + + +def test_minimax_m3_model_type_selects_definition(monkeypatch): + fake_config = SimpleNamespace(model_type="minimax_m3_vl") + + monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code) + monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config) + + assert 
auto.check_and_get_model_definition("/tmp/minimax-m3") is MiniMaxM3VLGPTQ + + +@pytest.mark.skipif(not MODEL_PATH.exists(), reason="MiniMax-M3 model not found") +def test_minimax_m3_local_config_selects_definition(): + config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=False) + + assert config.model_type == "minimax_m3_vl" + assert auto.check_and_get_model_definition(MODEL_PATH) is MiniMaxM3VLGPTQ + + +def test_minimax_m3_module_tree_covers_dense_sparse_and_indexer_paths(): + layer_modules = MiniMaxM3VLGPTQ.simple_layer_modules( + model_config=SimpleNamespace(text_config=SimpleNamespace(num_local_experts=2)), + quantize_config=SimpleNamespace(dynamic=None), + ) + flat_modules = {name for block in layer_modules for name in block} + + assert MiniMaxM3VLGPTQ.layer_modules_strict is False + assert MiniMaxM3VLGPTQ.require_load_processor is False + assert MiniMaxM3VLGPTQ.pre_lm_head_norm_module == "model.language_model.norm" + assert MiniMaxM3VLGPTQ.rotary_embedding == "model.language_model.rotary_emb" + + assert "self_attn.q_proj" in flat_modules + assert "self_attn.indexer.q_proj" in flat_modules + assert "self_attn.indexer.k_proj" in flat_modules + assert "self_attn.o_proj" in flat_modules + assert "mlp.gate_up_proj" in flat_modules + assert "mlp.down_proj" in flat_modules + assert "mlp.shared_experts.gate_up_proj" in flat_modules + assert "mlp.shared_experts.down_proj" in flat_modules + assert "mlp.experts.0.gate_proj" in flat_modules + assert "mlp.experts.1.up_proj" in flat_modules + assert "mlp.experts.0.down_proj" in flat_modules + assert not any("block_sparse_moe" in name for name in flat_modules) + + +def test_minimax_m3_multimodal_base_modules_include_non_language_children(): + class _LanguageModel(nn.Module): + def __init__(self): + super().__init__() + self.layers = nn.ModuleList([nn.Identity()]) + self.embed_tokens = nn.Embedding(4, 4) + self.norm = nn.LayerNorm(4) + self.rotary_emb = nn.Identity() + + class _MiniMaxM3Core(nn.Module): + def 
__init__(self): + super().__init__() + self.language_model = _LanguageModel() + self.vision_tower = nn.Identity() + self.multi_modal_projector = nn.Identity() + + class _MiniMaxM3Wrapper(nn.Module): + def __init__(self): + super().__init__() + self.model = _MiniMaxM3Core() + self.lm_head = nn.Linear(4, 4, bias=False) + + base_modules = set(MiniMaxM3VLGPTQ.get_base_modules(_MiniMaxM3Wrapper())) + + assert MiniMaxM3VLGPTQ.extract_layers_node() == ["model.language_model.layers"] + assert "model.vision_tower" in base_modules + assert "model.multi_modal_projector" in base_modules + assert "model.language_model.embed_tokens" in base_modules + assert "model.language_model.norm" in base_modules + assert "model.language_model.rotary_emb" in base_modules + + +@pytest.mark.skipif(not MODEL_PATH.exists(), reason="MiniMax-M3 model not found") +def test_minimax_m3_defuser_splits_native_packed_experts(): + from defuser import convert_model + from defuser.model_registry import MODEL_CONFIG + + config = _tiny_minimax_m3_config() + model = AutoModelForImageTextToText.from_config(config, trust_remote_code=False) + + experts = model.model.language_model.layers[3].mlp.experts + assert "minimax_m3_vl" in MODEL_CONFIG + assert hasattr(experts, "gate_up_proj") + assert hasattr(experts, "down_proj") + + assert convert_model(model, cleanup_original=False) is True + + experts = model.model.language_model.layers[3].mlp.experts + expert0 = getattr(experts, "0") + assert not hasattr(experts, "gate_up_proj") + assert hasattr(expert0, "gate_proj") + assert hasattr(expert0, "up_proj") + assert hasattr(expert0, "down_proj") + + +def _tiny_minimax_m3_config(): + config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=False) + text_config = config.text_config + text_config.hidden_size = 16 + text_config.intermediate_size = 8 + text_config.dense_intermediate_size = 32 + text_config.shared_intermediate_size = 8 + text_config.num_hidden_layers = 4 + text_config.num_attention_heads = 2 + 
text_config.num_key_value_heads = 1 + text_config.head_dim = 8 + text_config.num_local_experts = 2 + text_config.num_experts_per_tok = 1 + text_config.n_shared_experts = 1 + text_config.moe_layer_freq = [0, 0, 0, 1] + text_config.vocab_size = 128 + config.vocab_size = 128 + + vision_config = config.vision_config + vision_config.hidden_size = 8 + vision_config.intermediate_size = 16 + vision_config.num_hidden_layers = 1 + vision_config.num_attention_heads = 2 + vision_config.image_size = 16 + vision_config.patch_size = 8 + if hasattr(vision_config, "spatial_merge_size"): + vision_config.spatial_merge_size = 1 + + return config