ModelCloud · Qubitium · Jun 30, 2026 · Jun 26, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@
 
 ## Latest News
 
+* 06/30/2026 7.1.0-dev `main`: ✨ Added `minimax_m3_vl` / MiniMax M3 model support
 * 06/25/2026 7.1.0-dev `main`: ✨ Added `cohere2_moe` model support
 * 05/25/2026 7.1.0-dev `main`: ✨ Added `hy_3` and `ministral3` model support
 * 05/25/2026 7.1.0-dev `main`: ✨ Added `hunyuan_v1_dense` and `hunyuan_v1_moe` model support
@@ -266,7 +267,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
 | Dream                    | ✅ | GRIN-MoE                        | ✅ | Instella         | ✅ | Phi 1-4                         | ✅ | Voxtral                | ✅ |
 | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6             | ✅ | PanGu-α                | ✅ |
 | XVERSE                   | ✅ | Brumby                          | ✅ | Hymba            | ✅ | Mistral                         | ✅ | Qwen 1/2/3/3.5         | ✅ |
-| MiniMax M2               | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE                        | ✅ | Marin                  | ✅ |
+| MiniMax M2/M3            | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE                        | ✅ | Marin                  | ✅ |
 | InternVL Chat            | ✅ | Laguna                          | ✅ | Mimo / Mimo V2   | ✅ | Zamba / Zamba2                  | ✅ | Intern S1              | ✅ |
 | HunYuan V1 Dense / MoE   | ✅ | HY-V3                           | ✅ |    |  |                  |  |                |   |
 

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -142,6 +142,7 @@
 from .definitions.minicpmv import MiniCPMVQModel  # noqa: E402
 from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel  # noqa: E402
 from .definitions.minimax_m2 import MiniMaxM2GPTQ  # noqa: E402
+from .definitions.minimax_m3_vl import MiniMaxM3VLGPTQ  # noqa: E402
 from .definitions.ministral3 import Ministral3GPTQ  # noqa: E402
 from .definitions.mistral3 import Mistral3GPTQ
 from .definitions.mixtral import MixtralQModel  # noqa: E402
@@ -277,6 +278,7 @@
     "minicpmv4_6": MiniCPMV4_6QModel,
     "minimax": MiniMaxM2GPTQ,
     "minimax_m2": MiniMaxM2GPTQ,
+    "minimax_m3_vl": MiniMaxM3VLGPTQ,
     "ministral3": Ministral3GPTQ,
     "qwen2_moe": Qwen2MoeQModel,
     "qwen3_moe": Qwen3MoeQModel,

diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
@@ -59,6 +59,7 @@
 from .minicpmv import MiniCPMVQModel
 from .minicpmv_4_6 import MiniCPMV4_6QModel
 from .minimax_m2 import MiniMaxM2GPTQ
+from .minimax_m3_vl import MiniMaxM3VLGPTQ
 from .ministral3 import Ministral3GPTQ
 from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel

diff --git a/gptqmodel/models/definitions/minimax_m3_vl.py b/gptqmodel/models/definitions/minimax_m3_vl.py
@@ -0,0 +1,59 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from transformers import AutoModelForImageTextToText
+
+from ..base import BaseQModel
+from ..moe_lifecycle import GateUpDownMoELifecycleHooks
+
+class MiniMaxM3VLGPTQ(BaseQModel):  # Quantization model definition for MiniMax-M3 VL checkpoints.
+    loader = AutoModelForImageTextToText  # Loaded through the image-text-to-text auto class.
+    require_load_processor = False
+    support_batch_quantize = False
+
+    pre_lm_head_norm_module = "model.language_model.norm"  # Dotted path of the final norm inside the language model.
+    rotary_embedding = "model.language_model.rotary_emb"  # Dotted path of the rotary embedding module.
+
+    # MiniMax-M3 starts with dense MLP layers, then switches to sparse MoE.
+    layer_modules_strict = False  # presumably lets a layer omit some modules listed in module_tree — confirm in BaseQModel
+    dynamic_expert_index = "num_local_experts"  # presumably the config field holding the expert count — confirm
+
+    # Defuser splits MiniMax-M3 packed expert tensors into gate/up/down modules.
+    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()
+
+    module_tree = [  # NOTE(review): `:0`/`:1`/`:!` suffixes are parsed by base-class code not shown here; `:!` presumably marks modules left unquantized — confirm
+        "model",
+        "language_model",
+        "layers",
+        "#",  # presumably a placeholder for the decoder layer index — confirm
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": (
+                "q_proj:0",
+                "q_norm:0:!",
+                "k_proj:0",
+                "k_norm:0:!",
+                "v_proj:0",
+                "indexer.q_proj:0",  # `indexer.*` entries use dotted paths, presumably a submodule nested under self_attn
+                "indexer.q_norm:0:!",
+                "indexer.k_proj:0",
+                "indexer.k_norm:0:!",
+                "o_proj:1",
+            ),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe": {
+                # Dense fallback used by early decoder blocks (empty key presumably means modules directly on `mlp`).
+                "": ("gate_up_proj:0", "down_proj:1"),
+                "gate": ("gate:!", "e_score_correction_bias:!"),
+                "shared_experts": ("gate_up_proj:0", "down_proj:1"),
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),  # per-expert gate/up/down projections; "#" presumably the expert index
+                },
+            },
+        },
+    ]
+
+
+__all__ = ["MiniMaxM3VLGPTQ"]
diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py
@@ -545,6 +545,7 @@ def from_pretrained(
         model_init_kwargs_without_internal["trust_remote_code"] = trust_remote_code
 
         config = AutoConfig.from_pretrained(model_local_path, **model_init_kwargs_without_internal, **hf_gguf_load_kwargs)
+        print("config", config)
 
         defuser.replace_fused_blocks(config.model_type)
 
@@ -721,6 +722,8 @@ def skip(*args, **kwargs):
 
         if quantize_config.offload_to_disk:
             shell_config = copy.deepcopy(config)
+            print("shell_config", shell_config)
+            print("shell_config", shell_config.vision_config)
             try:
                 model = build_shell_model(cls.loader, config=shell_config, **model_init_kwargs_without_internal)
             except RuntimeError as exc: