Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

## Latest News

* 06/30/2026 7.1.0-dev `main`: ✨ Added `minimax_m3_vl` / MiniMax M3 model support
* 06/25/2026 7.1.0-dev `main`: ✨ Added `cohere2_moe` model support
* 05/25/2026 7.1.0-dev `main`: ✨ Added `hy_3` and `ministral3` model support
* 05/25/2026 7.1.0-dev `main`: ✨ Added `hunyuan_v1_dense` and `hunyuan_v1_moe` model support
Expand Down Expand Up @@ -266,7 +267,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
| Dream | ✅ | GRIN-MoE | ✅ | Instella | ✅ | Phi 1-4 | ✅ | Voxtral | ✅ |
| ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α | ✅ |
| XVERSE | ✅ | Brumby | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3/3.5 | ✅ |
| MiniMax M2 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ |
| MiniMax M2/M3 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ |
| InternVL Chat | ✅ | Laguna | ✅ | Mimo / Mimo V2 | ✅ | Zamba / Zamba2 | ✅ | Intern S1 | ✅ |
| HunYuan V1 Dense / MoE | ✅ | HY-V3 | ✅ | | | | | | |

Expand Down
2 changes: 2 additions & 0 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@
from .definitions.minicpmv import MiniCPMVQModel # noqa: E402
from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel # noqa: E402
from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402
from .definitions.minimax_m3_vl import MiniMaxM3VLGPTQ # noqa: E402
from .definitions.ministral3 import Ministral3GPTQ # noqa: E402
from .definitions.mistral3 import Mistral3GPTQ
from .definitions.mixtral import MixtralQModel # noqa: E402
Expand Down Expand Up @@ -277,6 +278,7 @@
"minicpmv4_6": MiniCPMV4_6QModel,
"minimax": MiniMaxM2GPTQ,
"minimax_m2": MiniMaxM2GPTQ,
"minimax_m3_vl": MiniMaxM3VLGPTQ,
"ministral3": Ministral3GPTQ,
"qwen2_moe": Qwen2MoeQModel,
"qwen3_moe": Qwen3MoeQModel,
Expand Down
1 change: 1 addition & 0 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from .minicpmv import MiniCPMVQModel
from .minicpmv_4_6 import MiniCPMV4_6QModel
from .minimax_m2 import MiniMaxM2GPTQ
from .minimax_m3_vl import MiniMaxM3VLGPTQ
from .ministral3 import Ministral3GPTQ
from .mimo_v2 import MimoV2QModel
from .mixtral import MixtralQModel
Expand Down
59 changes: 59 additions & 0 deletions gptqmodel/models/definitions/minimax_m3_vl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from transformers import AutoModelForImageTextToText

from ..base import BaseQModel
from ..moe_lifecycle import GateUpDownMoELifecycleHooks

class MiniMaxM3VLGPTQ(BaseQModel):
    """Model definition for MiniMax-M3 (vision-language) checkpoints.

    The class is purely declarative: it sets class attributes that the
    ``BaseQModel`` machinery reads.  Only the decoder stack under
    ``model.language_model`` is described by ``module_tree``.

    NOTE(review): the exact meaning of the ``:0`` / ``:1`` / ``:!`` suffixes
    and of the ``"#"`` placeholder is defined by ``BaseQModel``, which is not
    visible here -- confirm against the base class before changing them.
    """

    # Checkpoints are loaded through transformers' image-text-to-text auto class.
    loader = AutoModelForImageTextToText
    # NOTE(review): presumably tells the loader that no processor is needed
    # for this model -- confirm against BaseQModel.
    require_load_processor = False
    # NOTE(review): presumably disables batched quantization -- confirm
    # against BaseQModel.
    support_batch_quantize = False

    # Dotted module paths to the final norm and the rotary embedding; both
    # live under the language model sub-module.
    pre_lm_head_norm_module = "model.language_model.norm"
    rotary_embedding = "model.language_model.rotary_emb"

    # MiniMax-M3 starts with dense MLP layers, then switches to sparse MoE.
    layer_modules_strict = False
    # NOTE(review): presumably the config attribute holding the number of
    # experts, used to expand the "#" entry under "experts" -- confirm.
    dynamic_expert_index = "num_local_experts"

    # Defuser splits MiniMax-M3 packed expert tensors into gate/up/down modules.
    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()

    # Path from the model root to each decoder layer ("#" stands in for the
    # layer position in the path), followed by a mapping of the sub-modules
    # inside one layer.
    module_tree = [
        "model",
        "language_model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            # Attention projections, including the "indexer" sub-module's own
            # q/k projections and norms.
            "self_attn": (
                "q_proj:0",
                "q_norm:0:!",
                "k_proj:0",
                "k_norm:0:!",
                "v_proj:0",
                "indexer.q_proj:0",
                "indexer.q_norm:0:!",
                "indexer.k_proj:0",
                "indexer.k_norm:0:!",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp:moe": {
                # Dense fallback used by early decoder blocks.
                "": ("gate_up_proj:0", "down_proj:1"),
                "gate": ("gate:!", "e_score_correction_bias:!"),
                "shared_experts": ("gate_up_proj:0", "down_proj:1"),
                # One entry per expert; each expert exposes split
                # gate/up/down projections.
                "experts": {
                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                },
            },
        },
    ]


__all__ = ["MiniMaxM3VLGPTQ"]
3 changes: 3 additions & 0 deletions gptqmodel/models/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ def from_pretrained(
model_init_kwargs_without_internal["trust_remote_code"] = trust_remote_code

config = AutoConfig.from_pretrained(model_local_path, **model_init_kwargs_without_internal, **hf_gguf_load_kwargs)
print("config", config)

defuser.replace_fused_blocks(config.model_type)

Expand Down Expand Up @@ -721,6 +722,8 @@ def skip(*args, **kwargs):

if quantize_config.offload_to_disk:
shell_config = copy.deepcopy(config)
print("shell_config", shell_config)
print("shell_config", shell_config.vision_config)
try:
model = build_shell_model(cls.loader, config=shell_config, **model_init_kwargs_without_internal)
except RuntimeError as exc:
Expand Down
Loading