diff --git a/README.md b/README.md index c597da528..91935aef2 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@

## Latest News - +* 07/01/2026 7.1.0-dev `main`: ✨ Added `lfm2` model support * 06/30/2026 7.1.0-dev `main`: ✨ Added `minimax_m3_vl` / MiniMax M3 model support * 06/25/2026 7.1.0-dev `main`: ✨ Added `cohere2_moe` model support * 05/25/2026 7.1.0-dev `main`: ✨ Added `hy_3` and `ministral3` model support @@ -267,7 +267,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode | Dream | ✅ | GRIN-MoE | ✅ | Instella | ✅ | Phi 1-4 | ✅ | Voxtral | ✅ | | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α | ✅ | | XVERSE | ✅ | Brumby | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3/3.5 | ✅ | -| MiniMax M2/M3 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2-MoE | ✅ | Marin | ✅ | +| MiniMax M2/M3 | ✅ | AfMoE | ✅ | Bailing-MoE | ✅ | LFM2 / LFM2-MoE | ✅ | Marin | ✅ | | InternVL Chat | ✅ | Laguna | ✅ | Mimo / Mimo V2 | ✅ | Zamba / Zamba2 | ✅ | Intern S1 | ✅ | | HunYuan V1 Dense / MoE | ✅ | HY-V3 | ✅ | | | | | | | diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index f2103551f..adc71efaa 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -128,6 +128,7 @@ from .definitions.klear import KlearQModel # noqa: E402 from .definitions.kimi_k25 import KimiK25QModel # noqa: E402 from .definitions.laguna import LagunaQModel # noqa: E402 +from .definitions.lfm2 import LFM2QModel # noqa: E402 from .definitions.lfm2_moe import LFM2MoeQModel # noqa: E402 from .definitions.llada2 import LLaDA2MoeQModel from .definitions.llama import LlamaQModel # noqa: E402 @@ -333,6 +334,7 @@ "nemotronh_nano_omni_reasoning_v3": NemotronOmniQModel, "bailing_moe": BailingMoeQModel, "bailing_hybrid": BailingMoeQModel, + "lfm2": LFM2QModel, "lfm2_moe": LFM2MoeQModel, "llada2_moe": LLaDA2MoeQModel, "mistral3": Mistral3GPTQ, diff --git a/gptqmodel/models/definitions/lfm2.py b/gptqmodel/models/definitions/lfm2.py new file mode 100644 index 000000000..49481c23e --- /dev/null +++ 
b/gptqmodel/models/definitions/lfm2.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from ..base import BaseQModel + + +class LFM2QModel(BaseQModel): + pre_lm_head_norm_module = "model.embedding_norm" + + layer_modules_strict = False + + module_tree = [ + "model", + "layers", + "#", + { + "operator_norm": ("operator_norm:!",), + "conv": ("in_proj", "out_proj"), + "self_attn": ( + "q_proj:0", + "k_proj:0", + "v_proj:0", + "out_proj:1", + ), + "ffn_norm": ("ffn_norm:!",), + "feed_forward": ("w1:0", "w3:0", "w2:1"), + }, + ] diff --git a/tests/models/test_lfm2.py b/tests/models/test_lfm2.py new file mode 100644 index 000000000..812fc6744 --- /dev/null +++ b/tests/models/test_lfm2.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: 2026 ModelCloud.ai +# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from model_test import ModelTest + + +class TestLFM2(ModelTest): + NATIVE_MODEL_ID = "/monster/data/model/LFM2.5-1.2B-Instruct" # "LiquidAI/LFM2.5-1.2B-Instruct" + TRUST_REMOTE_CODE = False + USE_FLASH_ATTN = False + GROUP_SIZE = 32 + DATASET_SIZE = 512 + EVAL_BATCH_SIZE = 4 + EVAL_TASKS_SLOW = { + "arc_challenge": { + "chat_template": True, + "acc": { + "value": 0.3780, + "floor_pct": 0.04, + }, + "acc_norm": { + "value": 0.3968, + "floor_pct": 0.04, + }, + }, + } + EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW) + + def test_lfm2(self): + self.quantize_and_evaluate()