From 6542d4a4e00fd832ab2ef0f197951b642d908585 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 00:16:25 +0800 Subject: [PATCH 1/6] fix internvl --- src/mcore_bridge/bridge/gpt_bridge.py | 40 ++++++++++++++++++++++ src/mcore_bridge/model/mm_gpts/internvl.py | 30 ++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py index c33a572..95b55de 100644 --- a/src/mcore_bridge/bridge/gpt_bridge.py +++ b/src/mcore_bridge/bridge/gpt_bridge.py @@ -5,11 +5,14 @@ import torch import torch.distributed as dist import torch.nn.functional as F +from contextlib import contextmanager from megatron.core import mpu from packaging import version from peft import PeftModel from peft.utils import ModulesToSaveWrapper from tqdm import tqdm +from transformers import PreTrainedModel +from transformers.utils import ContextManagers from typing import Callable, List, Optional, Union from mcore_bridge.tuners import LoraParallelLinear @@ -1744,6 +1747,43 @@ def save_weights( saver.finalize() dist.barrier() # Ensure all weights are saved completely + @contextmanager + def _patch_hf_initialize_weight(self): + + _origin_initialize_weight = PreTrainedModel._initialize_weights + + def _initialize_weight(self, *args, **kwargs): + return + + PreTrainedModel._initialize_weights = _initialize_weight + try: + yield + finally: + PreTrainedModel._initialize_weights = _origin_initialize_weight + + @contextmanager + def _patch_device_meta(self, model_cls): + __origin_init__ = model_cls.__init__ + + def __init__(self, *args, **kwargs): + with torch.device('meta'): + __origin_init__(self, *args, **kwargs) + + model_cls.__init__ = __init__ + + try: + yield + finally: + model_cls.__init__ = __origin_init__ + + def _get_meta_model_context(self, ignore_init_model_cls=None): + ignore_init_model_cls = ignore_init_model_cls or [] + if not isinstance(ignore_init_model_cls, list): + ignore_init_model_cls 
= [ignore_init_model_cls] + context_list = [self._patch_device_meta(model_cls) for model_cls in ignore_init_model_cls] + context_list.append(self._patch_hf_initialize_weight()) + return ContextManagers(context_list) + class MultimodalGPTBridge(GPTBridge): hf_layers_prefix = 'model.language_model.layers' diff --git a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index aa142c0..484f91e 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -1,7 +1,7 @@ # Copyright (c) ModelScope Contributors. All rights reserved. import torch from torch import nn -from transformers import AutoModel, PretrainedConfig +from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig from transformers.dynamic_module_utils import get_class_from_dynamic_module from mcore_bridge.bridge import GPTBridge, MultimodalGPTBridge @@ -18,6 +18,31 @@ class InternvlBridge(GPTBridge): hf_lm_head_key = 'language_model.lm_head.weight' hf_score_key = 'language_model.score.weight' + def get_hf_meta_model(self): + model_cls = [] + from transformers.models.qwen2 import Qwen2ForCausalLM + model_cls.append(Qwen2ForCausalLM) + try: + from transformers.models import Qwen3ForCausalLM + model_cls.append(Qwen3ForCausalLM) + except ImportError: + pass + try: + from transformers.models import Qwen3MoeForCausalLM + model_cls.append(Qwen3MoeForCausalLM) + except ImportError: + pass + try: + from transformers import GptOssForCausalLM + model_cls.append(GptOssForCausalLM) + except ImportError: + pass + contexts = self._get_meta_model_context(model_cls) + hf_config = self.config.hf_config + with contexts: + model = AutoModelForCausalLM.from_pretrained(hf_config.name_or_path, trust_remote_code=True) + return model + class InternvlVit(HuggingFaceVit): module_mapping = {'vision_model': 'vision_model', 'mlp1': 'mlp1'} @@ -33,7 +58,6 @@ def prepare_attn_impl(self): 
self.hf_config.vision_config.use_flash_attn = use_flash_attn def prepare_model(self, hf_config: PretrainedConfig): - from transformers import AutoProcessor llm_model_type = self.config.llm_model_type if llm_model_type not in ['qwen2', 'qwen3', 'qwen3_moe', 'gpt_oss']: raise ValueError(f'{llm_model_type} is not supported for internvl_chat model') @@ -52,7 +76,7 @@ def prepare_model(self, hf_config: PretrainedConfig): self.select_layer = hf_config.select_layer self.downsample_ratio = hf_config.downsample_ratio self.ps_version = hf_config.ps_version - self.processor = AutoProcessor.from_pretrained(hf_config.name_or_path, trust_remote_code=True) + self.tokenizer = AutoTokenizer.from_pretrained(hf_config.name_or_path, trust_remote_code=True) def get_inputs_embeds(self, inputs_embeds, **kwargs): input_ids = kwargs['input_ids'] From c64ce8c4cdf8eab5d00fd1e2b57b835d0500ad5f Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 00:34:46 +0800 Subject: [PATCH 2/6] fix --- src/mcore_bridge/model/mm_gpts/internvl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index 484f91e..c1ab6d1 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -39,8 +39,9 @@ def get_hf_meta_model(self): pass contexts = self._get_meta_model_context(model_cls) hf_config = self.config.hf_config + model_cls = get_class_from_dynamic_module('modeling_internvl_chat.InternVLChatModel', hf_config.name_or_path) with contexts: - model = AutoModelForCausalLM.from_pretrained(hf_config.name_or_path, trust_remote_code=True) + model = model_cls(hf_config) return model From c17fb0e3f7bdee8b37e812093484dd5ce26b5266 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 00:39:56 +0800 Subject: [PATCH 3/6] fix --- src/mcore_bridge/model/mm_gpts/internvl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index c1ab6d1..e1c17d5 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -1,7 +1,7 @@ # Copyright (c) ModelScope Contributors. All rights reserved. import torch from torch import nn -from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig +from transformers import AutoModel, AutoTokenizer, PretrainedConfig from transformers.dynamic_module_utils import get_class_from_dynamic_module from mcore_bridge.bridge import GPTBridge, MultimodalGPTBridge From 375e979caa4d5965cd236cd2632395a2f97e1a37 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 10:57:05 +0800 Subject: [PATCH 4/6] fix --- src/mcore_bridge/model/mm_gpts/internvl.py | 25 +++++++--------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index e1c17d5..c28aa3f 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -1,4 +1,5 @@ # Copyright (c) ModelScope Contributors. All rights reserved. 
+import importlib import torch from torch import nn from transformers import AutoModel, AutoTokenizer, PretrainedConfig @@ -20,23 +21,13 @@ class InternvlBridge(GPTBridge): def get_hf_meta_model(self): model_cls = [] - from transformers.models.qwen2 import Qwen2ForCausalLM - model_cls.append(Qwen2ForCausalLM) - try: - from transformers.models import Qwen3ForCausalLM - model_cls.append(Qwen3ForCausalLM) - except ImportError: - pass - try: - from transformers.models import Qwen3MoeForCausalLM - model_cls.append(Qwen3MoeForCausalLM) - except ImportError: - pass - try: - from transformers import GptOssForCausalLM - model_cls.append(GptOssForCausalLM) - except ImportError: - pass + class_names = ['Qwen2ForCausalLM', 'Qwen3ForCausalLM', 'Qwen3MoeForCausalLM', 'GptOssForCausalLM'] + module = importlib.import_module('transformers') + for cls_name in class_names: + try: + model_cls.append(getattr(module, cls_name)) + except (ImportError, AttributeError): + pass contexts = self._get_meta_model_context(model_cls) hf_config = self.config.hf_config model_cls = get_class_from_dynamic_module('modeling_internvl_chat.InternVLChatModel', hf_config.name_or_path) From c9e424c5b00b964c2ffd4e2baa72caf39b60a219 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 11:16:08 +0800 Subject: [PATCH 5/6] fix --- src/mcore_bridge/model/mm_gpts/internvl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index c28aa3f..4745020 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -33,6 +33,7 @@ def get_hf_meta_model(self): model_cls = get_class_from_dynamic_module('modeling_internvl_chat.InternVLChatModel', hf_config.name_or_path) with contexts: model = model_cls(hf_config) + model._auto_class = 'AutoModelForCausalLM' return model From 679b0230ed7f5e04128e1f782112ee8454f3cbb6 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 2 Apr 2026 
11:20:08 +0800 Subject: [PATCH 6/6] fix --- src/mcore_bridge/model/mm_gpts/internvl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcore_bridge/model/mm_gpts/internvl.py b/src/mcore_bridge/model/mm_gpts/internvl.py index 4745020..1f3bedc 100644 --- a/src/mcore_bridge/model/mm_gpts/internvl.py +++ b/src/mcore_bridge/model/mm_gpts/internvl.py @@ -80,7 +80,7 @@ def get_inputs_embeds(self, inputs_embeds, **kwargs): inputs_embeds = inputs_embeds + vit_embeds.mean() * 0. else: vit_embeds = self.extract_feature(pixel_values.to(self.vision_model.dtype)) - selected = (input_ids == self.processor.encode('<IMG_CONTEXT>', add_special_tokens=False)[0]) + selected = (input_ids == self.tokenizer.encode('<IMG_CONTEXT>', add_special_tokens=False)[0]) inputs_embeds = inputs_embeds.clone() inputs_embeds[selected] = vit_embeds.reshape(-1, vit_embeds.shape[-1]).to(dtype=inputs_embeds.dtype) return inputs_embeds