Commit

phi-3 test still failing, llama 3 OOM errors
Signed-off-by: Brian Dellabetta <[email protected]>
brian-dellabetta committed Feb 14, 2025
1 parent b1bf7b8 commit b5fccc7
Showing 7 changed files with 22 additions and 12 deletions.
15 changes: 10 additions & 5 deletions tests/e2e/e2e_utils.py

@@ -1,7 +1,7 @@
+import torch
 from datasets import load_dataset
 from loguru import logger
 from transformers import AutoProcessor
-import torch

 from llmcompressor.modifiers.quantization import GPTQModifier, QuantizationModifier
 from llmcompressor.transformers import oneshot
@@ -26,9 +26,13 @@ def run_oneshot_for_e2e_testing(
     oneshot_kwargs = {}
     pretrained_model_class = get_model_class(model_class)
     loaded_model = pretrained_model_class.from_pretrained(
-        model, device_map=device, torch_dtype="auto"
+        model,
+        device_map=device,
+        torch_dtype="auto",
+        trust_remote_code=True,
+        _attn_implementation="eager",
     )
-    processor = AutoProcessor.from_pretrained(model)
+    processor = AutoProcessor.from_pretrained(model, trust_remote_code=True)

     if dataset_id:
         ds = load_dataset(dataset_id, name=dataset_config, split=dataset_split)
@@ -37,13 +41,14 @@ def run_oneshot_for_e2e_testing(
         oneshot_kwargs["dataset"] = ds
         oneshot_kwargs["max_seq_length"] = max_seq_length
         oneshot_kwargs["num_calibration_samples"] = num_calibration_samples
-        #TODO better conditional on when multimodal data-collator should be added
+
+        # TODO better conditional on when multimodal data-collator should be added
         if "flickr30k" in dataset_id:
             # Define a oneshot data collator for multimodal inputs.
             def data_collator(batch):
                 assert len(batch) == 1
                 return {key: torch.tensor(value) for key, value in batch[0].items()}
+
             oneshot_kwargs["data_collator"] = data_collator

     oneshot_kwargs["model"] = loaded_model

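The data collator added above only needs to turn a single pre-processed sample into tensors, since calibration feeds one multimodal sample at a time. A minimal standalone sketch of that behavior, using a made-up sample rather than real flickr30k processor output:

```python
import torch


def data_collator(batch):
    # Calibration batches contain exactly one sample; convert its lists to tensors.
    assert len(batch) == 1
    return {key: torch.tensor(value) for key, value in batch[0].items()}


# Hypothetical processor-style output: nested lists for ids, mask, and pixels.
sample = {
    "input_ids": [[1, 2, 3, 4]],
    "attention_mask": [[1, 1, 1, 1]],
    "pixel_values": [[[0.0] * 4] * 4],
}

batch = data_collator([sample])
print({k: tuple(v.shape) for k, v in batch.items()})
# {'input_ids': (1, 4), 'attention_mask': (1, 4), 'pixel_values': (1, 4, 4)}
```
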
(recipe file, name not shown in this capture)

@@ -3,7 +3,7 @@ quant_stage:
     SmoothQuantModifier:
       smoothing_strength: 0.8
     GPTQModifier:
-      ignore: [lm_head]
+      ignore: [lm_head, "re:vision_tower.*", "re:multi_modal_projector.*"]
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}

(recipe file, name not shown in this capture)

@@ -1,7 +1,7 @@
 quant_stage:
   quant_modifiers:
     GPTQModifier:
-      ignore: ["lm_head"]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"]
       config_groups:
         group_0:
           weights:

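Both recipe edits keep the vision stack out of weight quantization by extending GPTQModifier's ignore list with "re:" regex patterns. A hedged sketch of the same ignore list expressed through the Python API (the targets/scheme arguments here are illustrative placeholders, not taken from these recipes, which define config_groups instead):

```python
from llmcompressor.modifiers.quantization import GPTQModifier

# Skip the LM head plus every module under the vision tower and multimodal
# projector; the "re:" prefix marks a regex pattern rather than an exact module name.
modifier = GPTQModifier(
    targets="Linear",  # illustrative target selection
    scheme="W4A16",    # illustrative scheme
    ignore=["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*"],
)
```
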
3 changes: 2 additions & 1 deletion tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml

@@ -11,5 +11,6 @@ lmeval:
   task: mmmu_val_economics
   num_fewshot: 0
   limit: 1000
+  batch_size: 8
   metrics:
-    acc,none: 0.266
+    acc,none: 0.333

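The new batch_size key is consumed by the LmEvalConfig model in tests/lmeval/test_lmeval.py (see the diff further down). A hedged reconstruction covering only the fields visible in this commit, plus task, which the YAML sets; the real class defines more:

```python
from pydantic import BaseModel


class LmEvalConfig(BaseModel):
    task: str
    num_fewshot: int
    limit: int
    metrics: dict
    batch_size: int = 100  # new field; defaults to 100, overridden to 8 in the YAML above


cfg = LmEvalConfig(
    task="mmmu_val_economics",
    num_fewshot=0,
    limit=1000,
    metrics={"acc,none": 0.333},
    batch_size=8,
)
print(cfg.batch_size)  # -> 8
```
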
6 changes: 4 additions & 2 deletions tests/lmeval/configs/vl_w4a16_actorder_weight.yaml

@@ -1,6 +1,8 @@
 cadence: "weekly"
-model: meta-llama/Llama-3.2-11B-Vision-Instruct
-model_class: TraceableMllamaForConditionalGeneration
+#TODO llama 3.2 11B keeps hitting OOM errors
+# model: meta-llama/Llama-3.2-11B-Vision-Instruct
+# model_class: TraceableMllamaForConditionalGeneration
+model: microsoft/Phi-3-vision-128k-instruct
 recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
 dataset_id: lmms-lab/flickr30k
 dataset_split: "test[:512]"

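Swapping in microsoft/Phi-3-vision-128k-instruct is also why e2e_utils.py above now passes trust_remote_code=True and forces eager attention: the model ships custom modeling code and is commonly run without flash attention. A hedged, standalone sketch of the equivalent load (model class and device_map chosen for illustration, not taken from the test harness):

```python
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3-vision-128k-instruct"

# trust_remote_code is required because the repo provides its own modeling code;
# _attn_implementation="eager" mirrors the override added in e2e_utils.py.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
    _attn_implementation="eager",
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
```
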
4 changes: 3 additions & 1 deletion tests/lmeval/test_lmeval.py

@@ -20,6 +20,7 @@ class LmEvalConfig(BaseModel):
     num_fewshot: int
     limit: int
     metrics: dict
+    batch_size: int = 100


 try:
@@ -86,7 +87,7 @@ def test_lm_eval(self):
         self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, processor = run_oneshot_for_e2e_testing(
             model=self.model,
-            model_class = self.model_class,
+            model_class=self.model_class,
             device=self.device,
             num_calibration_samples=self.num_calibration_samples,
             max_seq_length=self.max_seq_length,
@@ -122,6 +123,7 @@ def test_lm_eval(self):
             num_fewshot=self.lmeval.num_fewshot,
             limit=self.lmeval.limit,
             device="cuda:0",
+            batch_size=self.lmeval.batch_size,
         )

         metrics = results["results"][self.lmeval.task]

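With batch_size now threaded from the config into the evaluation call, the test's evaluation step looks roughly like the hedged sketch below. The backend name, model_args string, and saved-model path are illustrative assumptions, not values taken from this diff:

```python
import lm_eval

# Values mirror vl_fp8_dynamic_per_token.yaml; everything marked as an assumption
# stands in for parts of the test not shown in this commit.
results = lm_eval.simple_evaluate(
    model="hf-multimodal",  # assumption: multimodal HF backend for the VL configs
    model_args="pretrained=./Phi-3-vision-128k-instruct-FP8_DYNAMIC",  # hypothetical save_dir
    tasks=["mmmu_val_economics"],
    num_fewshot=0,
    limit=1000,
    device="cuda:0",
    batch_size=8,  # comes from the new LmEvalConfig.batch_size field
)
print(results["results"]["mmmu_val_economics"]["acc,none"])
```
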
2 changes: 1 addition & 1 deletion tests/testing_utils.py

@@ -209,6 +209,7 @@ def process(sample):
         )

     elif ds_name == "flickr30k":
+
         def process(sample):
             messages = [
                 {
@@ -227,7 +228,6 @@ def process(sample):
                 "images": sample["image"],
             }

-
     else:
         raise NotImplementedError(f"Cannot preprocess dataset {ds.info.dataset_name}")

