Skip to content

Commit 0f3ce4c

Browse files
yma11 and jikunshang authored
[XPU] Fix spec-decode UTs under tests/v1/spec_decode (#38491)
Signed-off-by: Yan Ma <yan.ma@intel.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
1 parent af661a1 commit 0f3ce4c

3 files changed

Lines changed: 2 additions & 14 deletions

File tree

tests/v1/spec_decode/test_eagle.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -755,12 +755,6 @@ def test_load_model(
755755
use_distinct_lm_head,
756756
monkeypatch,
757757
):
758-
if attn_backend == "TRITON_ATTN" and not current_platform.is_rocm():
759-
pytest.skip(
760-
"TRITON_ATTN does not support "
761-
"multi-token eagle spec decode on current platform"
762-
)
763-
764758
if attn_backend == "ROCM_AITER_FA" and current_platform.is_rocm():
765759
monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
766760

tests/v1/spec_decode/test_eagle_step_kernel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
# Skip if no CUDA - Triton kernel requires GPU
1717
pytest.importorskip("triton")
18-
if not torch.cuda.is_available():
19-
pytest.skip("CUDA required for EAGLE kernel tests", allow_module_level=True)
18+
if not current_platform.is_cuda_alike() and not current_platform.is_xpu():
19+
pytest.skip("CUDA/XPU required for EAGLE kernel tests", allow_module_level=True)
2020

2121

2222
def _reference_eagle_step_slot_mapping(

tests/v1/spec_decode/test_max_len.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,6 @@ def test_ngram_max_len(num_speculative_tokens: int):
3838
def test_eagle_max_len(
3939
monkeypatch: pytest.MonkeyPatch, num_speculative_tokens: int, attn_backend: str
4040
):
41-
if attn_backend == "TRITON_ATTN" and not current_platform.is_rocm():
42-
pytest.skip(
43-
"TRITON_ATTN does not support "
44-
"multi-token eagle spec decode on current platform"
45-
)
46-
4741
if attn_backend == "ROCM_AITER_FA" and current_platform.is_rocm():
4842
monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
4943

0 commit comments

Comments (0)