From 8f75dce00a56da009cb2045b60d6d5a50a103a10 Mon Sep 17 00:00:00 2001
From: Seunghyuk Park
Date: Fri, 30 Aug 2024 00:10:17 +0000
Subject: [PATCH] Update changes from pr 194
 https://github.com/HabanaAI/vllm-fork/pull/194

---
 vllm/model_executor/models/qwen2.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index 3deb3d8840cc4..1220db0210382 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -45,6 +45,7 @@
     ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
+from vllm.platforms import current_platform
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors, SamplerOutput
 
@@ -259,6 +260,11 @@ def forward(
             hidden_states = inputs_embeds
         else:
             hidden_states = self.embed_tokens(input_ids)
+
+        if current_platform.is_hpu():
+            import habana_frameworks.torch as htorch
+            htorch.core.mark_step()
+
         residual = None
         for i in range(len(self.layers)):
             layer = self.layers[i]
@@ -269,6 +275,9 @@ def forward(
                 attn_metadata,
                 residual,
             )
+            if current_platform.is_hpu():
+                htorch.core.mark_step()
+
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
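
Note: the patch guards two htorch.core.mark_step() calls behind current_platform.is_hpu(). On Habana Gaudi (HPU), PyTorch ops in the default lazy mode are only recorded until mark_step() marks a graph boundary, at which point the accumulated graph is compiled and executed; calling it once after the embedding lookup and once after each decoder layer presumably keeps every compiled graph small and per-layer. The snippet below is a minimal, self-contained sketch of that pattern only; TinyStack and is_hpu() are hypothetical stand-ins for the Qwen2 decoder stack and vllm.platforms.current_platform.is_hpu(), not vLLM code, and the script degrades to a plain CPU run when the Habana bridge is absent.

import torch
import torch.nn as nn


def is_hpu() -> bool:
    # Stand-in check: treat the platform as HPU only when the Habana
    # PyTorch bridge can be imported.
    try:
        import habana_frameworks.torch  # noqa: F401
        return True
    except ImportError:
        return False


class TinyStack(nn.Module):
    """Toy stand-in for a decoder stack, used only to illustrate the pattern."""

    def __init__(self, hidden: int = 64, num_layers: int = 4):
        super().__init__()
        self.layers = nn.ModuleList(
            [nn.Linear(hidden, hidden) for _ in range(num_layers)])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        on_hpu = is_hpu()
        if on_hpu:
            import habana_frameworks.torch as htorch
            # Flush the ops recorded so far (the embedding lookup in the
            # real model) before entering the per-layer loop.
            htorch.core.mark_step()
        for layer in self.layers:
            x = torch.relu(layer(x))
            if on_hpu:
                # One graph boundary per layer, mirroring the patch.
                htorch.core.mark_step()
        return x


if __name__ == "__main__":
    out = TinyStack()(torch.randn(2, 64))
    print(out.shape)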