Skip to content

Commit

Permalink
[Bugfix] Fix type annotations in CPU model runner (vllm-project#4256)
Browse files Browse the repository at this point in the history
  • Loading branch information
WoosukKwon authored Apr 22, 2024
1 parent 296cdf8 commit e73ed0f
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions vllm/worker/cpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def load_model(self) -> None:
def _prepare_prompt(
self,
seq_group_metadata_list: List[SequenceGroupMetadata],
) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int]]:
) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int],
Optional[torch.Tensor]]:
assert len(seq_group_metadata_list) > 0
input_tokens: List[int] = []
input_positions: List[int] = []
Expand Down Expand Up @@ -347,8 +348,8 @@ def _prepare_sample(
def prepare_input_tensors(
self,
seq_group_metadata_list: List[SequenceGroupMetadata],
) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata,
SamplingMetadata]:
) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, SamplingMetadata,
Optional[torch.Tensor]]:
multi_modal_input = None
if self.is_driver_worker:
# NOTE: We assume that all sequences in the group are all prompts or
Expand Down

0 comments on commit e73ed0f

Please sign in to comment.